[
  {
    "path": "README.md",
    "content": "# Learning Actor Relation Graphs for Group Activity Recognition\n\nSource code for the following paper([arXiv link](https://arxiv.org/abs/1904.10117)):\n\n        Learning Actor Relation Graphs for Group Activity Recognition\n        Jianchao Wu, Limin Wang, Li Wang, Jie Guo, Gangshan Wu\n        in CVPR 2019\n        \n        \n\n\n## Dependencies\n\n- Python `3.x`\n- PyTorch `0.4.1`\n- numpy, pickle, scikit-image\n- [RoIAlign for Pytorch](https://github.com/longcw/RoIAlign.pytorch)\n- Datasets: [Volleyball](https://github.com/mostafa-saad/deep-activity-rec), [Collective](http://vhosts.eecs.umich.edu/vision//activity-dataset.html)\n\n\n\n\n## Prepare Datasets\n\n1. Download [volleyball](http://vml.cs.sfu.ca/wp-content/uploads/volleyballdataset/volleyball.zip) or [collective](http://vhosts.eecs.umich.edu/vision//ActivityDataset.zip) dataset file.\n2. Unzip the dataset file into `data/volleyball` or `data/collective`.\n\n\n\n\n## Get Started\n\n1. Stage1: Fine-tune the model on single frame without using GCN.\n\n    ```shell\n    # volleyball dataset\n    python scripts/train_volleyball_stage1.py\n    \n    # collective dataset\n    python scripts/train_collective_stage1.py\n    ```\n\n2. Stage2: Fix weights of the feature extraction part of network, and train the network with GCN.\n\n    ```shell\n    # volleyball dataset\n    python scripts/train_volleyball_stage2.py\n    \n    # collective dataset\n    python scripts/train_collective_stage2.py\n    ```\n    \n    You can specify the running arguments in the python files under `scripts/` directory. The meanings of arguments can be found in `config.py`\n\n\n\n## Citation\n\n```\n@inproceedings{CVPR2019_ARG,\n  title = {Learning Actor Relation Graphs for Group Activity Recognition},\n  author = {Jianchao Wu and Limin Wang and Li Wang and Jie Guo and Gangshan Wu},\n  booktitle = {CVPR},\n  year = {2019},\n}\n```\n\n\n\n"
  },
  {
    "path": "backbone.py",
    "content": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torchvision.models as models\n\n    \nclass MyInception_v3(nn.Module):\n    def __init__(self,transform_input=False,pretrained=False):\n        super(MyInception_v3,self).__init__()\n        self.transform_input=transform_input\n        inception=models.inception_v3(pretrained=pretrained)\n        \n        self.Conv2d_1a_3x3 = inception.Conv2d_1a_3x3\n        self.Conv2d_2a_3x3 = inception.Conv2d_2a_3x3\n        self.Conv2d_2b_3x3 = inception.Conv2d_2b_3x3\n        self.Conv2d_3b_1x1 = inception.Conv2d_3b_1x1\n        self.Conv2d_4a_3x3 = inception.Conv2d_4a_3x3\n        self.Mixed_5b = inception.Mixed_5b\n        self.Mixed_5c = inception.Mixed_5c\n        self.Mixed_5d = inception.Mixed_5d\n        self.Mixed_6a = inception.Mixed_6a\n        self.Mixed_6b = inception.Mixed_6b\n        self.Mixed_6c = inception.Mixed_6c\n        self.Mixed_6d = inception.Mixed_6d\n        self.Mixed_6e = inception.Mixed_6e\n        \n    def forward(self,x):\n        outputs=[]\n        \n        if self.transform_input:\n            x = x.clone()\n            x[:, 0] = x[:, 0] * (0.229 / 0.5) + (0.485 - 0.5) / 0.5\n            x[:, 1] = x[:, 1] * (0.224 / 0.5) + (0.456 - 0.5) / 0.5\n            x[:, 2] = x[:, 2] * (0.225 / 0.5) + (0.406 - 0.5) / 0.5\n        # 299 x 299 x 3\n        x = self.Conv2d_1a_3x3(x)\n        # 149 x 149 x 32\n        x = self.Conv2d_2a_3x3(x)\n        # 147 x 147 x 32\n        x = self.Conv2d_2b_3x3(x)\n        # 147 x 147 x 64\n        x = F.max_pool2d(x, kernel_size=3, stride=2)\n        # 73 x 73 x 64\n        x = self.Conv2d_3b_1x1(x)\n        # 73 x 73 x 80\n        x = self.Conv2d_4a_3x3(x)\n        # 71 x 71 x 192\n        x = F.max_pool2d(x, kernel_size=3, stride=2)\n        # 35 x 35 x 192\n        x = self.Mixed_5b(x)\n        # 35 x 35 x 256\n        x = self.Mixed_5c(x)\n        # 35 x 35 x 288\n        x = self.Mixed_5d(x)\n        # 35 x 35 x 288\n        outputs.append(x)\n        \n        x = self.Mixed_6a(x)\n        # 17 x 17 x 768\n        x = self.Mixed_6b(x)\n        # 17 x 17 x 768\n        x = self.Mixed_6c(x)\n        # 17 x 17 x 768\n        x = self.Mixed_6d(x)\n        # 17 x 17 x 768\n        x = self.Mixed_6e(x)\n        # 17 x 17 x 768\n        outputs.append(x)\n        \n        return outputs\n    \n\nclass MyVGG16(nn.Module):\n    def __init__(self,pretrained=False):\n        super(MyVGG16,self).__init__()\n        \n        vgg=models.vgg16(pretrained=pretrained)\n     \n        self.features=vgg.features\n        \n    def forward(self,x):\n        x=self.features(x)\n        return [x]\n    \n    \nclass MyVGG19(nn.Module):\n    def __init__(self,pretrained=False):\n        super(MyVGG19,self).__init__()\n        \n        vgg=models.vgg19(pretrained=pretrained)\n     \n        self.features=vgg.features\n        \n    def forward(self,x):\n        x=self.features(x)\n        return [x]"
  },
  {
    "path": "base_model.py",
    "content": "import torch \nimport torch.nn as nn\nimport torch.nn.functional as F \n\nimport numpy as np\n\nfrom backbone import *\nfrom utils import *\nfrom roi_align.roi_align import RoIAlign      # RoIAlign module\nfrom roi_align.roi_align import CropAndResize # crop_and_resize module\n\n\nclass Basenet_volleyball(nn.Module):\n    \"\"\"\n    main module of base model for the volleyball\n    \"\"\"\n    def __init__(self, cfg):\n        super(Basenet_volleyball, self).__init__()\n        self.cfg=cfg\n        \n        NFB=self.cfg.num_features_boxes\n        D=self.cfg.emb_features\n        K=self.cfg.crop_size[0]\n        \n\n        if cfg.backbone=='inv3':\n            self.backbone=MyInception_v3(transform_input=False,pretrained=True)\n        elif cfg.backbone=='vgg16':\n            self.backbone=MyVGG16(pretrained=True)\n        elif cfg.backbone=='vgg19':\n            self.backbone=MyVGG19(pretrained=True)\n        else:\n            assert False\n        \n        self.roi_align=RoIAlign(*self.cfg.crop_size)\n        \n        \n        self.fc_emb = nn.Linear(K*K*D,NFB)\n        self.dropout_emb = nn.Dropout(p=self.cfg.train_dropout_prob)\n        \n        self.fc_actions=nn.Linear(NFB,self.cfg.num_actions)\n        self.fc_activities=nn.Linear(NFB,self.cfg.num_activities)\n        \n        \n        for m in self.modules():\n            if isinstance(m,nn.Linear):\n                nn.init.kaiming_normal_(m.weight)\n                nn.init.zeros_(m.bias)\n\n\n    def savemodel(self,filepath):\n        state = {\n            'backbone_state_dict': self.backbone.state_dict(),\n            'fc_emb_state_dict':self.fc_emb.state_dict(),\n            'fc_actions_state_dict':self.fc_actions.state_dict(),\n            'fc_activities_state_dict':self.fc_activities.state_dict()\n        }\n        \n        torch.save(state, filepath)\n        print('model saved to:',filepath)\n\n    def loadmodel(self,filepath):\n        state = torch.load(filepath)\n        self.backbone.load_state_dict(state['backbone_state_dict'])\n        self.fc_emb.load_state_dict(state['fc_emb_state_dict'])\n        self.fc_actions.load_state_dict(state['fc_actions_state_dict'])\n        self.fc_activities.load_state_dict(state['fc_activities_state_dict'])\n        print('Load model states from: ',filepath)\n\n    def forward(self,batch_data):\n        images_in, boxes_in = batch_data\n        \n        # read config parameters\n        B=images_in.shape[0]\n        T=images_in.shape[1]\n        H, W=self.cfg.image_size\n        OH, OW=self.cfg.out_size\n        N=self.cfg.num_boxes\n        NFB=self.cfg.num_features_boxes\n        \n        # Reshape the input data\n        images_in_flat=torch.reshape(images_in,(B*T,3,H,W))  #B*T, 3, H, W\n        boxes_in_flat=torch.reshape(boxes_in,(B*T*N,4))  #B*T*N, 4\n\n        boxes_idx=[i * torch.ones(N, dtype=torch.int)   for i in range(B*T) ]\n        boxes_idx=torch.stack(boxes_idx).to(device=boxes_in.device)  # B*T, N\n        boxes_idx_flat=torch.reshape(boxes_idx,(B*T*N,))  #B*T*N,\n        \n        \n        # Use backbone to extract features of images_in\n        # Pre-precess first\n        images_in_flat=prep_images(images_in_flat)\n        \n        outputs=self.backbone(images_in_flat)\n        \n        \n        # Build multiscale features\n        features_multiscale=[]\n        for features in outputs:\n            if features.shape[2:4]!=torch.Size([OH,OW]):\n                
features=F.interpolate(features,size=(OH,OW),mode='bilinear',align_corners=True)\n            features_multiscale.append(features)\n        \n        features_multiscale=torch.cat(features_multiscale,dim=1)  #B*T, D, OH, OW\n        \n        \n        \n        # ActNet\n        boxes_in_flat.requires_grad=False\n        boxes_idx_flat.requires_grad=False\n#         features_multiscale.requires_grad=False\n        \n    \n        # RoI Align\n        boxes_features=self.roi_align(features_multiscale,\n                                            boxes_in_flat,\n                                            boxes_idx_flat)  #B*T*N, D, K, K,\n        \n        \n        boxes_features=boxes_features.reshape(B*T*N,-1) # B*T*N, D*K*K\n        \n            \n        # Embedding to hidden state\n        boxes_features=self.fc_emb(boxes_features)  # B*T*N, NFB\n        boxes_features=F.relu(boxes_features)\n        boxes_features=self.dropout_emb(boxes_features)\n       \n    \n        boxes_states=boxes_features.reshape(B,T,N,NFB)\n        \n        # Predict actions\n        boxes_states_flat=boxes_states.reshape(-1,NFB)  #B*T*N, NFB\n\n        actions_scores=self.fc_actions(boxes_states_flat)  #B*T*N, actn_num\n        \n        \n        # Predict activities\n        boxes_states_pooled,_=torch.max(boxes_states,dim=2)  #B, T, NFB\n        boxes_states_pooled_flat=boxes_states_pooled.reshape(-1,NFB)  #B*T, NFB\n        \n        activities_scores=self.fc_activities(boxes_states_pooled_flat)  #B*T, acty_num\n        \n        if T!=1:\n            actions_scores=actions_scores.reshape(B,T,N,-1).mean(dim=1).reshape(B*N,-1)\n            activities_scores=activities_scores.reshape(B,T,-1).mean(dim=1)\n            \n        return actions_scores, activities_scores\n        \n        \nclass Basenet_collective(nn.Module):\n    \"\"\"\n    main module of base model for collective dataset\n    \"\"\"\n    def __init__(self, cfg):\n        super(Basenet_collective, self).__init__()\n        self.cfg=cfg\n        \n        D=self.cfg.emb_features\n        K=self.cfg.crop_size[0]\n        NFB=self.cfg.num_features_boxes\n        NFR, NFG=self.cfg.num_features_relation, self.cfg.num_features_gcn\n        \n        self.backbone=MyInception_v3(transform_input=False,pretrained=True)\n#         self.backbone=MyVGG16(pretrained=True)\n        \n        if not self.cfg.train_backbone:\n            for p in self.backbone.parameters():\n                p.requires_grad=False\n        \n        self.roi_align=RoIAlign(*self.cfg.crop_size)\n        \n        self.fc_emb_1=nn.Linear(K*K*D,NFB)\n        self.dropout_emb_1 = nn.Dropout(p=self.cfg.train_dropout_prob)\n#         self.nl_emb_1=nn.LayerNorm([NFB])\n        \n        \n        self.fc_actions=nn.Linear(NFB,self.cfg.num_actions)\n        self.fc_activities=nn.Linear(NFB,self.cfg.num_activities)\n        \n        for m in self.modules():\n            if isinstance(m,nn.Linear):\n                nn.init.kaiming_normal_(m.weight)\n\n    def savemodel(self,filepath):\n        state = {\n            'backbone_state_dict': self.backbone.state_dict(),\n            'fc_emb_state_dict':self.fc_emb_1.state_dict(),\n            'fc_actions_state_dict':self.fc_actions.state_dict(),\n            'fc_activities_state_dict':self.fc_activities.state_dict()\n        }\n        \n        torch.save(state, filepath)\n        print('model saved to:',filepath)\n        \n\n    def loadmodel(self,filepath):\n        state = torch.load(filepath)\n        
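# only the backbone and box-embedding weights are restored; the classifier heads keep their fresh initialization\n        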
self.backbone.load_state_dict(state['backbone_state_dict'])\n        self.fc_emb_1.load_state_dict(state['fc_emb_state_dict'])\n        print('Load model states from: ',filepath)\n        \n                \n    def forward(self,batch_data):\n        images_in, boxes_in, bboxes_num_in = batch_data\n    \n        # read config parameters\n        B=images_in.shape[0]\n        T=images_in.shape[1]\n        H, W=self.cfg.image_size\n        OH, OW=self.cfg.out_size\n        MAX_N=self.cfg.num_boxes\n        NFB=self.cfg.num_features_boxes\n        NFR, NFG=self.cfg.num_features_relation, self.cfg.num_features_gcn\n        EPS=1e-5\n        \n        D=self.cfg.emb_features\n        K=self.cfg.crop_size[0]\n        \n        # Reshape the input data\n        images_in_flat=torch.reshape(images_in,(B*T,3,H,W))  #B*T, 3, H, W\n        boxes_in=boxes_in.reshape(B*T,MAX_N,4)\n                \n        # Use backbone to extract features of images_in\n        # Pre-process first\n        images_in_flat=prep_images(images_in_flat)\n        outputs=self.backbone(images_in_flat)\n            \n        \n        # Build multiscale features\n        features_multiscale=[]\n        for features in outputs:\n            if features.shape[2:4]!=torch.Size([OH,OW]):\n                features=F.interpolate(features,size=(OH,OW),mode='bilinear',align_corners=True)\n            features_multiscale.append(features)\n        \n        features_multiscale=torch.cat(features_multiscale,dim=1)  #B*T, D, OH, OW\n        \n\n        boxes_in_flat=torch.reshape(boxes_in,(B*T*MAX_N,4))  #B*T*MAX_N, 4\n            \n        boxes_idx=[i * torch.ones(MAX_N, dtype=torch.int)   for i in range(B*T) ]\n        boxes_idx=torch.stack(boxes_idx).to(device=boxes_in.device)  # B*T, MAX_N\n        boxes_idx_flat=torch.reshape(boxes_idx,(B*T*MAX_N,))  #B*T*MAX_N,\n\n        # RoI Align\n        boxes_in_flat.requires_grad=False\n        boxes_idx_flat.requires_grad=False\n        boxes_features_all=self.roi_align(features_multiscale,\n                                            boxes_in_flat,\n                                            boxes_idx_flat)  #B*T*MAX_N, D, K, K,\n        \n        boxes_features_all=boxes_features_all.reshape(B*T,MAX_N,-1)  #B*T,MAX_N, D*K*K\n        \n        # Embedding \n        boxes_features_all=self.fc_emb_1(boxes_features_all)  # B*T,MAX_N, NFB\n        boxes_features_all=F.relu(boxes_features_all)\n        boxes_features_all=self.dropout_emb_1(boxes_features_all)\n        \n    \n        actions_scores=[]\n        activities_scores=[]\n        bboxes_num_in=bboxes_num_in.reshape(B*T,)  #B*T,\n        for bt in range(B*T):\n        \n            N=bboxes_num_in[bt]\n            boxes_features=boxes_features_all[bt,:N,:].reshape(1,N,NFB)  #1,N,NFB\n    \n            boxes_states=boxes_features  \n\n            NFS=NFB\n\n            # Predict actions\n            boxes_states_flat=boxes_states.reshape(-1,NFS)  #1*N, NFS\n            actn_score=self.fc_actions(boxes_states_flat)  #1*N, actn_num\n            actions_scores.append(actn_score)\n\n            # Predict activities\n            boxes_states_pooled,_=torch.max(boxes_states,dim=1)  #1, NFS\n            boxes_states_pooled_flat=boxes_states_pooled.reshape(-1,NFS)  #1, NFS\n            acty_score=self.fc_activities(boxes_states_pooled_flat)  #1, acty_num\n            activities_scores.append(acty_score)\n\n        actions_scores=torch.cat(actions_scores,dim=0)  #ALL_N,actn_num\n        activities_scores=torch.cat(activities_scores,dim=0)   
#B*T,acty_num\n        \n#         print(actions_scores.shape)\n#         print(activities_scores.shape)\n       \n        return actions_scores, activities_scores\n        "
  },
  {
    "path": "collective.py",
    "content": "import torch\nfrom torch.utils import data\nimport torchvision.models as models\nimport torchvision.transforms as transforms\n\nimport random\nfrom PIL import Image\nimport numpy as np\n\nfrom collections import Counter\n\n\nFRAMES_NUM={1: 302, 2: 347, 3: 194, 4: 257, 5: 536, 6: 401, 7: 968, 8: 221, 9: 356, 10: 302, \n            11: 1813, 12: 1084, 13: 851, 14: 723, 15: 464, 16: 1021, 17: 905, 18: 600, 19: 203, 20: 342, \n            21: 650, 22: 361, 23: 311, 24: 321, 25: 617, 26: 734, 27: 1804, 28: 470, 29: 635, 30: 356, \n            31: 690, 32: 194, 33: 193, 34: 395, 35: 707, 36: 914, 37: 1049, 38: 653, 39: 518, 40: 401, \n            41: 707, 42: 420, 43: 410, 44: 356}\n\n \nFRAMES_SIZE={1: (480, 720), 2: (480, 720), 3: (480, 720), 4: (480, 720), 5: (480, 720), 6: (480, 720), 7: (480, 720), 8: (480, 720), 9: (480, 720), 10: (480, 720), \n             11: (480, 720), 12: (480, 720), 13: (480, 720), 14: (480, 720), 15: (450, 800), 16: (480, 720), 17: (480, 720), 18: (480, 720), 19: (480, 720), 20: (450, 800), \n             21: (450, 800), 22: (450, 800), 23: (450, 800), 24: (450, 800), 25: (480, 720), 26: (480, 720), 27: (480, 720), 28: (480, 720), 29: (480, 720), 30: (480, 720), \n             31: (480, 720), 32: (480, 720), 33: (480, 720), 34: (480, 720), 35: (480, 720), 36: (480, 720), 37: (480, 720), 38: (480, 720), 39: (480, 720), 40: (480, 720), \n             41: (480, 720), 42: (480, 720), 43: (480, 720), 44: (480, 720)}\n\n\nACTIONS=['NA','Crossing','Waiting','Queueing','Walking','Talking']\nACTIVITIES=['Crossing','Waiting','Queueing','Walking','Talking']\n\n\nACTIONS_ID={a:i for i,a in enumerate(ACTIONS)}\nACTIVITIES_ID={a:i for i,a in enumerate(ACTIVITIES)}\n\n\ndef collective_read_annotations(path,sid):\n    annotations={}\n    path=path + '/seq%02d/annotations.txt' % sid\n    \n    with open(path,mode='r') as f:\n        frame_id=None\n        group_activity=None\n        actions=[]\n        bboxes=[]\n        for l in f.readlines():\n            values=l[:-1].split('\t')\n            \n            if int(values[0])!=frame_id:\n                if frame_id!=None and frame_id%10==1 and frame_id+9<=FRAMES_NUM[sid]:\n                    counter = Counter(actions).most_common(2)\n                    group_activity= counter[0][0]-1 if counter[0][0]!=0 else counter[1][0]-1\n                    annotations[frame_id]={\n                        'frame_id':frame_id,\n                        'group_activity':group_activity,\n                        'actions':actions,\n                        'bboxes':bboxes\n                    }\n                    \n                frame_id=int(values[0])\n                group_activity=None\n                actions=[]\n                bboxes=[]\n                \n            actions.append(int(values[5])-1)\n            x,y,w,h = (int(values[i])  for i  in range(1,5))\n            H,W=FRAMES_SIZE[sid]\n            \n            bboxes.append( (y/H,x/W,(y+h)/H,(x+w)/W) )\n        \n        if frame_id!=None and frame_id%10==1 and frame_id+9<=FRAMES_NUM[sid]:\n            counter = Counter(actions).most_common(2)\n            group_activity= counter[0][0]-1 if counter[0][0]!=0 else counter[1][0]-1\n            annotations[frame_id]={\n                'frame_id':frame_id,\n                'group_activity':group_activity,\n                'actions':actions,\n                'bboxes':bboxes\n            }\n\n    return annotations\n            \n        \n        \ndef collective_read_dataset(path,seqs):\n    data = {}\n    for sid in 
seqs:\n        data[sid] = collective_read_annotations(path,sid)\n    return data\n\ndef collective_all_frames(anns):\n    return [(s,f)  for s in anns for f in anns[s] ]\n\n\nclass CollectiveDataset(data.Dataset):\n    \"\"\"\n    Characterize collective dataset for pytorch\n    \"\"\"\n    def __init__(self,anns,frames,images_path,image_size,feature_size,num_boxes=13,num_frames=10,is_training=True,is_finetune=False):\n        self.anns=anns\n        self.frames=frames\n        self.images_path=images_path\n        self.image_size=image_size\n        self.feature_size=feature_size\n        \n        self.num_boxes=num_boxes\n        self.num_frames=num_frames\n        \n        self.is_training=is_training\n        self.is_finetune=is_finetune\n    \n    def __len__(self):\n        \"\"\"\n        Return the total number of samples\n        \"\"\"\n        return len(self.frames)\n    \n    def __getitem__(self,index):\n        \"\"\"\n        Generate one sample of the dataset\n        \"\"\"\n        \n        select_frames=self.get_frames(self.frames[index])\n        \n        sample=self.load_samples_sequence(select_frames)\n        \n        return sample\n    \n    def get_frames(self,frame):\n        \n        sid, src_fid = frame\n        \n        if self.is_finetune:\n            if self.is_training:\n                fid=random.randint(src_fid, src_fid+self.num_frames-1)\n                return [(sid, src_fid, fid)]\n        \n            else:\n                return [(sid, src_fid, fid) \n                        for fid in range(src_fid, src_fid+self.num_frames)]\n            \n        else:\n            if self.is_training:\n                sample_frames=random.sample(range(src_fid,src_fid+self.num_frames),3)\n                return [(sid, src_fid, fid) for fid in sample_frames]\n\n            else:\n                sample_frames=[ src_fid, src_fid+3, src_fid+6, src_fid+1, src_fid+4, src_fid+7, src_fid+2, src_fid+5, src_fid+8 ]\n                return [(sid, src_fid, fid) for fid in sample_frames]\n    \n    \n    def load_samples_sequence(self,select_frames):\n        \"\"\"\n        load samples sequence\n\n        Returns:\n            pytorch tensors\n        \"\"\"\n        OH, OW=self.feature_size\n        \n        images, bboxes = [], []\n        activities, actions = [], []\n        bboxes_num=[]\n    \n        \n        for i, (sid, src_fid, fid) in enumerate(select_frames):\n\n            img = Image.open(self.images_path + '/seq%02d/frame%04d.jpg'%(sid,fid))\n\n            img=transforms.functional.resize(img,self.image_size)\n            img=np.array(img)\n\n            # H,W,3 -> 3,H,W\n            img=img.transpose(2,0,1)\n            images.append(img)\n            \n            temp_boxes=[]\n            for box in self.anns[sid][src_fid]['bboxes']:\n                y1,x1,y2,x2=box\n                w1,h1,w2,h2 = x1*OW, y1*OH, x2*OW, y2*OH  \n                temp_boxes.append((w1,h1,w2,h2))\n                \n            temp_actions=self.anns[sid][src_fid]['actions'][:]\n            bboxes_num.append(len(temp_boxes))\n            \n            while len(temp_boxes)!=self.num_boxes:\n                temp_boxes.append((0,0,0,0))\n                temp_actions.append(-1)\n            \n            bboxes.append(temp_boxes)\n            actions.append(temp_actions)\n            \n            activities.append(self.anns[sid][src_fid]['group_activity'])\n        \n        \n        images = np.stack(images)\n        activities = np.array(activities, 
dtype=np.int32)\n        bboxes_num = np.array(bboxes_num, dtype=np.int32)\n        bboxes=np.array(bboxes,dtype=float).reshape(-1,self.num_boxes,4)  #builtin float: np.float is deprecated in recent numpy\n        actions=np.array(actions,dtype=np.int32).reshape(-1,self.num_boxes)\n        \n        #convert to pytorch tensor\n        images=torch.from_numpy(images).float()\n        bboxes=torch.from_numpy(bboxes).float()\n        actions=torch.from_numpy(actions).long()\n        activities=torch.from_numpy(activities).long()\n        bboxes_num=torch.from_numpy(bboxes_num).int()\n        \n        return images, bboxes, actions, activities, bboxes_num\n    \n    \n\n    \n"
  },
  {
    "path": "config.py",
    "content": "import time\nimport os\n\n\nclass Config(object):\n    \"\"\"\n    class to save config parameter\n    \"\"\"\n\n    def __init__(self, dataset_name):\n        # Global\n        self.image_size = 720, 1280  #input image size\n        self.batch_size =  32  #train batch size \n        self.test_batch_size = 8  #test batch size\n        self.num_boxes = 12  #max number of bounding boxes in each frame\n        \n        # Gpu\n        self.use_gpu=True\n        self.use_multi_gpu=True   \n        self.device_list=\"0,1,2,3\"  #id list of gpus used for training \n        \n        # Dataset\n        assert(dataset_name in ['volleyball', 'collective'])\n        self.dataset_name=dataset_name \n        \n        if dataset_name=='volleyball':\n            self.data_path='data/volleyball'  #data path for the volleyball dataset\n            self.train_seqs = [ 1,3,6,7,10,13,15,16,18,22,23,31,32,36,38,39,40,41,42,48,50,52,53,54,\n                                0,2,8,12,17,19,24,26,27,28,30,33,46,49,51]  #video id list of train set \n            self.test_seqs = [4,5,9,11,14,20,21,25,29,34,35,37,43,44,45,47]  #video id list of test set\n            \n        else:\n            self.data_path='data/collective'  #data path for the collective dataset\n            self.test_seqs=[5,6,7,8,9,10,11,15,16,25,28,29]\n            self.train_seqs=[s for s in range(1,45) if s not in self.test_seqs]\n        \n        # Backbone \n        self.backbone='inv3' \n        self.crop_size = 5, 5  #crop size of roi align\n        self.train_backbone = False  #if freeze the feature extraction part of network, True for stage 1, False for stage 2\n        self.out_size = 87, 157  #output feature map size of backbone \n        self.emb_features=1056   #output feature map channel of backbone\n\n        \n        # Activity Action\n        self.num_actions = 9  #number of action categories\n        self.num_activities = 8  #number of activity categories\n        self.actions_loss_weight = 1.0  #weight used to balance action loss and activity loss\n        self.actions_weights = None\n\n        # Sample\n        self.num_frames = 3 \n        self.num_before = 5\n        self.num_after = 4\n\n        # GCN\n        self.num_features_boxes = 1024\n        self.num_features_relation=256\n        self.num_graph=16  #number of graphs\n        self.num_features_gcn=self.num_features_boxes\n        self.gcn_layers=1  #number of GCN layers\n        self.tau_sqrt=False\n        self.pos_threshold=0.2  #distance mask threshold in position relation\n\n        # Training Parameters\n        self.train_random_seed = 0\n        self.train_learning_rate = 2e-4  #initial learning rate \n        self.lr_plan = {41:1e-4, 81:5e-5, 121:1e-5}  #change learning rate in these epochs \n        self.train_dropout_prob = 0.3  #dropout probability\n        self.weight_decay = 0  #l2 weight decay\n    \n        self.max_epoch=150  #max training epoch\n        self.test_interval_epoch=2\n        \n        # Exp\n        self.training_stage=1  #specify stage1 or stage2\n        self.stage1_model_path=''  #path of the base model, need to be set in stage2\n        self.test_before_train=False\n        self.exp_note='Group-Activity-Recognition'\n        self.exp_name=None\n        \n        \n    def init_config(self, need_new_folder=True):\n        if self.exp_name is None:\n            time_str=time.strftime(\"%Y-%m-%d_%H-%M-%S\", time.localtime())\n            
self.exp_name='[%s_stage%d]<%s>'%(self.exp_note,self.training_stage,time_str)\n            \n        self.result_path='result/%s'%self.exp_name\n        self.log_path='result/%s/log.txt'%self.exp_name\n            \n        if need_new_folder:\n            os.mkdir(self.result_path)"
  },
  {
    "path": "data/collective/tracks/readTracks.m",
    "content": "function tracks = readTracks(filename)\nfp = fopen(filename, 'r');\n\ntline = fgetl(fp);\nnframe = sscanf(tline, 'Total frames %d');\n\ntline = fgetl(fp);\nntargets = sscanf(tline, 'Number of Targets %d');\n\n\nfor n = 1:ntargets\n    track = struct('id', n, 'ti', 0, 'te', 0, 'bbs', [], 'locs', []);\n    \n    tline = fgetl(fp);\n    temp = sscanf(tline, 'Target %d (frames from %d to %d)');\n    track.id = temp(1);     track.ti = temp(2);     track.te = temp(3);\n    \n    len = temp(3) - temp(2) + 1;\n    tline = fgetl(fp); % dummy line\n    for t = 1:len\n        tline = fgetl(fp);\n        temp = sscanf(tline, '%d\\t%d\\t%d\\t%d\\t%d');\n        track.bbs(:, t) = temp(2:5);\n    end\n    \n    tline = fgetl(fp); % dummy line\n    for t = 1:len\n        tline = fgetl(fp);\n        temp = sscanf(tline, '%d\\t%f\\t%f\\t%f\\t%f');\n        track.locs(:, t) = temp(2:5);\n    end\n    \n    tracks(n) = track;\nend\n\nfclose(fp);\n\nend"
  },
  {
    "path": "data/collective/tracks/showTracks.m",
    "content": "function showTracks(imdir, tracks)\n\nimfiles = dir([imdir '*.jpg']);\n\nfor i = 1:length(imfiles)\n    imshow([imdir imfiles(i).name]);\n    \n    drawTracks(tracks, i);\n    \n    drawnow;\nend\n\nend\n\n\n\nfunction drawTracks(tracks, frame)\n\ncmap = colormap;\n\nfor i = 1:length(tracks)\n    if ((tracks(i).ti <= frame) & ...\n        (tracks(i).te >= frame))\n        idx = frame - tracks(i).ti + 1;\n\n        col = cmap(mod(i*10, 64) + 1, :);\n        rectangle('Position', tracks(i).bbs(:, idx), 'EdgeColor', col, 'LineWidth', 3);\n    end\nend\n\nend"
  },
  {
    "path": "dataset.py",
    "content": "from volleyball import *\nfrom collective import *\n\nimport pickle\n\n\ndef return_dataset(cfg):\n    if cfg.dataset_name=='volleyball':\n        train_anns = volley_read_dataset(cfg.data_path, cfg.train_seqs)\n        train_frames = volley_all_frames(train_anns)\n\n        test_anns = volley_read_dataset(cfg.data_path, cfg.test_seqs)\n        test_frames = volley_all_frames(test_anns)\n\n        all_anns = {**train_anns, **test_anns}\n        all_tracks = pickle.load(open(cfg.data_path + '/tracks_normalized.pkl', 'rb'))\n\n\n        training_set=VolleyballDataset(all_anns,all_tracks,train_frames,\n                                      cfg.data_path,cfg.image_size,cfg.out_size,num_before=cfg.num_before,\n                                       num_after=cfg.num_after,is_training=True,is_finetune=(cfg.training_stage==1))\n\n        validation_set=VolleyballDataset(all_anns,all_tracks,test_frames,\n                                      cfg.data_path,cfg.image_size,cfg.out_size,num_before=cfg.num_before,\n                                         num_after=cfg.num_after,is_training=False,is_finetune=(cfg.training_stage==1))\n    \n    elif cfg.dataset_name=='collective':\n        train_anns=collective_read_dataset(cfg.data_path, cfg.train_seqs)\n        train_frames=collective_all_frames(train_anns)\n\n        test_anns=collective_read_dataset(cfg.data_path, cfg.test_seqs)\n        test_frames=collective_all_frames(test_anns)\n\n        training_set=CollectiveDataset(train_anns,train_frames,\n                                      cfg.data_path,cfg.image_size,cfg.out_size,\n                                       num_frames=cfg.num_frames,is_training=True,is_finetune=(cfg.training_stage==1))\n\n        validation_set=CollectiveDataset(test_anns,test_frames,\n                                      cfg.data_path,cfg.image_size,cfg.out_size,\n                                         num_frames=cfg.num_frames,is_training=False,is_finetune=(cfg.training_stage==1))\n                              \n    else:\n        assert False\n                                         \n    \n    print('Reading dataset finished...')\n    print('%d train samples'%len(train_frames))\n    print('%d test samples'%len(test_frames))\n    \n    return training_set, validation_set\n    "
  },
  {
    "path": "gcn_model.py",
    "content": "import torch \nimport torch.nn as nn\nimport torch.nn.functional as F \n\nimport numpy as np\n\nfrom backbone import *\nfrom utils import *\nfrom roi_align.roi_align import RoIAlign      # RoIAlign module\nfrom roi_align.roi_align import CropAndResize # crop_and_resize module\n\n\nclass GCN_Module(nn.Module):\n    def __init__(self, cfg):\n        super(GCN_Module, self).__init__()\n        \n        self.cfg=cfg\n        \n        NFR =cfg.num_features_relation\n        \n        NG=cfg.num_graph\n        N=cfg.num_boxes\n        T=cfg.num_frames\n        \n        NFG=cfg.num_features_gcn\n        NFG_ONE=NFG\n        \n        self.fc_rn_theta_list=torch.nn.ModuleList([ nn.Linear(NFG,NFR) for i in range(NG) ])\n        self.fc_rn_phi_list=torch.nn.ModuleList([ nn.Linear(NFG,NFR) for i in range(NG) ])\n        \n        \n        self.fc_gcn_list=torch.nn.ModuleList([ nn.Linear(NFG,NFG_ONE,bias=False) for i in range(NG) ])\n        \n        if cfg.dataset_name=='volleyball':\n            self.nl_gcn_list=torch.nn.ModuleList([ nn.LayerNorm([T*N,NFG_ONE]) for i in range(NG) ])\n        else:\n            self.nl_gcn_list=torch.nn.ModuleList([ nn.LayerNorm([NFG_ONE]) for i in range(NG) ])\n        \n            \n\n        \n    def forward(self,graph_boxes_features,boxes_in_flat):\n        \"\"\"\n        graph_boxes_features  [B*T,N,NFG]\n        \"\"\"\n        \n        # GCN graph modeling\n        # Prepare boxes similarity relation\n        B,N,NFG=graph_boxes_features.shape\n        NFR=self.cfg.num_features_relation\n        NG=self.cfg.num_graph\n        NFG_ONE=NFG\n        \n        OH, OW=self.cfg.out_size\n        pos_threshold=self.cfg.pos_threshold\n        \n        # Prepare position mask\n        graph_boxes_positions=boxes_in_flat  #B*T*N, 4\n        graph_boxes_positions[:,0]=(graph_boxes_positions[:,0] + graph_boxes_positions[:,2]) / 2 \n        graph_boxes_positions[:,1]=(graph_boxes_positions[:,1] + graph_boxes_positions[:,3]) / 2 \n        graph_boxes_positions=graph_boxes_positions[:,:2].reshape(B,N,2)  #B*T, N, 2\n        \n        graph_boxes_distances=calc_pairwise_distance_3d(graph_boxes_positions,graph_boxes_positions)  #B, N, N\n        \n        position_mask=( graph_boxes_distances > (pos_threshold*OW) )\n        \n        \n        relation_graph=None\n        graph_boxes_features_list=[]\n        for i in range(NG):\n            graph_boxes_features_theta=self.fc_rn_theta_list[i](graph_boxes_features)  #B,N,NFR\n            graph_boxes_features_phi=self.fc_rn_phi_list[i](graph_boxes_features)  #B,N,NFR\n\n#             graph_boxes_features_theta=self.nl_rn_theta_list[i](graph_boxes_features_theta)\n#             graph_boxes_features_phi=self.nl_rn_phi_list[i](graph_boxes_features_phi)\n\n            similarity_relation_graph=torch.matmul(graph_boxes_features_theta,graph_boxes_features_phi.transpose(1,2))  #B,N,N\n\n            similarity_relation_graph=similarity_relation_graph/np.sqrt(NFR)\n\n            similarity_relation_graph=similarity_relation_graph.reshape(-1,1)  #B*N*N, 1\n            \n        \n        \n            # Build relation graph\n            relation_graph=similarity_relation_graph\n\n            relation_graph = relation_graph.reshape(B,N,N)\n\n            relation_graph[position_mask]=-float('inf')\n\n            relation_graph = torch.softmax(relation_graph,dim=2)       \n        \n            # Graph convolution\n            one_graph_boxes_features=self.fc_gcn_list[i]( 
torch.matmul(relation_graph,graph_boxes_features) )  #B, N, NFG_ONE\n            one_graph_boxes_features=self.nl_gcn_list[i](one_graph_boxes_features)\n            one_graph_boxes_features=F.relu(one_graph_boxes_features)\n            \n            graph_boxes_features_list.append(one_graph_boxes_features)\n        \n        graph_boxes_features=torch.sum(torch.stack(graph_boxes_features_list),dim=0) #B, N, NFG\n        \n        return graph_boxes_features,relation_graph\n\nclass GCNnet_volleyball(nn.Module):\n    \"\"\"\n    main module of GCN for the volleyball dataset\n    \"\"\"\n    def __init__(self, cfg):\n        super(GCNnet_volleyball, self).__init__()\n        self.cfg=cfg\n        \n        T, N=self.cfg.num_frames, self.cfg.num_boxes\n        D=self.cfg.emb_features\n        K=self.cfg.crop_size[0]\n        NFB=self.cfg.num_features_boxes\n        NFR, NFG=self.cfg.num_features_relation, self.cfg.num_features_gcn\n        NG=self.cfg.num_graph\n        \n        \n        if cfg.backbone=='inv3':\n            self.backbone=MyInception_v3(transform_input=False,pretrained=True)\n        elif cfg.backbone=='vgg16':\n            self.backbone=MyVGG16(pretrained=True)\n        elif cfg.backbone=='vgg19':\n            self.backbone=MyVGG19(pretrained=False)\n        else:\n            assert False\n        \n        if not cfg.train_backbone:\n            for p in self.backbone.parameters():\n                p.requires_grad=False\n        \n        self.roi_align=RoIAlign(*self.cfg.crop_size)\n        \n        self.fc_emb_1=nn.Linear(K*K*D,NFB)\n        self.nl_emb_1=nn.LayerNorm([NFB])\n        \n        \n        self.gcn_list = torch.nn.ModuleList([ GCN_Module(self.cfg)  for i in range(self.cfg.gcn_layers) ])    \n        \n        \n        self.dropout_global=nn.Dropout(p=self.cfg.train_dropout_prob)\n    \n        self.fc_actions=nn.Linear(NFG,self.cfg.num_actions)\n        self.fc_activities=nn.Linear(NFG,self.cfg.num_activities)\n        \n        for m in self.modules():\n            if isinstance(m,nn.Linear):\n                nn.init.kaiming_normal_(m.weight)\n                if m.bias is not None:\n                    nn.init.zeros_(m.bias)\n                    \n                    \n    def loadmodel(self,filepath):\n        state = torch.load(filepath)\n        self.backbone.load_state_dict(state['backbone_state_dict'])\n        self.fc_emb_1.load_state_dict(state['fc_emb_state_dict'])\n        print('Load model states from: ',filepath)\n        \n                \n    def forward(self,batch_data):\n        images_in, boxes_in = batch_data\n        \n        # read config parameters\n        B=images_in.shape[0]\n        T=images_in.shape[1]\n        H, W=self.cfg.image_size\n        OH, OW=self.cfg.out_size\n        N=self.cfg.num_boxes\n        NFB=self.cfg.num_features_boxes\n        NFR, NFG=self.cfg.num_features_relation, self.cfg.num_features_gcn\n        NG=self.cfg.num_graph\n        \n        D=self.cfg.emb_features\n        K=self.cfg.crop_size[0]\n        \n        \n        if not self.training:\n            # at test time each sample holds 9 frames, treated as 3 clips of 3 frames\n            B=B*3\n            T=T//3\n            images_in=images_in.reshape( (B,T)+images_in.shape[2:] )\n            boxes_in=boxes_in.reshape(  (B,T)+boxes_in.shape[2:] )\n        \n        \n        # Reshape the input data\n        images_in_flat=torch.reshape(images_in,(B*T,3,H,W))  #B*T, 3, H, W\n        boxes_in_flat=torch.reshape(boxes_in,(B*T*N,4))  #B*T*N, 4\n\n        boxes_idx=[i * torch.ones(N, dtype=torch.int)   for i in range(B*T) ]\n        
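# index of the image that each box belongs to, required by RoIAlign\n        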
boxes_idx=torch.stack(boxes_idx).to(device=boxes_in.device)  # B*T, N\n        boxes_idx_flat=torch.reshape(boxes_idx,(B*T*N,))  #B*T*N,\n        \n        # Use backbone to extract features of images_in\n        # Pre-process first\n        images_in_flat=prep_images(images_in_flat)\n        outputs=self.backbone(images_in_flat)\n            \n        \n        # Build multiscale features\n        assert outputs[0].shape[2:4]==torch.Size([OH,OW])\n        features_multiscale=[]\n        for features in outputs:\n            if features.shape[2:4]!=torch.Size([OH,OW]):\n                features=F.interpolate(features,size=(OH,OW),mode='bilinear',align_corners=True)\n            features_multiscale.append(features)\n        \n        features_multiscale=torch.cat(features_multiscale,dim=1)  #B*T, D, OH, OW\n        \n        \n        # RoI Align\n        boxes_in_flat.requires_grad=False\n        boxes_idx_flat.requires_grad=False\n        boxes_features=self.roi_align(features_multiscale,\n                                            boxes_in_flat,\n                                            boxes_idx_flat)  #B*T*N, D, K, K,\n        \n        boxes_features=boxes_features.reshape(B,T,N,-1)  #B,T,N, D*K*K\n        \n        \n        # Embedding \n        boxes_features=self.fc_emb_1(boxes_features)  # B,T,N, NFB\n        boxes_features=self.nl_emb_1(boxes_features)\n        boxes_features=F.relu(boxes_features)\n        \n        \n        \n        # GCN       \n        graph_boxes_features=boxes_features.reshape(B,T*N,NFG)\n        \n#         visual_info=[]\n        for i in range(len(self.gcn_list)):\n            graph_boxes_features,relation_graph=self.gcn_list[i](graph_boxes_features,boxes_in_flat)\n#             visual_info.append(relation_graph.reshape(B,T,N,N))\n        \n        \n       \n        # fuse graph_boxes_features with boxes_features\n        graph_boxes_features=graph_boxes_features.reshape(B,T,N,NFG)  \n        boxes_features=boxes_features.reshape(B,T,N,NFB)\n        \n#         boxes_states= torch.cat( [graph_boxes_features,boxes_features],dim=3)  #B, T, N, NFG+NFB\n        boxes_states=graph_boxes_features+boxes_features\n    \n        boxes_states=self.dropout_global(boxes_states)\n\n        NFS=NFG\n        \n        # Predict actions\n        boxes_states_flat=boxes_states.reshape(-1,NFS)  #B*T*N, NFS\n        actions_scores=self.fc_actions(boxes_states_flat)  #B*T*N, actn_num\n        \n        # Predict activities\n        boxes_states_pooled,_=torch.max(boxes_states,dim=2)  \n        boxes_states_pooled_flat=boxes_states_pooled.reshape(-1,NFS)  \n        \n        activities_scores=self.fc_activities(boxes_states_pooled_flat)  #B*T, acty_num\n        \n        # Temporal fusion\n        actions_scores=actions_scores.reshape(B,T,N,-1)\n        actions_scores=torch.mean(actions_scores,dim=1).reshape(B*N,-1)\n        activities_scores=activities_scores.reshape(B,T,-1)\n        activities_scores=torch.mean(activities_scores,dim=1).reshape(B,-1)\n        \n        if not self.training:\n            B=B//3\n            actions_scores=torch.mean(actions_scores.reshape(B,3,N,-1),dim=1).reshape(B*N,-1)\n            activities_scores=torch.mean(activities_scores.reshape(B,3,-1),dim=1).reshape(B,-1)\n       \n       \n        return actions_scores, activities_scores\n       \n        \n\n        \nclass GCNnet_collective(nn.Module):\n    \"\"\"\n    main module of GCN for the collective dataset\n    \"\"\"\n    def __init__(self, cfg):\n        super(GCNnet_collective, 
self).__init__()\n        self.cfg=cfg\n        \n        D=self.cfg.emb_features\n        K=self.cfg.crop_size[0]\n        NFB=self.cfg.num_features_boxes\n        NFR, NFG=self.cfg.num_features_relation, self.cfg.num_features_gcn\n        \n        self.backbone=MyInception_v3(transform_input=False,pretrained=True)\n        \n        if not self.cfg.train_backbone:\n            for p in self.backbone.parameters():\n                p.requires_grad=False\n        \n        self.roi_align=RoIAlign(*self.cfg.crop_size)\n        \n        self.fc_emb_1=nn.Linear(K*K*D,NFB)\n        self.nl_emb_1=nn.LayerNorm([NFB])\n        \n        \n        self.gcn_list = torch.nn.ModuleList([ GCN_Module(self.cfg)  for i in range(self.cfg.gcn_layers) ])    \n        \n        \n        self.dropout_global=nn.Dropout(p=self.cfg.train_dropout_prob)\n    \n        self.fc_actions=nn.Linear(NFG,self.cfg.num_actions)\n        self.fc_activities=nn.Linear(NFG,self.cfg.num_activities)\n        \n        for m in self.modules():\n            if isinstance(m,nn.Linear):\n                nn.init.kaiming_normal_(m.weight)\n                if m.bias is not None:\n                    nn.init.zeros_(m.bias)\n\n#         nn.init.zeros_(self.fc_gcn_3.weight)\n        \n\n    def loadmodel(self,filepath):\n        state = torch.load(filepath)\n        self.backbone.load_state_dict(state['backbone_state_dict'])\n        self.fc_emb_1.load_state_dict(state['fc_emb_state_dict'])\n        print('Load model states from: ',filepath)\n        \n                \n    def forward(self,batch_data):\n        images_in, boxes_in, bboxes_num_in = batch_data\n        \n        # read config parameters\n        B=images_in.shape[0]\n        T=images_in.shape[1]\n        H, W=self.cfg.image_size\n        OH, OW=self.cfg.out_size\n        MAX_N=self.cfg.num_boxes\n        NFB=self.cfg.num_features_boxes\n        NFR, NFG=self.cfg.num_features_relation, self.cfg.num_features_gcn\n        \n        D=self.cfg.emb_features\n        K=self.cfg.crop_size[0]\n        \n        if not self.training:\n            # at test time each sample holds 9 frames, treated as 3 clips of 3 frames\n            B=B*3\n            T=T//3\n            images_in=images_in.reshape( (B,T)+images_in.shape[2:] )\n            boxes_in=boxes_in.reshape(  (B,T)+boxes_in.shape[2:] )\n            bboxes_num_in=bboxes_num_in.reshape((B,T))\n        \n        # Reshape the input data\n        images_in_flat=torch.reshape(images_in,(B*T,3,H,W))  #B*T, 3, H, W\n        boxes_in=boxes_in.reshape(B*T,MAX_N,4)\n                \n        # Use backbone to extract features of images_in\n        # Pre-process first\n        images_in_flat=prep_images(images_in_flat)\n        outputs=self.backbone(images_in_flat)\n            \n        \n        # Build multiscale features\n        features_multiscale=[]\n        for features in outputs:\n            if features.shape[2:4]!=torch.Size([OH,OW]):\n                features=F.interpolate(features,size=(OH,OW),mode='bilinear',align_corners=True)\n            features_multiscale.append(features)\n        \n        features_multiscale=torch.cat(features_multiscale,dim=1)  #B*T, D, OH, OW\n        \n\n        boxes_in_flat=torch.reshape(boxes_in,(B*T*MAX_N,4))  #B*T*MAX_N, 4\n            \n        boxes_idx=[i * torch.ones(MAX_N, dtype=torch.int)   for i in range(B*T) ]\n        boxes_idx=torch.stack(boxes_idx).to(device=boxes_in.device)  # B*T, MAX_N\n        boxes_idx_flat=torch.reshape(boxes_idx,(B*T*MAX_N,))  #B*T*MAX_N,\n\n        # RoI Align\n        boxes_in_flat.requires_grad=False\n        boxes_idx_flat.requires_grad=False\n        
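# RoIAlign crops a K x K feature patch from the shared feature map for every (possibly padded) box\n        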
boxes_features_all=self.roi_align(features_multiscale,\n                                            boxes_in_flat,\n                                            boxes_idx_flat)  #B*T*MAX_N, D, K, K,\n        \n        boxes_features_all=boxes_features_all.reshape(B*T,MAX_N,-1)  #B*T,MAX_N, D*K*K\n        \n        # Embedding \n        boxes_features_all=self.fc_emb_1(boxes_features_all)  # B*T,MAX_N, NFB\n        boxes_features_all=self.nl_emb_1(boxes_features_all)\n        boxes_features_all=F.relu(boxes_features_all)\n        \n        \n        boxes_features_all=boxes_features_all.reshape(B,T,MAX_N,NFB)\n        boxes_in=boxes_in.reshape(B,T,MAX_N,4)\n        \n    \n        actions_scores=[]\n        activities_scores=[]\n        bboxes_num_in=bboxes_num_in.reshape(B,T)  #B,T,\n        \n        for b in range(B):\n            \n            N=bboxes_num_in[b][0]\n            \n            boxes_features=boxes_features_all[b,:,:N,:].reshape(1,T*N,NFB)  #1,T*N,NFB\n        \n            boxes_positions=boxes_in[b,:,:N,:].reshape(T*N,4)  #T*N, 4\n        \n            # GCN graph modeling\n            graph_boxes_features=boxes_features\n            for i in range(len(self.gcn_list)):\n                # chain the GCN layers, as in the volleyball model (matters only when cfg.gcn_layers>1)\n                graph_boxes_features,relation_graph=self.gcn_list[i](graph_boxes_features,boxes_positions)\n        \n        \n            # fuse graph_boxes_features with boxes_features\n            boxes_features=boxes_features.reshape(1,T*N,NFB)\n            boxes_states=graph_boxes_features+boxes_features  #1, T*N, NFG\n            boxes_states=self.dropout_global(boxes_states)\n            \n\n            NFS=NFG\n        \n            boxes_states=boxes_states.reshape(T,N,NFS)\n        \n            # Predict actions\n            actn_score=self.fc_actions(boxes_states)  #T,N, actn_num\n            \n\n            # Predict activities\n            boxes_states_pooled,_=torch.max(boxes_states,dim=1)  #T, NFS\n            acty_score=self.fc_activities(boxes_states_pooled)  #T, acty_num\n            \n            \n            # Temporal fusion\n            actn_score=torch.mean(actn_score,dim=0).reshape(N,-1)  #N, actn_num\n            acty_score=torch.mean(acty_score,dim=0).reshape(1,-1)  #1, acty_num\n            \n            \n            actions_scores.append(actn_score)  \n            activities_scores.append(acty_score)\n            \n            \n\n        actions_scores=torch.cat(actions_scores,dim=0)  #ALL_N,actn_num\n        activities_scores=torch.cat(activities_scores,dim=0)   #B,acty_num\n        \n        \n        if not self.training:\n            B=B//3\n            actions_scores=torch.mean(actions_scores.reshape(-1,3,actions_scores.shape[1]),dim=1)\n            activities_scores=torch.mean(activities_scores.reshape(B,3,-1),dim=1).reshape(B,-1)\n       \n        \n#         print(actions_scores.shape)\n#         print(activities_scores.shape)\n       \n        return actions_scores, activities_scores\n        "
  },
  {
    "path": "result/.gitkeep",
    "content": ""
  },
  {
    "path": "scripts/train_collective_stage1.py",
    "content": "import sys\nsys.path.append(\".\")\nfrom train_net import *\n\ncfg=Config('collective')\n\ncfg.device_list=\"0,1\"\ncfg.training_stage=1\ncfg.train_backbone=True\n\ncfg.image_size=480, 720\ncfg.out_size=57,87\ncfg.num_boxes=13\ncfg.num_actions=6\ncfg.num_activities=5\ncfg.num_frames=10\n\ncfg.batch_size=16\ncfg.test_batch_size=8 \ncfg.train_learning_rate=1e-5\ncfg.train_dropout_prob=0.5\ncfg.weight_decay=1e-2\ncfg.lr_plan={}\ncfg.max_epoch=100\n\ncfg.exp_note='Collective_stage1'\ntrain_net(cfg)"
  },
  {
    "path": "scripts/train_collective_stage2.py",
    "content": "import sys\nsys.path.append(\".\")\nfrom train_net import *\n\ncfg=Config('collective')\n\ncfg.device_list=\"0,1\"\ncfg.training_stage=2\ncfg.stage1_model_path='result/STAGE1_MODEL.pth'  #PATH OF THE BASE MODEL\ncfg.train_backbone=False\n\ncfg.image_size=480, 720\ncfg.out_size=57,87\ncfg.num_boxes=13\ncfg.num_actions=6\ncfg.num_activities=5\ncfg.num_frames=10\ncfg.num_graph=4\ncfg.tau_sqrt=True\n\ncfg.batch_size=16\ncfg.test_batch_size=8 \ncfg.train_learning_rate=1e-4\ncfg.train_dropout_prob=0.2\ncfg.weight_decay=1e-2\ncfg.lr_plan={}\ncfg.max_epoch=50\n\ncfg.exp_note='Collective_stage2'\ntrain_net(cfg)"
  },
  {
    "path": "scripts/train_volleyball_stage1.py",
    "content": "import sys\nsys.path.append(\".\")\nfrom train_net import *\n\ncfg=Config('volleyball')\n\ncfg.device_list=\"0,1,2,3\"\ncfg.training_stage=1\ncfg.stage1_model_path=''\ncfg.train_backbone=True\n\ncfg.batch_size=8\ncfg.test_batch_size=4\ncfg.num_frames=1\ncfg.train_learning_rate=1e-5\ncfg.lr_plan={}\ncfg.max_epoch=200\ncfg.actions_weights=[[1., 1., 2., 3., 1., 2., 2., 0.2, 1.]]  \n\ncfg.exp_note='Volleyball_stage1'\ntrain_net(cfg)"
  },
  {
    "path": "scripts/train_volleyball_stage2.py",
    "content": "import sys\nsys.path.append(\".\")\nfrom train_net import *\n\ncfg=Config('volleyball')\n\ncfg.device_list=\"0,1,2,3\"\ncfg.training_stage=2\ncfg.stage1_model_path='result/STAGE1_MODEL.pth'  #PATH OF THE BASE MODEL\ncfg.train_backbone=False\n\ncfg.batch_size=32 #32\ncfg.test_batch_size=8 \ncfg.num_frames=3\ncfg.train_learning_rate=2e-4 \ncfg.lr_plan={41:1e-4, 81:5e-5, 121:1e-5}\ncfg.max_epoch=150\ncfg.actions_weights=[[1., 1., 2., 3., 1., 2., 2., 0.2, 1.]]  \n\ncfg.exp_note='Volleyball_stage2'\ntrain_net(cfg)"
  },
  {
    "path": "train_net.py",
    "content": "import torch\nimport torch.optim as optim\n\nimport time\nimport random\nimport os\nimport sys\n\nfrom config import *\nfrom volleyball import *\nfrom collective import *\nfrom dataset import *\nfrom gcn_model import *\nfrom base_model import *\nfrom utils import *\n\n\ndef set_bn_eval(m):\n    classname = m.__class__.__name__\n    if classname.find('BatchNorm') != -1:\n        m.eval()\n            \ndef adjust_lr(optimizer, new_lr):\n    print('change learning rate:',new_lr)\n    for param_group in optimizer.param_groups:\n        param_group['lr'] = new_lr\n\ndef train_net(cfg):\n    \"\"\"\n    training gcn net\n    \"\"\"\n    os.environ['CUDA_VISIBLE_DEVICES']=cfg.device_list\n    \n    # Show config parameters\n    cfg.init_config()\n    show_config(cfg)\n    \n    # Reading dataset\n    training_set,validation_set=return_dataset(cfg)\n    \n    params = {\n        'batch_size': cfg.batch_size,\n        'shuffle': True,\n        'num_workers': 4\n    }\n    training_loader=data.DataLoader(training_set,**params)\n    \n    params['batch_size']=cfg.test_batch_size\n    validation_loader=data.DataLoader(validation_set,**params)\n    \n    # Set random seed\n    np.random.seed(cfg.train_random_seed)\n    torch.manual_seed(cfg.train_random_seed)\n    random.seed(cfg.train_random_seed)\n\n    # Set data position\n    if cfg.use_gpu and torch.cuda.is_available():\n        device = torch.device('cuda')\n    else:\n        device = torch.device('cpu')\n    \n    # Build model and optimizer\n    basenet_list={'volleyball':Basenet_volleyball, 'collective':Basenet_collective}\n    gcnnet_list={'volleyball':GCNnet_volleyball, 'collective':GCNnet_collective}\n    \n    if cfg.training_stage==1:\n        Basenet=basenet_list[cfg.dataset_name]\n        model=Basenet(cfg)\n    elif cfg.training_stage==2:\n        GCNnet=gcnnet_list[cfg.dataset_name]\n        model=GCNnet(cfg)\n        # Load backbone\n        model.loadmodel(cfg.stage1_model_path)\n    else:\n        assert(False)\n    \n    if cfg.use_multi_gpu:\n        model=nn.DataParallel(model)\n\n    model=model.to(device=device)\n    \n    model.train()\n    model.apply(set_bn_eval)\n    \n    optimizer=optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),lr=cfg.train_learning_rate,weight_decay=cfg.weight_decay)\n\n    train_list={'volleyball':train_volleyball, 'collective':train_collective}\n    test_list={'volleyball':test_volleyball, 'collective':test_collective}\n    train=train_list[cfg.dataset_name]\n    test=test_list[cfg.dataset_name]\n    \n    if cfg.test_before_train:\n        test_info=test(validation_loader, model, device, 0, cfg)\n        print(test_info)\n\n    # Training iteration\n    best_result={'epoch':0, 'activities_acc':0}\n    start_epoch=1\n    for epoch in range(start_epoch, start_epoch+cfg.max_epoch):\n        \n        if epoch in cfg.lr_plan:\n            adjust_lr(optimizer, cfg.lr_plan[epoch])\n            \n        # One epoch of forward and backward\n        train_info=train(training_loader, model, device, optimizer, epoch, cfg)\n        show_epoch_info('Train', cfg.log_path, train_info)\n\n        # Test\n        if epoch % cfg.test_interval_epoch == 0:\n            test_info=test(validation_loader, model, device, epoch, cfg)\n            show_epoch_info('Test', cfg.log_path, test_info)\n            \n            if test_info['activities_acc']>best_result['activities_acc']:\n                best_result=test_info\n            print_log(cfg.log_path, \n                      'Best 
group activity accuracy: %.2f%% at epoch #%d.'%(best_result['activities_acc'], best_result['epoch']))\n            \n            # Save model\n            if cfg.training_stage==2:\n                state = {\n                    'epoch': epoch,\n                    'state_dict': model.state_dict(),\n                    'optimizer': optimizer.state_dict(),\n                }\n                filepath=cfg.result_path+'/stage%d_epoch%d_%.2f%%.pth'%(cfg.training_stage,epoch,test_info['activities_acc'])\n                torch.save(state, filepath)\n                print('model saved to:',filepath)   \n            elif cfg.training_stage==1:\n                for m in model.modules():\n                    if isinstance(m, Basenet):\n                        filepath=cfg.result_path+'/stage%d_epoch%d_%.2f%%.pth'%(cfg.training_stage,epoch,test_info['activities_acc'])\n                        m.savemodel(filepath)\n#                         print('model saved to:',filepath)\n            else:\n                assert False\n    \n   \ndef train_volleyball(data_loader, model, device, optimizer, epoch, cfg):\n    \n    actions_meter=AverageMeter()\n    activities_meter=AverageMeter()\n    loss_meter=AverageMeter()\n    epoch_timer=Timer()\n    for batch_data in data_loader:\n        model.train()\n        model.apply(set_bn_eval)\n    \n        # prepare batch data\n        batch_data=[b.to(device=device) for b in batch_data]\n        batch_size=batch_data[0].shape[0]\n        num_frames=batch_data[0].shape[1]\n\n        actions_in=batch_data[2].reshape((batch_size,num_frames,cfg.num_boxes))\n        activities_in=batch_data[3].reshape((batch_size,num_frames))\n\n        actions_in=actions_in[:,0,:].reshape((batch_size*cfg.num_boxes,))\n        activities_in=activities_in[:,0].reshape((batch_size,))\n\n        # forward\n        actions_scores,activities_scores=model((batch_data[0],batch_data[1]))\n\n        # Predict actions\n        actions_weights=torch.tensor(cfg.actions_weights).to(device=device)\n        actions_loss=F.cross_entropy(actions_scores,actions_in,weight=actions_weights)  \n        actions_labels=torch.argmax(actions_scores,dim=1)  \n        actions_correct=torch.sum(torch.eq(actions_labels.int(),actions_in.int()).float())\n\n        # Predict activities\n        activities_loss=F.cross_entropy(activities_scores,activities_in)\n        activities_labels=torch.argmax(activities_scores,dim=1)  \n        activities_correct=torch.sum(torch.eq(activities_labels.int(),activities_in.int()).float())\n\n        # Get accuracy\n        actions_accuracy=actions_correct.item()/actions_scores.shape[0]\n        activities_accuracy=activities_correct.item()/activities_scores.shape[0]\n        \n        actions_meter.update(actions_accuracy, actions_scores.shape[0])\n        activities_meter.update(activities_accuracy, activities_scores.shape[0])\n\n        # Total loss\n        total_loss=activities_loss+cfg.actions_loss_weight*actions_loss\n        loss_meter.update(total_loss.item(), batch_size)\n\n        # Optim\n        optimizer.zero_grad()\n        total_loss.backward()\n        optimizer.step()\n    \n    train_info={\n        'time':epoch_timer.timeit(),\n        'epoch':epoch,\n        'loss':loss_meter.avg,\n        'activities_acc':activities_meter.avg*100,\n        'actions_acc':actions_meter.avg*100\n    }\n    \n    return train_info\n        \n    \ndef test_volleyball(data_loader, model, device, epoch, cfg):\n    model.eval()\n    \n    actions_meter=AverageMeter()\n    
activities_meter=AverageMeter()\n    loss_meter=AverageMeter()\n    \n    epoch_timer=Timer()\n    with torch.no_grad():\n        for batch_data_test in data_loader:\n            # prepare batch data\n            batch_data_test=[b.to(device=device) for b in batch_data_test]\n            batch_size=batch_data_test[0].shape[0]\n            num_frames=batch_data_test[0].shape[1]\n\n            actions_in=batch_data_test[2].reshape((batch_size,num_frames,cfg.num_boxes))\n            activities_in=batch_data_test[3].reshape((batch_size,num_frames))\n            \n            # forward\n            actions_scores,activities_scores=model((batch_data_test[0],batch_data_test[1]))\n            \n            # Predict actions\n            actions_in=actions_in[:,0,:].reshape((batch_size*cfg.num_boxes,))\n            activities_in=activities_in[:,0].reshape((batch_size,))\n            \n            actions_weights=torch.tensor(cfg.actions_weights).to(device=device)\n            actions_loss=F.cross_entropy(actions_scores,actions_in,weight=actions_weights)  \n            actions_labels=torch.argmax(actions_scores,dim=1) \n\n            # Predict activities\n            activities_loss=F.cross_entropy(activities_scores,activities_in)\n            activities_labels=torch.argmax(activities_scores,dim=1) \n            \n            actions_correct=torch.sum(torch.eq(actions_labels.int(),actions_in.int()).float())\n            activities_correct=torch.sum(torch.eq(activities_labels.int(),activities_in.int()).float())\n            \n            # Get accuracy\n            actions_accuracy=actions_correct.item()/actions_scores.shape[0]\n            activities_accuracy=activities_correct.item()/activities_scores.shape[0]\n\n            actions_meter.update(actions_accuracy, actions_scores.shape[0])\n            activities_meter.update(activities_accuracy, activities_scores.shape[0])\n\n            # Total loss\n            total_loss=activities_loss+cfg.actions_loss_weight*actions_loss\n            loss_meter.update(total_loss.item(), batch_size)\n\n    test_info={\n        'time':epoch_timer.timeit(),\n        'epoch':epoch,\n        'loss':loss_meter.avg,\n        'activities_acc':activities_meter.avg*100,\n        'actions_acc':actions_meter.avg*100\n    }\n    \n    return test_info\n\n\ndef train_collective(data_loader, model, device, optimizer, epoch, cfg):\n    \n    actions_meter=AverageMeter()\n    activities_meter=AverageMeter()\n    loss_meter=AverageMeter()\n    epoch_timer=Timer()\n    for batch_data in data_loader:\n        model.train()\n        model.apply(set_bn_eval)\n    \n        # prepare batch data\n        batch_data=[b.to(device=device) for b in batch_data]\n        batch_size=batch_data[0].shape[0]\n        num_frames=batch_data[0].shape[1]\n\n        # forward\n        actions_scores,activities_scores=model((batch_data[0],batch_data[1],batch_data[4]))\n        \n        actions_in=batch_data[2].reshape((batch_size,num_frames,cfg.num_boxes))\n        activities_in=batch_data[3].reshape((batch_size,num_frames))\n        bboxes_num=batch_data[4].reshape(batch_size,num_frames)\n\n        actions_in_nopad=[]\n        if cfg.training_stage==1:\n            actions_in=actions_in.reshape((batch_size*num_frames,cfg.num_boxes,))\n            bboxes_num=bboxes_num.reshape(batch_size*num_frames,)\n            for bt in range(batch_size*num_frames):\n                N=bboxes_num[bt]\n                actions_in_nopad.append(actions_in[bt,:N])\n        else:\n            for b in 
range(batch_size):\n                N=bboxes_num[b][0]\n                actions_in_nopad.append(actions_in[b][0][:N])\n        actions_in=torch.cat(actions_in_nopad,dim=0).reshape(-1,)  #ALL_N,\n            \n        if cfg.training_stage==1:\n            activities_in=activities_in.reshape(-1,)\n        else:\n            activities_in=activities_in[:,0].reshape(batch_size,)\n        \n        # Predict actions\n        actions_loss=F.cross_entropy(actions_scores,actions_in,weight=None)  \n        actions_labels=torch.argmax(actions_scores,dim=1)  #B*T*N,\n        actions_correct=torch.sum(torch.eq(actions_labels.int(),actions_in.int()).float())\n\n        # Predict activities\n        activities_loss=F.cross_entropy(activities_scores,activities_in)\n        activities_labels=torch.argmax(activities_scores,dim=1)  #B*T,\n        activities_correct=torch.sum(torch.eq(activities_labels.int(),activities_in.int()).float())\n        \n        \n        # Get accuracy\n        actions_accuracy=actions_correct.item()/actions_scores.shape[0]\n        activities_accuracy=activities_correct.item()/activities_scores.shape[0]\n        \n        actions_meter.update(actions_accuracy, actions_scores.shape[0])\n        activities_meter.update(activities_accuracy, activities_scores.shape[0])\n\n        # Total loss\n        total_loss=activities_loss+cfg.actions_loss_weight*actions_loss\n        loss_meter.update(total_loss.item(), batch_size)\n\n        # Optim\n        optimizer.zero_grad()\n        total_loss.backward()\n        optimizer.step()\n    \n    train_info={\n        'time':epoch_timer.timeit(),\n        'epoch':epoch,\n        'loss':loss_meter.avg,\n        'activities_acc':activities_meter.avg*100,\n        'actions_acc':actions_meter.avg*100\n    }\n    \n    return train_info\n        \n    \ndef test_collective(data_loader, model, device, epoch, cfg):\n    model.eval()\n    \n    actions_meter=AverageMeter()\n    activities_meter=AverageMeter()\n    loss_meter=AverageMeter()\n    \n    epoch_timer=Timer()\n    with torch.no_grad():\n        for batch_data in data_loader:\n            # prepare batch data\n            batch_data=[b.to(device=device) for b in batch_data]\n            batch_size=batch_data[0].shape[0]\n            num_frames=batch_data[0].shape[1]\n            \n            actions_in=batch_data[2].reshape((batch_size,num_frames,cfg.num_boxes))\n            activities_in=batch_data[3].reshape((batch_size,num_frames))\n            bboxes_num=batch_data[4].reshape(batch_size,num_frames)\n\n            # forward\n            actions_scores,activities_scores=model((batch_data[0],batch_data[1],batch_data[4]))\n            \n            actions_in_nopad=[]\n            \n            if cfg.training_stage==1:\n                actions_in=actions_in.reshape((batch_size*num_frames,cfg.num_boxes,))\n                bboxes_num=bboxes_num.reshape(batch_size*num_frames,)\n                for bt in range(batch_size*num_frames):\n                    N=bboxes_num[bt]\n                    actions_in_nopad.append(actions_in[bt,:N])\n            else:\n                for b in range(batch_size):\n                    N=bboxes_num[b][0]\n                    actions_in_nopad.append(actions_in[b][0][:N])\n            actions_in=torch.cat(actions_in_nopad,dim=0).reshape(-1,)  #ALL_N,\n            \n            if cfg.training_stage==1:\n                activities_in=activities_in.reshape(-1,)\n            else:\n                activities_in=activities_in[:,0].reshape(batch_size,)\n\n            
actions_loss=F.cross_entropy(actions_scores,actions_in)  \n            actions_labels=torch.argmax(actions_scores,dim=1)  #ALL_N,\n            actions_correct=torch.sum(torch.eq(actions_labels.int(),actions_in.int()).float())\n\n            # Predict activities\n            activities_loss=F.cross_entropy(activities_scores,activities_in)\n            activities_labels=torch.argmax(activities_scores,dim=1)  #B,\n            activities_correct=torch.sum(torch.eq(activities_labels.int(),activities_in.int()).float())\n            \n            # Get accuracy\n            actions_accuracy=actions_correct.item()/actions_scores.shape[0]\n            activities_accuracy=activities_correct.item()/activities_scores.shape[0]\n\n            actions_meter.update(actions_accuracy, actions_scores.shape[0])\n            activities_meter.update(activities_accuracy, activities_scores.shape[0])\n\n            # Total loss\n            total_loss=activities_loss+cfg.actions_loss_weight*actions_loss\n            loss_meter.update(total_loss.item(), batch_size)\n\n    test_info={\n        'time':epoch_timer.timeit(),\n        'epoch':epoch,\n        'loss':loss_meter.avg,\n        'activities_acc':activities_meter.avg*100,\n        'actions_acc':actions_meter.avg*100\n    }\n    \n    return test_info\n"
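\n# --------------------------------------------------------------------------\n# Minimal sketch (not part of the original training code): what the repeated\n# model.train(); model.apply(set_bn_eval) pattern above does, i.e. keep every\n# layer in training mode except BatchNorm, whose running statistics stay\n# frozen. The tiny nn.Sequential below is purely illustrative.\n# --------------------------------------------------------------------------\nif __name__ == '__main__':\n    net = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8))\n    net.train()              # put everything in training mode\n    net.apply(set_bn_eval)   # ...then switch BatchNorm layers back to eval\n    print(net[1].training)   # False: BN running stats will not update\n"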
  },
  {
    "path": "utils.py",
    "content": "import torch\nimport time\n\ndef prep_images(images):\n    \"\"\"\n    preprocess images\n    Args:\n        images: pytorch tensor\n    \"\"\"\n    images = images.div(255.0)\n    \n    images = torch.sub(images,0.5)\n    images = torch.mul(images,2.0)\n    \n    return images\n\ndef calc_pairwise_distance(X, Y):\n    \"\"\"\n    computes pairwise distance between each element\n    Args: \n        X: [N,D]\n        Y: [M,D]\n    Returns:\n        dist: [N,M] matrix of euclidean distances\n    \"\"\"\n    rx=X.pow(2).sum(dim=1).reshape((-1,1))\n    ry=Y.pow(2).sum(dim=1).reshape((-1,1))\n    dist=rx-2.0*X.matmul(Y.t())+ry.t()\n    return torch.sqrt(dist)\n\ndef calc_pairwise_distance_3d(X, Y):\n    \"\"\"\n    computes pairwise distance between each element\n    Args: \n        X: [B,N,D]\n        Y: [B,M,D]\n    Returns:\n        dist: [B,N,M] matrix of euclidean distances\n    \"\"\"\n    B=X.shape[0]\n    \n    rx=X.pow(2).sum(dim=2).reshape((B,-1,1))\n    ry=Y.pow(2).sum(dim=2).reshape((B,-1,1))\n    \n    dist=rx-2.0*X.matmul(Y.transpose(1,2))+ry.transpose(1,2)\n    \n    return torch.sqrt(dist)\n\ndef sincos_encoding_2d(positions,d_emb):\n    \"\"\"\n    Args:\n        positions: [N,2]\n    Returns:\n        positions high-dimensional representation: [N,d_emb]\n    \"\"\"\n\n    N=positions.shape[0]\n    \n    d=d_emb//2\n    \n    idxs = [np.power(1000,2*(idx//2)/d) for idx in range(d)]\n    idxs = torch.FloatTensor(idxs).to(device=positions.device)\n    \n    idxs = idxs.repeat(N,2)  #N, d_emb\n    \n    pos = torch.cat([ positions[:,0].reshape(-1,1).repeat(1,d),positions[:,1].reshape(-1,1).repeat(1,d) ],dim=1)\n\n    embeddings=pos/idxs\n    \n    embeddings[:,0::2]=torch.sin(embeddings[:,0::2])  # dim 2i\n    embeddings[:,1::2]=torch.cos(embeddings[:,1::2])  # dim 2i+1\n    \n    return embeddings\n\n\ndef print_log(file_path,*args):\n    print(*args)\n    if file_path is not None:\n        with open(file_path, 'a') as f:\n            print(*args,file=f)\n\ndef show_config(cfg):\n    print_log(cfg.log_path, '=====================Config=====================')\n    for k,v in cfg.__dict__.items():\n        print_log(cfg.log_path, k,': ',v)\n    print_log(cfg.log_path, '======================End=======================')\n    \ndef show_epoch_info(phase, log_path, info):\n    print_log(log_path, '')\n    if phase=='Test':\n        print_log(log_path, '====> %s at epoch #%d'%(phase, info['epoch']))\n    else:\n        print_log(log_path, '%s at epoch #%d'%(phase, info['epoch']))\n        \n    print_log(log_path, 'Group Activity Accuracy: %.2f%%, Loss: %.5f, Using %.1f seconds'%(\n                info['activities_acc'], info['loss'], info['time']))\n        \ndef log_final_exp_result(log_path, data_path, exp_result):\n    no_display_cfg=['num_workers', 'use_gpu', 'use_multi_gpu', 'device_list',\n                   'batch_size_test', 'test_interval_epoch', 'train_random_seed',\n                   'result_path', 'log_path', 'device']\n    \n    with open(log_path, 'a') as f:\n        print('', file=f)\n        print('', file=f)\n        print('', file=f)\n        print('=====================Config=====================', file=f)\n        \n        for k,v in exp_result['cfg'].__dict__.items():\n            if k not in no_display_cfg:\n                print( k,': ',v, file=f)\n            \n        print('=====================Result======================', file=f)\n        \n        print('Best result:', file=f)\n        print(exp_result['best_result'], file=f)\n           
 \n        print('Cost total %.4f hours.'%(exp_result['total_time']), file=f)\n        \n        print('======================End=======================', file=f)\n    \n    \n    data_dict=pickle.load(open(data_path, 'rb'))\n    data_dict[exp_result['cfg'].exp_name]=exp_result\n    pickle.dump(data_dict, open(data_path, 'wb'))\n        \n    \nclass AverageMeter(object):\n    \"\"\"\n    Computes the average value\n    \"\"\"\n    def __init__(self):\n        self.reset()\n\n    def reset(self):\n        self.val = 0\n        self.avg = 0\n        self.sum = 0\n        self.count = 0\n\n    def update(self, val, n=1):\n        self.val = val\n        self.sum += val * n\n        self.count += n\n        self.avg = self.sum / self.count\n        \n\nclass Timer(object):\n    \"\"\"\n    class to do timekeeping\n    \"\"\"\n    def __init__(self):\n        self.last_time=time.time()\n        \n    def timeit(self):\n        old_time=self.last_time\n        self.last_time=time.time()\n        return self.last_time-old_time"
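\n# --------------------------------------------------------------------------\n# Minimal usage sketch (not part of the original file); the shapes and\n# values below are illustrative only.\n# --------------------------------------------------------------------------\nif __name__ == '__main__':\n    X = torch.rand(4, 2)\n    Y = torch.rand(3, 2)\n    print(calc_pairwise_distance(X, Y).shape)       # torch.Size([4, 3])\n\n    positions = torch.rand(5, 2)\n    print(sincos_encoding_2d(positions, 64).shape)  # torch.Size([5, 64])\n\n    meter = AverageMeter()\n    meter.update(0.5, n=8)\n    meter.update(1.0, n=2)\n    print(meter.avg)                                # 0.6 = (0.5*8+1.0*2)/10\n"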
  },
  {
    "path": "volleyball.py",
    "content": "import numpy as np\nimport skimage.io\nimport skimage.transform\n\nimport torch\nimport torchvision.transforms as transforms\nfrom torch.utils import data\nimport torchvision.models as models\n\nfrom PIL import Image\nimport random\n\nimport sys\n\"\"\"\nReference:\nhttps://github.com/cvlab-epfl/social-scene-understanding/blob/master/volleyball.py\n\"\"\"\n\nACTIVITIES = ['r_set', 'r_spike', 'r-pass', 'r_winpoint',\n              'l_set', 'l-spike', 'l-pass', 'l_winpoint']\n\nNUM_ACTIVITIES = 8\n\nACTIONS = ['blocking', 'digging', 'falling', 'jumping',\n           'moving', 'setting', 'spiking', 'standing',\n           'waiting']\nNUM_ACTIONS = 9\n\n\ndef volley_read_annotations(path):\n    \"\"\"\n    reading annotations for the given sequence\n    \"\"\"\n    annotations = {}\n\n    gact_to_id = {name: i for i, name in enumerate(ACTIVITIES)}\n    act_to_id = {name: i for i, name in enumerate(ACTIONS)}\n\n    with open(path) as f:\n        for l in f.readlines():\n            values = l[:-1].split(' ')\n            file_name = values[0]\n            activity = gact_to_id[values[1]]\n\n            values = values[2:]\n            num_people = len(values) // 5\n\n            action_names = values[4::5]\n            actions = [act_to_id[name]\n                       for name in action_names]\n\n            def _read_bbox(xywh):\n                x, y, w, h = map(int, xywh)\n                return y, x, y+h, x+w\n            bboxes = np.array([_read_bbox(values[i:i+4])\n                               for i in range(0, 5*num_people, 5)])\n\n            fid = int(file_name.split('.')[0])\n            annotations[fid] = {\n                'file_name': file_name,\n                'group_activity': activity,\n                'actions': actions,\n                'bboxes': bboxes,\n            }\n    return annotations\n\n\ndef volley_read_dataset(path, seqs):\n    data = {}\n    for sid in seqs:\n        data[sid] = volley_read_annotations(path + '/%d/annotations.txt' % sid)\n    return data\n\n\ndef volley_all_frames(data):\n    frames = []\n    for sid, anns in data.items():\n        for fid, ann in anns.items():\n            frames.append((sid, fid))\n    return frames\n\n\ndef volley_random_frames(data, num_frames):\n    frames = []\n    for sid in np.random.choice(list(data.keys()), num_frames):\n        fid = int(np.random.choice(list(data[sid]), []))\n        frames.append((sid, fid))\n    return frames\n\n\ndef volley_frames_around(frame, num_before=5, num_after=4):\n    sid, src_fid = frame\n    return [(sid, src_fid, fid)\n            for fid in range(src_fid-num_before, src_fid+num_after+1)]\n\n\ndef load_samples_sequence(anns,tracks,images_path,frames,image_size,num_boxes=12,):\n    \"\"\"\n    load samples of a bath\n    \n    Returns:\n        pytorch tensors\n    \"\"\"\n    images, boxes, boxes_idx = [], [], []\n    activities, actions = [], []\n    for i, (sid, src_fid, fid) in enumerate(frames):\n        #img=skimage.io.imread(images_path + '/%d/%d/%d.jpg' % (sid, src_fid, fid))\n        #img=skimage.transform.resize(img,(720, 1280),anti_aliasing=True)\n        \n        img = Image.open(images_path + '/%d/%d/%d.jpg' % (sid, src_fid, fid))\n        \n        img=transforms.functional.resize(img,image_size)\n        img=np.array(img)\n        \n        # H,W,3 -> 3,H,W\n        img=img.transpose(2,0,1)\n        images.append(img)\n\n        boxes.append(tracks[(sid, src_fid)][fid])\n        actions.append(anns[sid][src_fid]['actions'])\n        if len(boxes[-1]) != 
num_boxes:\n          boxes[-1] = np.vstack([boxes[-1], boxes[-1][:num_boxes-len(boxes[-1])]])\n          actions[-1] = actions[-1] + actions[-1][:num_boxes-len(actions[-1])]\n        boxes_idx.append(i * np.ones(num_boxes, dtype=np.int32))\n        activities.append(anns[sid][src_fid]['group_activity'])\n\n\n    images = np.stack(images)\n    activities = np.array(activities, dtype=np.int32)\n    bboxes = np.vstack(boxes).reshape([-1, num_boxes, 4])\n    bboxes_idx = np.hstack(boxes_idx).reshape([-1, num_boxes])\n    actions = np.hstack(actions).reshape([-1, num_boxes])\n    \n    #convert to pytorch tensor\n    images=torch.from_numpy(images).float()\n    bboxes=torch.from_numpy(bboxes).float()\n    bboxes_idx=torch.from_numpy(bboxes_idx).int()\n    actions=torch.from_numpy(actions).long()\n    activities=torch.from_numpy(activities).long()\n\n    return images, bboxes, bboxes_idx, actions, activities\n\n\nclass VolleyballDataset(data.Dataset):\n    \"\"\"\n    Characterize volleyball dataset for pytorch\n    \"\"\"\n    def __init__(self,anns,tracks,frames,images_path,image_size,feature_size,num_boxes=12,num_before=4,num_after=4,is_training=True,is_finetune=False):\n        self.anns=anns\n        self.tracks=tracks\n        self.frames=frames\n        self.images_path=images_path\n        self.image_size=image_size\n        self.feature_size=feature_size\n        \n        self.num_boxes=num_boxes\n        self.num_before=num_before\n        self.num_after=num_after\n        \n        self.is_training=is_training\n        self.is_finetune=is_finetune\n    \n    def __len__(self):\n        \"\"\"\n        Return the total number of samples\n        \"\"\"\n        return len(self.frames)\n    \n    def __getitem__(self,index):\n        \"\"\"\n        Generate one sample of the dataset\n        \"\"\"\n        \n        select_frames=self.volley_frames_sample(self.frames[index])\n        sample=self.load_samples_sequence(select_frames)\n        \n        return sample\n    \n    def volley_frames_sample(self,frame):\n        \n        sid, src_fid = frame\n        \n        if self.is_finetune:\n            if self.is_training:\n                fid=random.randint(src_fid-self.num_before, src_fid+self.num_after)\n                return [(sid, src_fid, fid)]\n            else:\n                return [(sid, src_fid, fid)\n                        for fid in range(src_fid-self.num_before, src_fid+self.num_after+1)]\n        else:\n            if self.is_training:\n                sample_frames=random.sample(range(src_fid-self.num_before, src_fid+self.num_after+1), 3)\n                return [(sid, src_fid, fid) \n                        for fid in sample_frames]\n            else:\n                return [(sid, src_fid, fid) \n                        for fid in  [src_fid-3,src_fid,src_fid+3, src_fid-4,src_fid-1,src_fid+2, src_fid-2,src_fid+1,src_fid+4 ]]\n    \n    \n    def load_samples_sequence(self,select_frames):\n        \"\"\"\n        load samples sequence\n\n        Returns:\n            pytorch tensors\n        \"\"\"\n        \n        OH, OW=self.feature_size\n        \n        images, boxes = [], []\n        activities, actions = [], []\n        for i, (sid, src_fid, fid) in enumerate(select_frames):\n\n            img = Image.open(self.images_path + '/%d/%d/%d.jpg' % (sid, src_fid, fid))\n\n            img=transforms.functional.resize(img,self.image_size)\n            img=np.array(img)\n\n            # H,W,3 -> 3,H,W\n            img=img.transpose(2,0,1)\n            
images.append(img)\n\n            temp_boxes=np.ones_like(self.tracks[(sid, src_fid)][fid])\n            for i,track in enumerate(self.tracks[(sid, src_fid)][fid]):\n                \n                y1,x1,y2,x2 = track\n                w1,h1,w2,h2 = x1*OW, y1*OH, x2*OW, y2*OH  \n                temp_boxes[i]=np.array([w1,h1,w2,h2])\n            \n            boxes.append(temp_boxes)\n            \n            \n            actions.append(self.anns[sid][src_fid]['actions'])\n            \n            if len(boxes[-1]) != self.num_boxes:\n                boxes[-1] = np.vstack([boxes[-1], boxes[-1][:self.num_boxes-len(boxes[-1])]])\n                actions[-1] = actions[-1] + actions[-1][:self.num_boxes-len(actions[-1])]\n            activities.append(self.anns[sid][src_fid]['group_activity'])\n\n        images = np.stack(images)\n        activities = np.array(activities, dtype=np.int32)\n        bboxes = np.vstack(boxes).reshape([-1, self.num_boxes, 4])\n        actions = np.hstack(actions).reshape([-1, self.num_boxes])\n        \n\n        #convert to pytorch tensor\n        images=torch.from_numpy(images).float()\n        bboxes=torch.from_numpy(bboxes).float()\n        actions=torch.from_numpy(actions).long()\n        activities=torch.from_numpy(activities).long()\n\n        return images, bboxes,  actions, activities\n    \n"
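\n# --------------------------------------------------------------------------\n# Minimal usage sketch (not part of the original file). The path and the\n# sequence ids below are hypothetical placeholders; point data_path at the\n# directory holding the numbered sequence folders of the unzipped dataset.\n# --------------------------------------------------------------------------\nif __name__ == '__main__':\n    data_path = 'data/volleyball/videos'  # hypothetical location\n    anns = volley_read_dataset(data_path, seqs=[0, 1])\n    frames = volley_all_frames(anns)\n    print('read %d annotated frames' % len(frames))\n"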
  }
]