Repository: mostafa-saad/deep-activity-rec Branch: master Commit: 1fb59ba08a27 Files: 103 Total size: 316.1 KB Directory structure: gitextract_mrhtq6xo/ ├── LICENSE ├── Makefile ├── README.md ├── apps/ │ ├── exePhase1_2.cpp │ ├── exePhase3.cpp │ └── exePhase4.cpp ├── dataset-config/ │ ├── test.txt │ ├── train.txt │ ├── trainval.txt │ └── val.txt ├── dataset-config-simple/ │ ├── test.txt │ ├── train.txt │ ├── trainval.txt │ └── val.txt ├── eclipse-project/ │ └── ibrahim16-deep-act-rec-part/ │ ├── .cproject │ ├── .project │ ├── Debug/ │ │ ├── apps/ │ │ │ └── subdir.mk │ │ ├── makefile │ │ ├── objects.mk │ │ ├── sources.mk │ │ └── src/ │ │ └── subdir.mk │ ├── Release/ │ │ ├── apps/ │ │ │ └── subdir.mk │ │ ├── makefile │ │ ├── objects.mk │ │ ├── sources.mk │ │ └── src/ │ │ └── subdir.mk │ ├── apps/ │ │ ├── exePhase1_2.cpp │ │ ├── exePhase3.cpp │ │ └── exePhase4.cpp │ └── src/ │ ├── custom-abbreviation.h │ ├── custom-images-macros.h │ ├── custom-macros.h │ ├── dlib-tracker-wrapper.cpp │ ├── dlib-tracker-wrapper.h │ ├── images-utilities.cpp │ ├── images-utilities.h │ ├── leveldb-reader.cpp │ ├── leveldb-reader.h │ ├── leveldb-writer.cpp │ ├── leveldb-writer.h │ ├── rect-helper.cpp │ ├── rect-helper.h │ ├── utilities.cpp │ ├── utilities.h │ ├── volleyball-dataset-mgr.cpp │ └── volleyball-dataset-mgr.h ├── ibrahim16-cvpr/ │ ├── p1-network1/ │ │ ├── clip_w5.txt │ │ ├── trainval-test-create-mean-script.sh │ │ ├── trainval-test-exe-script-resume.sh │ │ ├── trainval-test-exe-script.sh │ │ ├── trainval-test-network.prototxt │ │ └── trainval-test-solver.prototxt │ ├── p3-extract-features-networks/ │ │ ├── test.prototxt │ │ └── trainval.prototxt │ ├── p4-network2/ │ │ ├── clip_w10.txt │ │ ├── trainval-test-exe-script-resume.sh │ │ ├── trainval-test-exe-script.sh │ │ ├── trainval-test-network.prototxt │ │ ├── trainval-test-solver.prototxt │ │ ├── trainval-test-window-evaluation-exe-script.sh │ │ └── trainval-test-window-evaluation-network.prototxt │ ├── script-clean.sh │ ├── 
script-p1-data.sh │ ├── script-p1-train-p3-p4.sh │ ├── script-p2-data-fuse.sh │ └── script.sh ├── ibrahim16-cvpr-simple/ │ ├── p1-network1/ │ │ ├── clip_w5.txt │ │ ├── trainval-test-create-mean-script.sh │ │ ├── trainval-test-exe-script-resume.sh │ │ ├── trainval-test-exe-script.sh │ │ ├── trainval-test-network.prototxt │ │ └── trainval-test-solver.prototxt │ ├── p3-extract-features-networks/ │ │ ├── test.prototxt │ │ └── trainval.prototxt │ ├── p4-network2/ │ │ ├── clip_w10.txt │ │ ├── trainval-test-exe-script-resume.sh │ │ ├── trainval-test-exe-script.sh │ │ ├── trainval-test-network.prototxt │ │ ├── trainval-test-solver.prototxt │ │ ├── trainval-test-window-evaluation-exe-script.sh │ │ └── trainval-test-window-evaluation-network.prototxt │ ├── script-clean.sh │ ├── script-simple-expected-log.txt │ └── script-simple.sh ├── src/ │ ├── custom-abbreviation.h │ ├── custom-images-macros.h │ ├── custom-macros.h │ ├── dlib-tracker-wrapper.cpp │ ├── dlib-tracker-wrapper.h │ ├── images-utilities.cpp │ ├── images-utilities.h │ ├── leveldb-reader.cpp │ ├── leveldb-reader.h │ ├── leveldb-writer.cpp │ ├── leveldb-writer.h │ ├── rect-helper.cpp │ ├── rect-helper.h │ ├── utilities.cpp │ ├── utilities.h │ ├── volleyball-dataset-mgr.cpp │ └── volleyball-dataset-mgr.h └── volleyball-simple/ ├── 39/ │ └── annotations.txt └── 41/ └── annotations.txt ================================================ FILE CONTENTS ================================================ ================================================ FILE: LICENSE ================================================ Copyright (c) 2016, Simon Fraser University All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: Makefile ================================================ USE_CPU := -DCPU_ONLY=0 # TODO: Update next 2 directories CAFFE_LSTM_DIR := /caffe-lstm DLIB_DIR := /dlib RM := rm -rf CC := g++ CC_OPTIONS = -std=gnu++0x -Wall -c -fmessage-length=0 -O3 $(USE_CPU) CFLAGS = -fPIC $(CC_OPTIONS) # TODO: Add/Remove if needed (e.g. 
Opencv directories) INCS_DIRS := -I$(CAFFE_LSTM_DIR)/include -I$(CAFFE_LSTM_DIR)/build/src -I$(DLIB_DIR) # -I/LIB/OPENCV/3.0.0-CUDA65/include \ # -I/usr/include/openblas \ # -I/usr/local/cuda-6.5/include \ # -I/LIB/BOOST/1.57.0/include \ # -I/LIB/GLOG/0.3.3/include \ # -I/LANG/PYTHON/2.7.6-SYSTEM/include/python2.7 LIBS_DIRS := -L$(DLIB_DIR) -L$(CAFFE_LSTM_DIR)/build/lib # -L/LIB/OPENCV/3.0.0-CUDA65/lib \ # -L/usr/lib \ # -L/usr/local/cuda-6.5/lib64 \ # -L/LANG/PYTHON/2.7.6-SYSTEM/lib \ # -L/LIB/GLOG/0.3.3/lib \ # -L/LIB/BOOST/1.57.0/lib \ # -L/cs/vml2/msibrahi/workspaces/software/dlib/examples/build/dlib_build LIBS := -lboost_system -lboost_filesystem -lboost_chrono -lboost_python \ -lopencv_core -lopencv_highgui -lopencv_imgproc -lopencv_ml \ -lpython2.7 -lleveldb -lprotobuf -lgflags -lglog -pthread -lcaffe -ldlib ############################################################ SRC_BASE := src SRC_BASE_OUT = release APP_BASE := apps APP_BASE_OUT = apps-release SRCS := $(wildcard $(SRC_BASE)/*.cpp) OBJS := $(addprefix $(SRC_BASE_OUT)/, $(patsubst %.cpp,%.o,$(notdir $(SRCS)))) DEPS := $(addprefix $(SRC_BASE_OUT)/, $(patsubst %.cpp,%.d,$(notdir $(SRCS)))) ifneq ($(MAKECMDGOALS),clean) ifneq ($(strip $(DEPS)),) -include $(DEPS) endif endif TARGET1 = exePhase1_2 TARGET2 = exePhase3 TARGET3 = exePhase4 all: mkdir -p $(SRC_BASE_OUT) mkdir -p $(APP_BASE_OUT) $(MAKE) $(MAKEFILE) $(TARGET1) $(MAKE) $(MAKEFILE) $(TARGET2) $(MAKE) $(MAKEFILE) $(TARGET3) $(SRC_BASE_OUT)/%.o: $(SRC_BASE)/%.cpp @echo 'Building file: $<' @echo 'Invoking: GCC C++ Compiler' $(CC) $(CFLAGS) $(INCS_DIRS) -fPIC -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o "$@" "$<" @echo 'Finished building: $<' @echo ' ' $(APP_BASE_OUT)/%.o: $(APP_BASE)/%.cpp @echo 'Building file: $<' @echo 'Invoking: GCC C++ Compiler' $(CC) $(CFLAGS) $(INCS_DIRS) -fPIC -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o "$@" "$<" @echo 'Finished building: $<' @echo ' ' $(TARGET1): $(OBJS) $(APP_BASE_OUT)/$(TARGET1).o @echo 'Building 
TARGET1: $@' @echo 'Invoking: GCC C++ Linker' $(CC) $(LIBS_DIRS) -o $(TARGET1) $(SRC_BASE_OUT)/*.o $(APP_BASE_OUT)/$(TARGET1).o $(LIBS) @echo 'Finished building TARGET1: $@' @echo ' ' $(TARGET2): $(OBJS) $(APP_BASE_OUT)/$(TARGET2).o @echo 'Building TARGET2: $@' @echo 'Invoking: GCC C++ Linker' $(CC) $(LIBS_DIRS) -o $(TARGET2) $(SRC_BASE_OUT)/*.o $(APP_BASE_OUT)/$(TARGET2).o $(LIBS) @echo 'Finished building TARGET2: $@' @echo ' ' $(TARGET3): $(OBJS) $(APP_BASE_OUT)/$(TARGET3).o @echo 'Building TARGET3: $@' @echo 'Invoking: GCC C++ Linker' $(CC) $(LIBS_DIRS) -o $(TARGET3) $(SRC_BASE_OUT)/*.o $(APP_BASE_OUT)/$(TARGET3).o $(LIBS) @echo 'Finished building TARGET3: $@' @echo ' ' clean: -$(RM) $(SRC_BASE_OUT) -$(RM) $(APP_BASE_OUT) -$(RM) $(TARGET1) -$(RM) $(TARGET2) -$(RM) $(TARGET3) .PHONY: clean all ================================================ FILE: README.md ================================================ ## [A Hierarchical Deep Temporal Model for Group Activity Recognition. Mostafa S. Ibrahim, Srikanth Muralidharan, Zhiwei Deng, Arash Vahdat, Greg Mori. IEEE Computer Vision and Pattern Recognition 2016](http://www.cs.sfu.ca/~mori/research/papers/ibrahim-cvpr16.pdf) ## Contents 0. [History](#history) 0. [Abstract](abstract) 0. [Model](#model) 0. [Dataset](#dataset) 0. [Experiments](#experiments) 0. [Installation](#installation) 0. [License and Citation](#license-and-citation) 0. [Poster and Powerpoint](#poster-and-powerpoint) ## History * The first version of this work is accepted at CVPR 2016. * An extended work is uploaded on arxiv. [Link](http://arxiv.org/pdf/1607.02643v1.pdf). * This version builds on the previous version to include the following: * We have collected an expanded Volleyball dataset that is 3 times larger than CVPR submission. * We conducted further analysis of experimental results and included comparisons to an additional set of baseline methods. * We implemented a variant of our approach to perform spatial pooling strategies over people. 
* The provided dataset is the expanded version. Please use and compare against this version. ## Abstract In group activity recognition, the temporal dynamics of the whole activity can be inferred based on the dynamics of the individual people representing the activity. We build a deep model to capture these dynamics based on LSTM models. To make use of these observations, we present a **2-stage deep temporal model for the group activity recognition** problem. In our model, an LSTM model is designed to represent **action dynamics of individual people** in a sequence and another LSTM model is designed to **aggregate person-level information** for whole activity understanding. We evaluate our model over two datasets: the Collective Activity Dataset and a new volleyball dataset. ## Model Figure 1 **Figure 1**: High level figure for group activity recognition via a hierarchical model. Each person in a scene is modeled using a temporal model that captures his/her dynamics; these models are integrated into a higher-level model that captures scene-level activity. Figure 2 **Figure 2**: Detailed figure for the model. Given tracklets of K-players, we feed each tracklet into a CNN, followed by a person LSTM layer to represent each player's action. We then pool over all people's temporal features in the scene. The output of the pooling layer is fed to the second LSTM network to identify the whole team's activity. Figure 3 **Figure 3**: The previous basic model drops spatial information. In the updated model, 2-group pooling captures the spatial arrangements of players. ## Dataset ### [NEW Download Link (all below combined, Google Drive)](https://drive.google.com/drive/folders/1rmsrG1mgkwxOKhsr-QYoi9Ss92wQmCOS?usp=sharing). ### [Old Download Link](http://vml.cs.sfu.ca/wp-content/uploads/volleyballdataset/volleyball.zip). If links don't work at some point, please email me (mostafa.saad.fci@gmail.com) **Download Error**: Got quota issue?
Google 'How To Fix Google Drive Download Quota Exceeded' **UPDATE 1**: Many people asked for extracted trajectories. In fact, as in our code, we generate them on the fly using Dlib Tracker. I extracted and saved them to disk (I did a few verifications). Hopefully this helps more. [Download](https://drive.google.com/file/d/0B_rSt5dGmwYBQkh2WFNKTjBSeWM/view?usp=sharing). **UPDATE 2**: My colleague, Jiawei (Eric) He, recently trained 2 Faster-RCNN detectors using the training detections. One detector just detects the person. The other one detects the action of the person. Each row has format: [Image name # of detections x y w h confidence category (for each detection)]. In multiple scenarios such data can be useful and cut your time. I did a few verifications over them. Note that these data are not used in our models. They are provided to help :). [Download](https://drive.google.com/file/d/0B_rSt5dGmwYBQXVqLUNKd3FUdVE/view?usp=sharing). **UPDATE 3 - NEW**: Special thanks to Norimichi Ukita (a [professor](https://www.toyota-ti.ac.jp/Lab/Denshi/iim/ukita/) at Toyota Technological Institute) for providing manual annotations for the trajectories on all video sequences. [Download](https://drive.google.com/open?id=1M-fXmAVw8WyFr30xb-LMi_Z-Qiv2nFzl). Kindly check out the README file for the data format and cite their paper if you use the annotations (Heatmapping of People Involved in Group Activities, Kohei Sendo and Norimichi Ukita, MVA 2019) **UPDATE 4 - NEW**: Special thanks to Mauricio Perez. In their recent paper: [Skeleton-based relational reasoning for group activity analysis](https://www.sciencedirect.com/science/article/abs/pii/S0031320321005409) they manually annotated the ball locations in the frames. Kindly cite their paper if you use their [dataset extension](https://drive.google.com/file/d/1urZpZiiepC85JD1u3VeURgUpztRgI0yl/edit) We collected a new dataset using publicly available **YouTube volleyball** videos.
We annotated **4830 frames** that were handpicked from **55 videos** with 9 player action labels and 8 team activity labels. Figure 3 **Figure 3**: A frame labeled as Left Spike, with bounding boxes around each team's players, as annotated in the dataset. Figure 4 **Figure 4**: For each visible player, an action label is annotated. We used 3493 frames for training, and the remaining 1337 frames for testing. The train-test split is performed at video level, rather than at frame level, so that it makes the evaluation of models more convincing. The list of action and activity labels and related statistics are tabulated in the following tables: |Group Activity Class|No. of Instances| |---|---| |Right set|644| |Right spike|623| |Right pass|801| |Right winpoint|295| |Left winpoint|367| |Left pass|826| |Left spike|642| |Left set|633| |Action Classes|No. of Instances| |---|---| |Waiting|3601| |Setting|1332| |Digging|2333| |Falling|1241| |Spiking|1216| |Blocking|2458| |Jumping|341| |Moving|5121| |Standing|38696| **Further information**: * The dataset contains 55 videos. Each video has a folder for it with unique IDs (0, 1...54) * **Train Videos**: 1 3 6 7 10 13 15 16 18 22 23 31 32 36 38 39 40 41 42 48 50 52 53 54 * **Validation Videos**: 0 2 8 12 17 19 24 26 27 28 30 33 46 49 51 * **Test Videos**: 4 5 9 11 14 20 21 25 29 34 35 37 43 44 45 47 * Inside each video directory, a set of directories corresponds to annotated frames (e.g. volleyball/39/29885) * Video 39, frame ID 29885 * Each frame directory has 41 images (20 images before target frame, **target frame**, 20 frames after target frame) * E.g. for frame ID: 29885 => Window = {29865, 29866.....29885, 29886....29905} * Scenes change quite rapidly in volleyball, hence frames beyond that window usually do not belong to the target frame. * In our work, we used 5 before and 4 after frames. * Each video directory has an annotations.txt file that contains the selected frames' annotations.
* Each annotation line is in the format: {Frame ID} {Frame Activity Class} {Player Annotation} {Player Annotation} ... * A Player Annotation corresponds to a tight bounding box surrounding each player * Each {Player Annotation} is in the format: {Action Class} X Y W H * Videos with resolution of 1920x1080 are: 2 37 38 39 40 41 44 45 (8 in total). All others are 1280x720. ## Experiments Figure 5 **Table 1**: Comparison of the team activity recognition performance of baselines against our model evaluated on the Volleyball Dataset. Experiments are using 2 group styles with max pool strategy. The last 3 entries are a comparison against the Improved Dense Trajectories approach. ## Installation * There are 2 internal projects: a simple one for the sake of validation and the other one, the real pipeline. * Download and Install [Dlib library](http://dlib.net/). * Download and Install [Caffe-LSTM library](https://github.com/junhyukoh/caffe-lstm). * Assume your download disk path is `$lstm_path` * `cd $lstm_path/examples` * `git clone https://github.com/mostafa-saad/deep-activity-rec.git` * Open makefile at examples/deep-activity-rec * `Update` path locations of variables CAFFE_LSTM_DIR and DLIB_DIR * `Update` the INCS_DIRS and LIBS_DIRS (based on your environment) * Open examples/deep-activity-rec/ibrahim16-cvpr-simple/script-simple.sh * Update path variable for CAFFE * `cd examples/deep-activity-rec` * Compile code: `make all` * `cd ../..` * Run: `examples/deep-activity-rec/ibrahim16-cvpr-simple/script-simple.sh` * Make sure top console lines don't complain about "NOT exist directory". * You may validate overall console processing with file script-simple-expected-log.txt * If so, fix it, use script-clean.sh, run script-simple.sh. * The code process is of multiple stages as outlined in the script file. * Processing should end with a simple accuracy table, all of it being close to zeros. * The key is to check the console log and to make sure there are no errors found.
* Otherwise, read the script and try to get the different phases and read logs to get the errors. * Every sub-directory under ibrahim16-cvpr-simple has 1 or more logs. * Directory p4-network2 should have the final model and accuracy table. * If everything went alright, we can proceed with the actual pipeline. * Download the dataset to path deep-activity-rec/volleyball * Same directory structure as the given deep-activity-rec/volleyball-simple * Whatever steps/changes you did for ibrahim16-cvpr-simple, do them for ibrahim16-cvpr. * Run: `examples/deep-activity-rec/ibrahim16-cvpr/script.sh` * GPU/CPU note: * The script.sh has 2 heavy processing phases that need CPU. * One can also run the following 2 scripts in parallel on CPU: script-p1-data.sh and script-p2-data-fuse.sh * Then run the following script on GPU: script-p1-train-p3-p4.sh * The main script runs all these scripts in the required order. ## License and Citation Source code is released under the **BSD 2-Clause license** In case of using our extended dataset, please cite the following 2 publications. Otherwise, cite a suitable subset of them: @inproceedings{msibrahiCVPR16deepactivity, author = {Mostafa S. Ibrahim and Srikanth Muralidharan and Zhiwei Deng and Arash Vahdat and Greg Mori}, title = {A Hierarchical Deep Temporal Model for Group Activity Recognition.}, booktitle = {2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, year = {2016} } @article{msibrahiPAMI16deepactivity, author = {Mostafa S. Ibrahim and Srikanth Muralidharan and Zhiwei Deng and Arash Vahdat and Greg Mori}, title = {Hierarchical Deep Temporal Models for Group Activity Recognition.}, journal = {arXiv preprint arXiv:1607.02643}, year = {2016} } ## Poster and Powerpoint * You can find a presentation for the paper [here](https://docs.google.com/presentation/d/1iHMRCghn-dOYc2knvTj8Kp27RRojCsLzCbE8Ax5JCOs/edit?usp=sharing).
* You can find our CVPR 2016 poster [here](https://github.com/mostafa-saad/deep-activity-rec/blob/master/extra/poster.pdf). Poster Mostafa on left and Srikanth on right while presenting the poster. ================================================ FILE: apps/exePhase1_2.cpp ================================================ /* * w-driver-volleyball-lstm-evaluator.cpp * * Created on: Jul 13, 2015 * Author: msibrahi */ #include #include #include #include #include #include using std::vector; using std::set; using std::map; using std::pair; using std::endl; using std::cout; #include "../src/leveldb-writer.h" #include "../src/custom-macros.h" #include "../src/rect-helper.h" #include "../src/utilities.h" #include "../src/images-utilities.h" #include "../src/custom-images-macros.h" #include "../src/dlib-tracker-wrapper.h" #include "../src/volleyball-dataset-mgr.h" using MostCV::VolleyballPerson; using MostCV::VolleyballVideoData; using MostCV::VolleyballDatasetPart; using MostCV::VolleyballDatasetMgr; using MostCV::RectHelper; #include #include #include const int resize_width = 256; const int resize_height = 256; const int num_channels = 3; const int kPlayersCount = 12; ///////////////////////////////////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { string program_name = MostCV::consumeStringParam(argc, argv); cerr << "Start: " << program_name << endl; // read program entry data string dataset_videos_path = MostCV::consumeStringParam(argc, argv); string config_path = MostCV::consumeStringParam(argc, argv); string leveldb_output_path = MostCV::consumeStringParam(argc, argv); int temporal_window = MostCV::consumeIntParam(argc, argv); int step = MostCV::consumeIntParam(argc, argv); int bIsPrepareLSTMData = MostCV::consumeIntParam(argc, argv); // otherwise fusion data if (bIsPrepareLSTMData) cerr << "LSTM 1 preparation" << endl; else cerr << "Data Fusion for LSTM 2" << endl; assert(temporal_window > 0); 
MostCV::fixDir(config_path); MostCV::fixDir(dataset_videos_path); MostCV::fixDir(leveldb_output_path); cerr << "Loading the dataset..." << endl; VolleyballDatasetMgr mgr(config_path, dataset_videos_path); cerr << "Temporal window = " << temporal_window << " with step = " << step << "\n\n"; vector > dbMgrs; Mat blackRectImage = Mat::zeros(resize_width, resize_height, CV_8UC3); ///////////////////////////////////////////////////////////////////////////////////////////////////////// // Create leveldb datasets for (auto &dataset : mgr.dataset_division_) { dataset.dataset_db_name_ = dataset.dataset_name_ + "-leveldb"; dataset.dataset_db_path_ = leveldb_output_path + dataset.dataset_db_name_; MostCV::fixDir(dataset.dataset_db_path_); cerr<<"Creating a new dataset\n"; dbMgrs.push_back(new MostCV::LeveldbWriter(dataset.dataset_db_path_, resize_height, resize_width, num_channels, false)); if (bIsPrepareLSTMData) dbMgrs.back()->setLabelsRange(mgr.total_persons_labels); else dbMgrs.back()->setLabelsRange(mgr.total_scene_labels); } ///////////////////////////////////////////////////////////////////////////////////////////////////////// int dataset_pos = 0; boost::mt19937 generator(100); boost::uniform_int<> uni_dist; boost::variate_generator > rand_generator(generator, uni_dist); for (auto dataset : mgr.dataset_division_) { // Shuffle data before use cerr << "Extracting shuffled elements from " << dataset.dataset_name_ << " Data Set. Total videos = " << dataset.videos_vec_.size() << "\n"; Ptr dbMgr = dbMgrs[dataset_pos++]; vector > database_shuffled; for (auto video : dataset.videos_vec_) { for (auto frame_id : video.annot_frame_id_vec_) database_shuffled.push_back(std::make_pair(video, frame_id)); } std::random_shuffle(database_shuffled.begin(), database_shuffled.end(), rand_generator); if (bIsPrepareLSTMData) { cerr << "Total images for current data set is " << database_shuffled.size() << ". 
Overall entries will be <= " << temporal_window * database_shuffled.size() * kPlayersCount << endl; } else { cerr << "Total images for current data set is " << database_shuffled.size() << ". Overall entries will be = " << temporal_window * database_shuffled.size() * kPlayersCount << endl; } ///////////////////////////////////////////////////////////////////////////////////////////////////////// for (auto database_entry : database_shuffled) { auto video = database_entry.first; string frame_id = database_entry.second; int frame_label = video.annot_frame_id_to_activity_id_map_[frame_id]; // prepare tracking data pair, vector > images_paths_seq = video.GetTemporalWindowPaths(frame_id, temporal_window, step, false); vector imagesSequenceBefore, imagesSequenceAfter; Mat img; for (auto path : images_paths_seq.first) imagesSequenceBefore.push_back(cv::imread(path)); for (auto path : images_paths_seq.second) imagesSequenceAfter.push_back(cv::imread(path)); if (imagesSequenceAfter.size()) img = imagesSequenceAfter.back(); else img = imagesSequenceBefore.back(); assert(!img.empty()); vector &persons = video.annot_frame_id_persons_map_[frame_id]; vector images; vector > persons_tracklets; for (auto person : persons) { MostCV::DlibTrackerWrapper tracker(person.bbox_.r); pair, vector > tracklet = tracker.Process(imagesSequenceBefore, imagesSequenceAfter); images = tracklet.first; persons_tracklets.push_back(tracklet.second); } // generates temporal_window * kPlayersCount * frames int seq_id = 0, person_pos = 0; for (auto tracklet : persons_tracklets) { int rect_pos = 0; for (auto img : images) { dbMgr->clearDatum(); //MostCV::ShowImage(img(tracklet[rect_pos])); assert(dbMgr->addImageToDatum(img(tracklet[rect_pos]), num_channels)); if (bIsPrepareLSTMData) dbMgr->setDatumLabel(persons[person_pos].action_id_); else dbMgr->setDatumLabel(frame_label); dbMgr->addDatumToBatch(video.video_id_ + "_" + frame_id + "_" + MostCV::toIntStr("000", seq_id++)); rect_pos++; } ++person_pos; } // 
for missing persons, add zero images if (!bIsPrepareLSTMData) { LP(j, kPlayersCount - persons_tracklets.size()) { LP(k, temporal_window) { dbMgr->clearDatum(); assert(dbMgr->addImageToDatum(blackRectImage, num_channels)); dbMgr->setDatumLabel(frame_label); dbMgr->addDatumToBatch(video.video_id_ + "_" + frame_id + "_" + MostCV::toIntStr("000", seq_id++)); } } } } dbMgr->forceFinalize(); } cerr << "\n\nBye: " << program_name << endl; return 0; } ================================================ FILE: apps/exePhase3.cpp ================================================ #include #include #include #include #include using std::vector; using std::set; using std::string; using std::pair; using std::endl; using std::cout; #include "boost/algorithm/string.hpp" #include "google/protobuf/text_format.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/net.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/db.hpp" #include "caffe/util/io.hpp" #include "caffe/vision_layers.hpp" using caffe::Blob; using caffe::Caffe; using caffe::Datum; using caffe::Net; using caffe::Layer; using caffe::LayerParameter; using caffe::DataParameter; using caffe::NetParameter; using boost::shared_ptr; namespace db = caffe::db; #include "../src/utilities.h" #include "../src/leveldb-reader.h" #include "../src/leveldb-writer.h" enum fuse_style { concatenate_players = 0, max_pool_players_1 = 1, // all players in one vec of feature mid size max_pool_players_2 = 2, max_pool_players_4 = 3, // divide the ground 4 blocks and max pool it. E.g. 
in 16 players, each 4 has max pool avg_pool_players_1 = 4, avg_pool_players_2 = 5, avg_pool_players_4 = 6, sum_pool_players_1 = 7, sum_pool_players_2 = 8, sum_pool_players_4 = 9 }; string fuse_style_sz[] = { "concatenate_players", "max_pool_players_1", "max_pool_players_2", "max_pool_players_4", "avg_pool_players_1", "avg_pool_players_2", "avg_pool_players_4", "sum_pool_players_1", "sum_pool_players_2", "sum_pool_players_4" }; int target_fuse_style = concatenate_players; const int kPlayersCount = 12; void RemoveLastBlock(vector &input, int block_length) { assert((int )input.size() >= block_length); for (int i = 0; i < block_length; ++i) input.pop_back(); } void AddLastBlock(vector &input, int block_length) { for (int i = 0; i < block_length; ++i) input.push_back(0); } void RemoveDummyVectors(vector &input, int block_length) { bool is_all_zeros = true; while (is_all_zeros && (int) input.size() > block_length) { // Leave at least 1 block int last_idx = input.size() - 1; for (int i = 0; i < is_all_zeros && block_length; ++i) is_all_zeros &= input[last_idx - i] == 0; if (is_all_zeros) RemoveLastBlock(input, block_length); } } // target_blocks_cnt = 1 => merge all sub-vectors in 1 block // target_blocks_cnt = 4 => merge every set of consecutive sub-vectors to get total 4 blocks vector VectorsFusing(vector &input, int block_length, int target_blocks_cnt) { // I fixed bug here...hopefully not big problem! 
if (target_fuse_style == avg_pool_players_1 || target_fuse_style == sum_pool_players_1 || target_fuse_style == max_pool_players_1) RemoveDummyVectors(input, block_length); else if (target_fuse_style == concatenate_players) { int cur_blocks = input.size() / block_length; // then we need specific count of boxes assert(cur_blocks >= kPlayersCount); while (cur_blocks > kPlayersCount) { --cur_blocks; RemoveLastBlock(input, block_length); } } else { RemoveDummyVectors(input, block_length); while (input.size() > 0 && (input.size() % (block_length * target_blocks_cnt) != 0)) AddLastBlock(input, block_length); } vector output; const float* pData = &input[0]; if (input.size() % (block_length * target_blocks_cnt) != 0) { cerr << "Error A%(B*C) != 0 => " << input.size() << " " << block_length << " " << target_blocks_cnt << "\n"; assert(input.size() % (block_length * target_blocks_cnt) == 0); } int merge_blocks_cnt = input.size() / (block_length * target_blocks_cnt); // merge cnt for (int i = 0; i < (int) input.size(); i += block_length * merge_blocks_cnt) { int t = merge_blocks_cnt; vector sub_output(block_length); for (int j = 0; j < block_length; ++j) sub_output[j] = pData[j]; pData += block_length; --t; while (t--) { for (int j = 0; j < block_length; ++j) { if (target_fuse_style == avg_pool_players_1 || target_fuse_style == avg_pool_players_2 || target_fuse_style == avg_pool_players_4 || target_fuse_style == sum_pool_players_1|| target_fuse_style == sum_pool_players_2|| target_fuse_style == sum_pool_players_4) sub_output[j] += pData[j]; else sub_output[j] = std::max(sub_output[j], pData[j]); } pData += block_length; } for (auto val : sub_output) output.push_back(val); } if (target_fuse_style == avg_pool_players_1 || target_fuse_style == avg_pool_players_2 || target_fuse_style == avg_pool_players_4) { for (auto &val : output) val /= merge_blocks_cnt; } return output; } template void feature_extraction_pipeline(int &argc, char** &argv) { target_fuse_style = 
MostCV::consumeIntParam(argc, argv, "target_fuse_style"); LOG(ERROR)<< "Fusing style = "< > feature_extraction_net(new Net(feature_extraction_proto, caffe::Phase::TEST)); LOG(ERROR)<<"Loading the Model\n"; feature_extraction_net->CopyTrainedLayersFrom(pretrained_binary_proto); vector blob_names_vec; int blobs_cnt = MostCV::consumeIntParam(argc, argv, "blobs_cnt"); assert(blobs_cnt > 0); LOG(ERROR)<<"# of blobs is "<has_blob(blob_name)) << "Unknown feature blob name " << blob_name << " in the network " << feature_extraction_proto; blob_names_vec.push_back(blob_name); } string output_dataset_name = MostCV::consumeStringParam(argc, argv); int num_mini_batches = MostCV::consumeIntParam(argc, argv, "num_mini_batches"); LOG(ERROR)<<"num_mini_batches: "<*> input_vec; int db_entry_idx = 0; int batch_size = -1; int dim_features = -1; std::set batch_labels; // all our batch value must be same std::set dataset_labels; // logically database shouldn't have only 1 label for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index) { // e.g. 100 iterations. Probably roll on data if needed feature_extraction_net->Forward(input_vec); // Take one batch of data (e.g. 50 images), and pass them to end of network // Load the Labels const shared_ptr > label_blob = feature_extraction_net->blob_by_name("label"); batch_size = label_blob->num(); // e.g. 16 batches for volleyball..represents the boxes of a frame. assert(batch_size == frames_window * kPlayersCount); batch_labels.clear(); int current_label = -1; for (int n = 0; n < batch_size; ++n) { const Dtype* label_blob_data = label_blob->cpu_data() + label_blob->offset(n); // move offset to ith blob in batch current_label = label_blob_data[0]; // all will be same value batch_labels.insert(current_label); dataset_labels.insert(current_label); } if (batch_labels.size() != 1) { // every 1 batch should have same value cerr << "\n\nERROR. Every 1 batch should have the same value. 
Inconsistent batch # " << batch_index + 1 << "-th\n"; cerr << "Overall unique labels are: " << batch_labels.size() << ". The appeared labels are: "; for(auto label : batch_labels) cerr< > > feature_blob_vec; for (auto blob_name : blob_names_vec) { shared_ptr > feature_blob = feature_extraction_net->blob_by_name(blob_name); // get e.g. fc7 blob for the batch feature_blob_vec.push_back(feature_blob); } int total_dim_features = 0; static bool print_once_feature_vec = true; if (print_once_feature_vec) LOG(ERROR)<<"\n\n"; for (auto feature_blob : feature_blob_vec) { dim_features = feature_blob->count() / batch_size; // e.g. 4096 total_dim_features += dim_features; // e.g. 4096 of fc7 + 250 of lstm1 if (print_once_feature_vec) LOG(ERROR)<<"ith Vector Length = "< > window_feature_vecs(frames_window); for (int n = 0; n < batch_size; ++n) { for (auto feature_blob : feature_blob_vec) { dim_features = feature_blob->count() / batch_size; // e.g. 4096 const Dtype* feature_blob_data = feature_blob->cpu_data() + feature_blob->offset(n); // move offset to ith blob in batch int p = n % frames_window; for (int d = 0; d < dim_features; ++d) window_feature_vecs[p].push_back(feature_blob_data[d]); } } for (auto &feature_vec : window_feature_vecs) { if (target_fuse_style == max_pool_players_1 || target_fuse_style == avg_pool_players_1 || target_fuse_style == sum_pool_players_1) feature_vec = VectorsFusing(feature_vec, total_dim_features, 1); else if (target_fuse_style == max_pool_players_2 || target_fuse_style == avg_pool_players_2 || target_fuse_style == sum_pool_players_2) feature_vec = VectorsFusing(feature_vec, total_dim_features, 2); else if (target_fuse_style == max_pool_players_4 || target_fuse_style == avg_pool_players_4 || target_fuse_style == sum_pool_players_4) feature_vec = VectorsFusing(feature_vec, total_dim_features, 4); // otherwise, keep it concatenated if (print_once_feature_vec) LOG(ERROR)<<"Fused Vector Length = "< 1); // some variety make sense! 
} int main(int argc, char** argv) { ::google::InitGoogleLogging(argv[0]); MostCV::consumeStringParam(argc, argv); // read program entry data LOG(ERROR)<< "Make sure to have LD_LIBRARY_PATH pointing to LSTM implementation in case of LSTM\n\n"; // as long as chucks of data while (argc) { if (argc < 6) { LOG(ERROR)<< "At least 6 parameters expected\n"; assert(false); } feature_extraction_pipeline(argc, argv); LOG(ERROR)<< "\n\nSuccessfully extracted the features!\n\n"; } return 0; } ================================================ FILE: apps/exePhase4.cpp ================================================ /* * w-driver-volleyball-lstm-evaluator.cpp * * Created on: Jul 13, 2015 * Author: msibrahi */ #include #include #include #include #include #include #include #include using std::vector; using std::set; using std::multiset; using std::map; using std::pair; using std::string; using std::endl; using std::cerr; #include "boost/algorithm/string.hpp" #include "google/protobuf/text_format.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/net.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/db.hpp" #include "caffe/util/io.hpp" #include "caffe/vision_layers.hpp" using caffe::Blob; using caffe::Caffe; using caffe::Datum; using caffe::Net; using caffe::Layer; using caffe::LayerParameter; using caffe::DataParameter; using caffe::NetParameter; using boost::shared_ptr; namespace db = caffe::db; #include "../src/utilities.h" #include "../src/leveldb-reader.h" void evaluate(vector truthLabels, vector resultLabels, int w) { set total_labels; map > confusion_freq_maps; map label_freq; int correct = 0; cerr<<"\n\n"; for (int i = 0; i < (int) truthLabels.size(); ++i) { correct += truthLabels[i] == resultLabels[i]; cerr << "Test " << i + 1 << ": Result = " << resultLabels[i] << " GroundTruth = " << truthLabels[i] << "\n"; confusion_freq_maps[truthLabels[i]][resultLabels[i]]++; total_labels.insert(truthLabels[i]); total_labels.insert(resultLabels[i]); 
label_freq[truthLabels[i]]++; } cerr.setf(std::ios::fixed); cerr.precision(2); cerr<<"\n\n"; cerr << "Total testing frames: " << truthLabels.size() << " with temporal window: " << w << "\n"; cerr << "Temporal accuracy : " << 100.0 * correct / truthLabels.size() << " %\n"; cerr << "\n=======================================================================================\n"; cerr << "\nConfusion Matrix - Truth (col) / Result(row)\n\n"; cerr << std::setw(5) << "T/R" << ": "; for (auto r_label : total_labels) cerr << std::setw(5) << r_label; cerr << "\n=======================================================================================\n"; for (auto t_label : total_labels) { int sum = 0; cerr << std::setw(5) << t_label << ": "; for (auto r_label : total_labels) { cerr << std::setw(5) << confusion_freq_maps[t_label][r_label]; sum += confusion_freq_maps[t_label][r_label]; } double percent = 0; if (label_freq[t_label] > 0) percent = 100.0 * confusion_freq_maps[t_label][t_label] / label_freq[t_label]; cerr << " \t=> Total Correct = " << std::setw(5) << confusion_freq_maps[t_label][t_label] << " / " << std::setw(5) << sum << " = " << percent << " %\n"; } cerr<<"\n\n"; cerr << std::setw(7) << "T/R" << ": "; for (auto r_label : total_labels) cerr << std::setw(7) << r_label; cerr << "\n=======================================================================================\n"; for (auto t_label : total_labels) { cerr << std::setw(7) << t_label << ": "; for (auto r_label : total_labels) { double percent = 0; if (label_freq[t_label] > 0) percent = 100.0 * confusion_freq_maps[t_label][r_label] / label_freq[t_label]; cerr << std::setw(7) << percent; } cerr<<"\n"; } cerr<<"\nTo get labels corresponding to IDs..see dataset loading logs\n"; } int getArgmax(vector &v) { int pos = 0; assert(v.size() > 0); for (int j = 1; j < (int) v.size(); ++j) { if (v[j] > v[pos]) pos = j; } return pos; } template void feature_extraction_pipeline(int &argc, char** &argv) { int frames_window = 
MostCV::consumeIntParam(argc, argv); LOG(ERROR)<< "Temporal Window = " << frames_window; string computation_mode = MostCV::consumeStringParam(argc, argv); if (strcmp(computation_mode.c_str(), "GPU") == 0) { uint device_id = MostCV::consumeIntParam(argc, argv); LOG(ERROR)<< "Using GPU"; LOG(ERROR)<< "Using Device_id = " << device_id; Caffe::SetDevice(device_id); Caffe::set_mode(Caffe::GPU); } else { LOG(ERROR)<< "Using CPU"; Caffe::set_mode(Caffe::CPU); } string pretrained_binary_proto(MostCV::consumeStringParam(argc, argv)); string feature_extraction_proto(MostCV::consumeStringParam(argc, argv)); LOG(ERROR)<<"Model: "< > feature_extraction_net(new Net(feature_extraction_proto, caffe::Phase::TEST)); LOG(ERROR)<<"Loading the Model\n"; feature_extraction_net->CopyTrainedLayersFrom(pretrained_binary_proto); string blob_name = MostCV::consumeStringParam(argc, argv); LOG(ERROR)<<"blob_name: "<has_blob(blob_name)) << "Unknown feature blob name " << blob_name << " in the network " << feature_extraction_proto; int num_mini_batches = MostCV::consumeIntParam(argc, argv); LOG(ERROR)<<"num_mini_batches: "<*> input_vec; int batch_size = -1; int dim_features = -1; std::set labels; // every (2w+1) * batch size MUST all have same label vector truthLabels; vector propAvgMaxResultLabels; for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index) { // e.g. 100 iterations. Probably roll on data if needed feature_extraction_net->Forward(input_vec); // Take one batch of data (e.g. 50 images), and pass them to end of network // Load the Labels const shared_ptr > label_blob = feature_extraction_net->blob_by_name("label"); batch_size = label_blob->num(); // e.g. 
50 batches assert(batch_size == frames_window); int current_label = -1; for (int n = 0; n < batch_size; ++n) { const Dtype* label_blob_data = label_blob->cpu_data() + label_blob->offset(n); // move offset to ith blob in batch current_label = label_blob_data[0]; // all will be same value labels.insert(current_label); if (n == 0) truthLabels.push_back(current_label); } if (labels.size() != 1) { // every 1 batch should have same value LOG(ERROR)<< "Something wrong. every 1 batch should have same value. New value at element " << batch_index + 1 << "\n"; assert(false); } labels.clear(); const shared_ptr > feature_blob = feature_extraction_net->blob_by_name(blob_name); // get e.g. fc7 blob for the batch dim_features = feature_blob->count() / batch_size; assert(dim_features > 1); const Dtype* feature_blob_data = nullptr; vector test_case_sum(dim_features); for (int n = 0; n < batch_size; ++n) { feature_blob_data = feature_blob->cpu_data() + feature_blob->offset(n); // move offset to ith blob in batch vector test_case; for (int j = 0; j < dim_features; ++j) { test_case.push_back(feature_blob_data[j]); test_case_sum[j] += feature_blob_data[j]; } } propAvgMaxResultLabels.push_back( getArgmax(test_case_sum) ); } evaluate(truthLabels, propAvgMaxResultLabels, 1); } int main(int argc, char** argv) { ::google::InitGoogleLogging(argv[0]); MostCV::consumeStringParam(argc, argv); // read program entry data if (argc < 6) { LOG(ERROR)<< "At least 6 parameters expected\n"; assert(false); } LOG(ERROR)<< "Make sure to have LD_LIBRARY_PATH pointing to LSTM implementation in case of LSTM\n\n"; feature_extraction_pipeline(argc, argv); return 0; } ================================================ FILE: dataset-config/test.txt ================================================ 4 5 9 11 14 20 21 25 29 34 35 37 43 44 45 47 ================================================ FILE: dataset-config/train.txt ================================================ ================================================ 
FILE: dataset-config/trainval.txt ================================================ 0 1 2 3 6 7 8 10 12 13 15 16 17 18 19 22 23 24 26 27 28 30 31 32 33 36 38 39 40 41 42 46 48 49 50 51 52 53 54 ================================================ FILE: dataset-config/val.txt ================================================ ================================================ FILE: dataset-config-simple/test.txt ================================================ 41 ================================================ FILE: dataset-config-simple/train.txt ================================================ ================================================ FILE: dataset-config-simple/trainval.txt ================================================ 39 ================================================ FILE: dataset-config-simple/val.txt ================================================ ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/.cproject ================================================ ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/.project ================================================ ibrahim16-deep-act-rec-part org.eclipse.cdt.managedbuilder.core.genmakebuilder org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder full,incremental, org.eclipse.cdt.core.cnature org.eclipse.cdt.core.ccnature org.eclipse.cdt.managedbuilder.core.managedBuildNature org.eclipse.cdt.managedbuilder.core.ScannerConfigNature ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/Debug/apps/subdir.mk ================================================ ################################################################################ # Automatically-generated file. Do not edit! 
################################################################################ # Add inputs and outputs from these tool invocations to the build variables CPP_SRCS += \ ../apps/exePhase3.cpp OBJS += \ ./apps/exePhase3.o CPP_DEPS += \ ./apps/exePhase3.d # Each subdirectory must supply rules for building sources it contributes apps/%.o: ../apps/%.cpp @echo 'Building file: $<' @echo 'Invoking: GCC C++ Compiler' g++ -I/rcg/software/Linux/RHEL/6/x86_64/LIB/OPENCV/3.0.0-CUDA65/include -I/usr/include/openblas -I/cs/vml2/msibrahi/workspaces/caffe-lstm/include -I/cs/vml2/msibrahi/workspaces/caffe-lstm/build/src -I/cs/vml2/msibrahi/workspaces/software/dlib -I/usr/local/cuda-6.5/include -I/rcg/software/Linux/RHEL/6/x86_64/LIB/BOOST/1.57.0/include -I/rcg/software/Linux/RHEL/6/x86_64/LIB/GLOG/0.3.3/include -I/rcg/software/Linux/RHEL/6/x86_64/LANG/PYTHON/2.7.6-SYSTEM/include/python2.7 -O0 -g3 -Wall -c -fmessage-length=0 -std=c++0x -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o "$@" "$<" @echo 'Finished building: $<' @echo ' ' ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/Debug/makefile ================================================ ################################################################################ # Automatically-generated file. Do not edit! 
################################################################################ -include ../makefile.init RM := rm -rf # All of the sources participating in the build are defined here -include sources.mk -include src/subdir.mk -include apps/subdir.mk -include subdir.mk -include objects.mk ifneq ($(MAKECMDGOALS),clean) ifneq ($(strip $(CC_DEPS)),) -include $(CC_DEPS) endif ifneq ($(strip $(C++_DEPS)),) -include $(C++_DEPS) endif ifneq ($(strip $(C_UPPER_DEPS)),) -include $(C_UPPER_DEPS) endif ifneq ($(strip $(CXX_DEPS)),) -include $(CXX_DEPS) endif ifneq ($(strip $(CPP_DEPS)),) -include $(CPP_DEPS) endif ifneq ($(strip $(C_DEPS)),) -include $(C_DEPS) endif endif -include ../makefile.defs # Add inputs and outputs from these tool invocations to the build variables # All Target all: ibrahim16-deep-act-rec-part # Tool invocations ibrahim16-deep-act-rec-part: $(OBJS) $(USER_OBJS) @echo 'Building target: $@' @echo 'Invoking: GCC C++ Linker' g++ -L/rcg/software/Linux/RHEL/6/x86_64/LIB/OPENCV/3.0.0-CUDA65/lib -L/cs/vml2/msibrahi/workspaces/caffe-lstm/build/lib -L/usr/local/lib -L/usr/lib -L/usr/local/cuda-6.5/lib64 -L/rcg/software/Linux/RHEL/6/x86_64/LANG/PYTHON/2.7.6-SYSTEM/lib -L/rcg/software/Linux/RHEL/6/x86_64/LIB/GLOG/0.3.3/lib -L/rcg/software/Linux/RHEL/6/x86_64/LIB/BOOST/1.57.0/lib -o "ibrahim16-deep-act-rec-part" $(OBJS) $(USER_OBJS) $(LIBS) @echo 'Finished building target: $@' @echo ' ' # Other Targets clean: -$(RM) $(CC_DEPS)$(C++_DEPS)$(EXECUTABLES)$(C_UPPER_DEPS)$(CXX_DEPS)$(OBJS)$(CPP_DEPS)$(C_DEPS) ibrahim16-deep-act-rec-part -@echo ' ' .PHONY: all clean dependents .SECONDARY: -include ../makefile.targets ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/Debug/objects.mk ================================================ ################################################################################ # Automatically-generated file. Do not edit! 
################################################################################ USER_OBJS := /cs/vml2/msibrahi/workspaces/software/dlib/examples/build/dlib_build/libdlib.a LIBS := -lprotobuf -lopencv_core -lopencv_highgui -lopencv_imgproc -lopencv_ml -lleveldb -lgflags -lleveldb -lglog -lboost_system -lboost_filesystem -lboost_chrono -lboost_python -lpython2.7 -lcaffe ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/Debug/sources.mk ================================================ ################################################################################ # Automatically-generated file. Do not edit! ################################################################################ C_UPPER_SRCS := CXX_SRCS := C++_SRCS := OBJ_SRCS := CC_SRCS := ASM_SRCS := CPP_SRCS := C_SRCS := O_SRCS := S_UPPER_SRCS := CC_DEPS := C++_DEPS := EXECUTABLES := C_UPPER_DEPS := CXX_DEPS := OBJS := CPP_DEPS := C_DEPS := # Every subdirectory with source files must be described here SUBDIRS := \ src \ apps \ ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/Debug/src/subdir.mk ================================================ ################################################################################ # Automatically-generated file. Do not edit! 
################################################################################ # Add inputs and outputs from these tool invocations to the build variables CPP_SRCS += \ ../src/dlib-tracker-wrapper.cpp \ ../src/images-utilities.cpp \ ../src/leveldb-reader.cpp \ ../src/leveldb-writer.cpp \ ../src/rect-helper.cpp \ ../src/utilities.cpp \ ../src/volleyball-dataset-mgr.cpp OBJS += \ ./src/dlib-tracker-wrapper.o \ ./src/images-utilities.o \ ./src/leveldb-reader.o \ ./src/leveldb-writer.o \ ./src/rect-helper.o \ ./src/utilities.o \ ./src/volleyball-dataset-mgr.o CPP_DEPS += \ ./src/dlib-tracker-wrapper.d \ ./src/images-utilities.d \ ./src/leveldb-reader.d \ ./src/leveldb-writer.d \ ./src/rect-helper.d \ ./src/utilities.d \ ./src/volleyball-dataset-mgr.d # Each subdirectory must supply rules for building sources it contributes src/%.o: ../src/%.cpp @echo 'Building file: $<' @echo 'Invoking: GCC C++ Compiler' g++ -I/rcg/software/Linux/RHEL/6/x86_64/LIB/OPENCV/3.0.0-CUDA65/include -I/usr/include/openblas -I/cs/vml2/msibrahi/workspaces/caffe-lstm/include -I/cs/vml2/msibrahi/workspaces/caffe-lstm/build/src -I/cs/vml2/msibrahi/workspaces/software/dlib -I/usr/local/cuda-6.5/include -I/rcg/software/Linux/RHEL/6/x86_64/LIB/BOOST/1.57.0/include -I/rcg/software/Linux/RHEL/6/x86_64/LIB/GLOG/0.3.3/include -I/rcg/software/Linux/RHEL/6/x86_64/LANG/PYTHON/2.7.6-SYSTEM/include/python2.7 -O0 -g3 -Wall -c -fmessage-length=0 -std=c++0x -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o "$@" "$<" @echo 'Finished building: $<' @echo ' ' ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/Release/apps/subdir.mk ================================================ ################################################################################ # Automatically-generated file. Do not edit! 
################################################################################ # Add inputs and outputs from these tool invocations to the build variables CPP_SRCS += \ ../apps/exePhase3.cpp OBJS += \ ./apps/exePhase3.o CPP_DEPS += \ ./apps/exePhase3.d # Each subdirectory must supply rules for building sources it contributes apps/%.o: ../apps/%.cpp @echo 'Building file: $<' @echo 'Invoking: GCC C++ Compiler' g++ -I/rcg/software/Linux/RHEL/6/x86_64/LIB/OPENCV/3.0.0-CUDA65/include -I/usr/include/openblas -I/cs/vml2/msibrahi/workspaces/caffe-lstm/include -I/cs/vml2/msibrahi/workspaces/caffe-lstm/build/src -I/cs/vml2/msibrahi/workspaces/software/dlib -I/usr/local/cuda-6.5/include -I/rcg/software/Linux/RHEL/6/x86_64/LIB/BOOST/1.57.0/include -I/rcg/software/Linux/RHEL/6/x86_64/LIB/GLOG/0.3.3/include -I/rcg/software/Linux/RHEL/6/x86_64/LANG/PYTHON/2.7.6-SYSTEM/include/python2.7 -O3 -Wall -c -fmessage-length=0 -std=c++0x -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o "$@" "$<" @echo 'Finished building: $<' @echo ' ' ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/Release/makefile ================================================ ################################################################################ # Automatically-generated file. Do not edit! 
################################################################################ -include ../makefile.init RM := rm -rf # All of the sources participating in the build are defined here -include sources.mk -include src/subdir.mk -include apps/subdir.mk -include subdir.mk -include objects.mk ifneq ($(MAKECMDGOALS),clean) ifneq ($(strip $(CC_DEPS)),) -include $(CC_DEPS) endif ifneq ($(strip $(C++_DEPS)),) -include $(C++_DEPS) endif ifneq ($(strip $(C_UPPER_DEPS)),) -include $(C_UPPER_DEPS) endif ifneq ($(strip $(CXX_DEPS)),) -include $(CXX_DEPS) endif ifneq ($(strip $(CPP_DEPS)),) -include $(CPP_DEPS) endif ifneq ($(strip $(C_DEPS)),) -include $(C_DEPS) endif endif -include ../makefile.defs # Add inputs and outputs from these tool invocations to the build variables # All Target all: ibrahim16-deep-act-rec-part # Tool invocations ibrahim16-deep-act-rec-part: $(OBJS) $(USER_OBJS) @echo 'Building target: $@' @echo 'Invoking: GCC C++ Linker' g++ -L/rcg/software/Linux/RHEL/6/x86_64/LIB/OPENCV/3.0.0-CUDA65/lib -L/cs/vml2/msibrahi/workspaces/caffe-lstm/build/lib -L/usr/local/lib -L/usr/lib -L/usr/local/cuda-6.5/lib64 -L/rcg/software/Linux/RHEL/6/x86_64/LANG/PYTHON/2.7.6-SYSTEM/lib -L/rcg/software/Linux/RHEL/6/x86_64/LIB/GLOG/0.3.3/lib -L/rcg/software/Linux/RHEL/6/x86_64/LIB/BOOST/1.57.0/lib -o "ibrahim16-deep-act-rec-part" $(OBJS) $(USER_OBJS) $(LIBS) @echo 'Finished building target: $@' @echo ' ' # Other Targets clean: -$(RM) $(CC_DEPS)$(C++_DEPS)$(EXECUTABLES)$(C_UPPER_DEPS)$(CXX_DEPS)$(OBJS)$(CPP_DEPS)$(C_DEPS) ibrahim16-deep-act-rec-part -@echo ' ' .PHONY: all clean dependents .SECONDARY: -include ../makefile.targets ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/Release/objects.mk ================================================ ################################################################################ # Automatically-generated file. Do not edit! 
################################################################################ USER_OBJS := /cs/vml2/msibrahi/workspaces/software/dlib/examples/build/dlib_build/libdlib.a LIBS := -lprotobuf -lopencv_core -lopencv_highgui -lopencv_imgproc -lopencv_ml -lgflags -lleveldb -lglog -lboost_system -lboost_filesystem -lboost_chrono -lboost_python -lpython2.7 -lcaffe ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/Release/sources.mk ================================================ ################################################################################ # Automatically-generated file. Do not edit! ################################################################################ C_UPPER_SRCS := CXX_SRCS := C++_SRCS := OBJ_SRCS := CC_SRCS := ASM_SRCS := CPP_SRCS := C_SRCS := O_SRCS := S_UPPER_SRCS := CC_DEPS := C++_DEPS := EXECUTABLES := C_UPPER_DEPS := CXX_DEPS := OBJS := CPP_DEPS := C_DEPS := # Every subdirectory with source files must be described here SUBDIRS := \ src \ apps \ ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/Release/src/subdir.mk ================================================ ################################################################################ # Automatically-generated file. Do not edit! 
################################################################################ # Add inputs and outputs from these tool invocations to the build variables CPP_SRCS += \ ../src/dlib-tracker-wrapper.cpp \ ../src/images-utilities.cpp \ ../src/leveldb-reader.cpp \ ../src/leveldb-writer.cpp \ ../src/rect-helper.cpp \ ../src/utilities.cpp \ ../src/volleyball-dataset-mgr.cpp OBJS += \ ./src/dlib-tracker-wrapper.o \ ./src/images-utilities.o \ ./src/leveldb-reader.o \ ./src/leveldb-writer.o \ ./src/rect-helper.o \ ./src/utilities.o \ ./src/volleyball-dataset-mgr.o CPP_DEPS += \ ./src/dlib-tracker-wrapper.d \ ./src/images-utilities.d \ ./src/leveldb-reader.d \ ./src/leveldb-writer.d \ ./src/rect-helper.d \ ./src/utilities.d \ ./src/volleyball-dataset-mgr.d # Each subdirectory must supply rules for building sources it contributes src/%.o: ../src/%.cpp @echo 'Building file: $<' @echo 'Invoking: GCC C++ Compiler' g++ -I/rcg/software/Linux/RHEL/6/x86_64/LIB/OPENCV/3.0.0-CUDA65/include -I/usr/include/openblas -I/cs/vml2/msibrahi/workspaces/caffe-lstm/include -I/cs/vml2/msibrahi/workspaces/caffe-lstm/build/src -I/cs/vml2/msibrahi/workspaces/software/dlib -I/usr/local/cuda-6.5/include -I/rcg/software/Linux/RHEL/6/x86_64/LIB/BOOST/1.57.0/include -I/rcg/software/Linux/RHEL/6/x86_64/LIB/GLOG/0.3.3/include -I/rcg/software/Linux/RHEL/6/x86_64/LANG/PYTHON/2.7.6-SYSTEM/include/python2.7 -O3 -Wall -c -fmessage-length=0 -std=c++0x -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o "$@" "$<" @echo 'Finished building: $<' @echo ' ' ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/apps/exePhase1_2.cpp ================================================ /* * w-driver-volleyball-lstm-evaluator.cpp * * Created on: Jul 13, 2015 * Author: msibrahi */ #include #include #include #include #include #include using std::vector; using std::set; using std::map; using std::pair; using std::endl; using std::cout; #include "../src/leveldb-writer.h" #include 
"../src/custom-macros.h" #include "../src/rect-helper.h" #include "../src/utilities.h" #include "../src/images-utilities.h" #include "../src/custom-images-macros.h" #include "../src/dlib-tracker-wrapper.h" #include "../src/volleyball-dataset-mgr.h" using MostCV::VolleyballPerson; using MostCV::VolleyballVideoData; using MostCV::VolleyballDatasetPart; using MostCV::VolleyballDatasetMgr; using MostCV::RectHelper; #include #include #include const int resize_width = 256; const int resize_height = 256; const int num_channels = 3; const int kPlayersCount = 12; ///////////////////////////////////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { string program_name = MostCV::consumeStringParam(argc, argv); cerr << "Start: " << program_name << endl; // read program entry data string dataset_videos_path = MostCV::consumeStringParam(argc, argv); string config_path = MostCV::consumeStringParam(argc, argv); string leveldb_output_path = MostCV::consumeStringParam(argc, argv); int temporal_window = MostCV::consumeIntParam(argc, argv); int step = MostCV::consumeIntParam(argc, argv); int bIsPrepareLSTMData = MostCV::consumeIntParam(argc, argv); // otherwise fusion data if (bIsPrepareLSTMData) cerr << "LSTM 1 preparation" << endl; else cerr << "Data Fusion for LSTM 2" << endl; assert(temporal_window > 0); MostCV::fixDir(config_path); MostCV::fixDir(dataset_videos_path); MostCV::fixDir(leveldb_output_path); cerr << "Loading the dataset..." 
<< endl; VolleyballDatasetMgr mgr(config_path, dataset_videos_path); cerr << "Temporal window = " << temporal_window << " with step = " << step << "\n\n"; vector > dbMgrs; Mat blackRectImage = Mat::zeros(resize_width, resize_height, CV_8UC3); ///////////////////////////////////////////////////////////////////////////////////////////////////////// // Create leveldb datasets for (auto &dataset : mgr.dataset_division_) { dataset.dataset_db_name_ = dataset.dataset_name_ + "-leveldb"; dataset.dataset_db_path_ = leveldb_output_path + dataset.dataset_db_name_; MostCV::fixDir(dataset.dataset_db_path_); cerr<<"Creating a new dataset\n"; dbMgrs.push_back(new MostCV::LeveldbWriter(dataset.dataset_db_path_, resize_height, resize_width, num_channels, false)); if (bIsPrepareLSTMData) dbMgrs.back()->setLabelsRange(mgr.total_persons_labels); else dbMgrs.back()->setLabelsRange(mgr.total_scene_labels); } ///////////////////////////////////////////////////////////////////////////////////////////////////////// int dataset_pos = 0; boost::mt19937 generator(100); boost::uniform_int<> uni_dist; boost::variate_generator > rand_generator(generator, uni_dist); for (auto dataset : mgr.dataset_division_) { // Shuffle data before use cerr << "Extracting shuffled elements from " << dataset.dataset_name_ << " Data Set. Total videos = " << dataset.videos_vec_.size() << "\n"; Ptr dbMgr = dbMgrs[dataset_pos++]; vector > database_shuffled; for (auto video : dataset.videos_vec_) { for (auto frame_id : video.annot_frame_id_vec_) database_shuffled.push_back(std::make_pair(video, frame_id)); } std::random_shuffle(database_shuffled.begin(), database_shuffled.end(), rand_generator); if (bIsPrepareLSTMData) { cerr << "Total images for current data set is " << database_shuffled.size() << ". Overall entries will be <= " << temporal_window * database_shuffled.size() * kPlayersCount << endl; } else { cerr << "Total images for current data set is " << database_shuffled.size() << ". 
Overall entries will be = " << temporal_window * database_shuffled.size() * kPlayersCount << endl; } ///////////////////////////////////////////////////////////////////////////////////////////////////////// for (auto database_entry : database_shuffled) { auto video = database_entry.first; string frame_id = database_entry.second; int frame_label = video.annot_frame_id_to_activity_id_map_[frame_id]; // prepare tracking data pair, vector > images_paths_seq = video.GetTemporalWindowPaths(frame_id, temporal_window, step, false); vector imagesSequenceBefore, imagesSequenceAfter; Mat img; for (auto path : images_paths_seq.first) imagesSequenceBefore.push_back(cv::imread(path)); for (auto path : images_paths_seq.second) imagesSequenceAfter.push_back(cv::imread(path)); if (imagesSequenceAfter.size()) img = imagesSequenceAfter.back(); else img = imagesSequenceBefore.back(); assert(!img.empty()); vector &persons = video.annot_frame_id_persons_map_[frame_id]; vector images; vector > persons_tracklets; for (auto person : persons) { MostCV::DlibTrackerWrapper tracker(person.bbox_.r); pair, vector > tracklet = tracker.Process(imagesSequenceBefore, imagesSequenceAfter); images = tracklet.first; persons_tracklets.push_back(tracklet.second); } // generates temporal_window * kPlayersCount * frames int seq_id = 0, person_pos = 0; for (auto tracklet : persons_tracklets) { int rect_pos = 0; for (auto img : images) { dbMgr->clearDatum(); //MostCV::ShowImage(img(tracklet[rect_pos])); assert(dbMgr->addImageToDatum(img(tracklet[rect_pos]), num_channels)); if (bIsPrepareLSTMData) dbMgr->setDatumLabel(persons[person_pos].action_id_); else dbMgr->setDatumLabel(frame_label); dbMgr->addDatumToBatch(video.video_id_ + "_" + frame_id + "_" + MostCV::toIntStr("000", seq_id++)); rect_pos++; } ++person_pos; } // for missing persons, add zero images if (!bIsPrepareLSTMData) { LP(j, kPlayersCount - persons_tracklets.size()) { LP(k, temporal_window) { dbMgr->clearDatum(); 
assert(dbMgr->addImageToDatum(blackRectImage, num_channels)); dbMgr->setDatumLabel(frame_label); dbMgr->addDatumToBatch(video.video_id_ + "_" + frame_id + "_" + MostCV::toIntStr("000", seq_id++)); } } } } dbMgr->forceFinalize(); } cerr << "\n\nBye: " << program_name << endl; return 0; } ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/apps/exePhase3.cpp ================================================ #include #include #include #include #include using std::vector; using std::set; using std::string; using std::pair; using std::endl; using std::cout; #include "boost/algorithm/string.hpp" #include "google/protobuf/text_format.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/net.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/db.hpp" #include "caffe/util/io.hpp" #include "caffe/vision_layers.hpp" using caffe::Blob; using caffe::Caffe; using caffe::Datum; using caffe::Net; using caffe::Layer; using caffe::LayerParameter; using caffe::DataParameter; using caffe::NetParameter; using boost::shared_ptr; namespace db = caffe::db; #include "../src/utilities.h" #include "../src/leveldb-reader.h" #include "../src/leveldb-writer.h" enum fuse_style { concatenate_players = 0, max_pool_players_1 = 1, // all players in one vec of feature mid size max_pool_players_2 = 2, max_pool_players_4 = 3, // divide the ground 4 blocks and max pool it. E.g. 
in 16 players, each 4 has max pool avg_pool_players_1 = 4, avg_pool_players_2 = 5, avg_pool_players_4 = 6, sum_pool_players_1 = 7, sum_pool_players_2 = 8, sum_pool_players_4 = 9 }; string fuse_style_sz[] = { "concatenate_players", "max_pool_players_1", "max_pool_players_2", "max_pool_players_4", "avg_pool_players_1", "avg_pool_players_2", "avg_pool_players_4", "sum_pool_players_1", "sum_pool_players_2", "sum_pool_players_4" }; int target_fuse_style = concatenate_players; const int kPlayersCount = 12; void RemoveLastBlock(vector &input, int block_length) { assert((int )input.size() >= block_length); for (int i = 0; i < block_length; ++i) input.pop_back(); } void AddLastBlock(vector &input, int block_length) { for (int i = 0; i < block_length; ++i) input.push_back(0); } void RemoveDummyVectors(vector &input, int block_length) { bool is_all_zeros = true; while (is_all_zeros && (int) input.size() > block_length) { // Leave at least 1 block int last_idx = input.size() - 1; for (int i = 0; i < is_all_zeros && block_length; ++i) is_all_zeros &= input[last_idx - i] == 0; if (is_all_zeros) RemoveLastBlock(input, block_length); } } // target_blocks_cnt = 1 => merge all sub-vectors in 1 block // target_blocks_cnt = 4 => merge every set of consecutive sub-vectors to get total 4 blocks vector VectorsFusing(vector &input, int block_length, int target_blocks_cnt) { // I fixed bug here...hopefully not big problem! 
if (target_fuse_style == avg_pool_players_1 || target_fuse_style == sum_pool_players_1 || target_fuse_style == max_pool_players_1) RemoveDummyVectors(input, block_length); else if (target_fuse_style == concatenate_players) { int cur_blocks = input.size() / block_length; // then we need specific count of boxes assert(cur_blocks >= kPlayersCount); while (cur_blocks > kPlayersCount) { --cur_blocks; RemoveLastBlock(input, block_length); } } else { RemoveDummyVectors(input, block_length); while (input.size() > 0 && (input.size() % (block_length * target_blocks_cnt) != 0)) AddLastBlock(input, block_length); } vector output; const float* pData = &input[0]; if (input.size() % (block_length * target_blocks_cnt) != 0) { cerr << "Error A%(B*C) != 0 => " << input.size() << " " << block_length << " " << target_blocks_cnt << "\n"; assert(input.size() % (block_length * target_blocks_cnt) == 0); } int merge_blocks_cnt = input.size() / (block_length * target_blocks_cnt); // merge cnt for (int i = 0; i < (int) input.size(); i += block_length * merge_blocks_cnt) { int t = merge_blocks_cnt; vector sub_output(block_length); for (int j = 0; j < block_length; ++j) sub_output[j] = pData[j]; pData += block_length; --t; while (t--) { for (int j = 0; j < block_length; ++j) { if (target_fuse_style == avg_pool_players_1 || target_fuse_style == avg_pool_players_2 || target_fuse_style == avg_pool_players_4 || target_fuse_style == sum_pool_players_1|| target_fuse_style == sum_pool_players_2|| target_fuse_style == sum_pool_players_4) sub_output[j] += pData[j]; else sub_output[j] = std::max(sub_output[j], pData[j]); } pData += block_length; } for (auto val : sub_output) output.push_back(val); } if (target_fuse_style == avg_pool_players_1 || target_fuse_style == avg_pool_players_2 || target_fuse_style == avg_pool_players_4) { for (auto &val : output) val /= merge_blocks_cnt; } return output; } template void feature_extraction_pipeline(int &argc, char** &argv) { target_fuse_style = 
MostCV::consumeIntParam(argc, argv, "target_fuse_style"); LOG(ERROR)<< "Fusing style = "< > feature_extraction_net(new Net(feature_extraction_proto, caffe::Phase::TEST)); LOG(ERROR)<<"Loading the Model\n"; feature_extraction_net->CopyTrainedLayersFrom(pretrained_binary_proto); vector blob_names_vec; int blobs_cnt = MostCV::consumeIntParam(argc, argv, "blobs_cnt"); assert(blobs_cnt > 0); LOG(ERROR)<<"# of blobs is "<has_blob(blob_name)) << "Unknown feature blob name " << blob_name << " in the network " << feature_extraction_proto; blob_names_vec.push_back(blob_name); } string output_dataset_name = MostCV::consumeStringParam(argc, argv); int num_mini_batches = MostCV::consumeIntParam(argc, argv, "num_mini_batches"); LOG(ERROR)<<"num_mini_batches: "<*> input_vec; int db_entry_idx = 0; int batch_size = -1; int dim_features = -1; std::set batch_labels; // all our batch value must be same std::set dataset_labels; // logically database shouldn't have only 1 label for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index) { // e.g. 100 iterations. Probably roll on data if needed feature_extraction_net->Forward(input_vec); // Take one batch of data (e.g. 50 images), and pass them to end of network // Load the Labels const shared_ptr > label_blob = feature_extraction_net->blob_by_name("label"); batch_size = label_blob->num(); // e.g. 16 batches for volleyball..represents the boxes of a frame. assert(batch_size == frames_window * kPlayersCount); batch_labels.clear(); int current_label = -1; for (int n = 0; n < batch_size; ++n) { const Dtype* label_blob_data = label_blob->cpu_data() + label_blob->offset(n); // move offset to ith blob in batch current_label = label_blob_data[0]; // all will be same value batch_labels.insert(current_label); dataset_labels.insert(current_label); } if (batch_labels.size() != 1) { // every 1 batch should have same value cerr << "\n\nERROR. Every 1 batch should have the same value. 
Inconsistent batch # " << batch_index + 1 << "-th\n"; cerr << "Overall unique labels are: " << batch_labels.size() << ". The appeared labels are: "; for(auto label : batch_labels) cerr< > > feature_blob_vec; for (auto blob_name : blob_names_vec) { shared_ptr > feature_blob = feature_extraction_net->blob_by_name(blob_name); // get e.g. fc7 blob for the batch feature_blob_vec.push_back(feature_blob); } int total_dim_features = 0; static bool print_once_feature_vec = true; if (print_once_feature_vec) LOG(ERROR)<<"\n\n"; for (auto feature_blob : feature_blob_vec) { dim_features = feature_blob->count() / batch_size; // e.g. 4096 total_dim_features += dim_features; // e.g. 4096 of fc7 + 250 of lstm1 if (print_once_feature_vec) LOG(ERROR)<<"ith Vector Length = "< > window_feature_vecs(frames_window); for (int n = 0; n < batch_size; ++n) { for (auto feature_blob : feature_blob_vec) { dim_features = feature_blob->count() / batch_size; // e.g. 4096 const Dtype* feature_blob_data = feature_blob->cpu_data() + feature_blob->offset(n); // move offset to ith blob in batch int p = n % frames_window; for (int d = 0; d < dim_features; ++d) window_feature_vecs[p].push_back(feature_blob_data[d]); } } for (auto &feature_vec : window_feature_vecs) { if (target_fuse_style == max_pool_players_1 || target_fuse_style == avg_pool_players_1 || target_fuse_style == sum_pool_players_1) feature_vec = VectorsFusing(feature_vec, total_dim_features, 1); else if (target_fuse_style == max_pool_players_2 || target_fuse_style == avg_pool_players_2 || target_fuse_style == sum_pool_players_2) feature_vec = VectorsFusing(feature_vec, total_dim_features, 2); else if (target_fuse_style == max_pool_players_4 || target_fuse_style == avg_pool_players_4 || target_fuse_style == sum_pool_players_4) feature_vec = VectorsFusing(feature_vec, total_dim_features, 4); // otherwise, keep it concatenated if (print_once_feature_vec) LOG(ERROR)<<"Fused Vector Length = "< 1); // some variety make sense! 
} int main(int argc, char** argv) { ::google::InitGoogleLogging(argv[0]); MostCV::consumeStringParam(argc, argv); // read program entry data LOG(ERROR)<< "Make sure to have LD_LIBRARY_PATH pointing to LSTM implementation in case of LSTM\n\n"; // as long as chucks of data while (argc) { if (argc < 6) { LOG(ERROR)<< "At least 6 parameters expected\n"; assert(false); } feature_extraction_pipeline(argc, argv); LOG(ERROR)<< "\n\nSuccessfully extracted the features!\n\n"; } return 0; } ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/apps/exePhase4.cpp ================================================ /* * w-driver-volleyball-lstm-evaluator.cpp * * Created on: Jul 13, 2015 * Author: msibrahi */ #include #include #include #include #include #include #include #include using std::vector; using std::set; using std::multiset; using std::map; using std::pair; using std::string; using std::endl; using std::cerr; #include "boost/algorithm/string.hpp" #include "google/protobuf/text_format.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/net.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/db.hpp" #include "caffe/util/io.hpp" #include "caffe/vision_layers.hpp" using caffe::Blob; using caffe::Caffe; using caffe::Datum; using caffe::Net; using caffe::Layer; using caffe::LayerParameter; using caffe::DataParameter; using caffe::NetParameter; using boost::shared_ptr; namespace db = caffe::db; #include "../src/utilities.h" #include "../src/leveldb-reader.h" void evaluate(vector truthLabels, vector resultLabels, int w) { set total_labels; map > confusion_freq_maps; map label_freq; int correct = 0; cerr<<"\n\n"; for (int i = 0; i < (int) truthLabels.size(); ++i) { correct += truthLabels[i] == resultLabels[i]; cerr << "Test " << i + 1 << ": Result = " << resultLabels[i] << " GroundTruth = " << truthLabels[i] << "\n"; confusion_freq_maps[truthLabels[i]][resultLabels[i]]++; total_labels.insert(truthLabels[i]); 
total_labels.insert(resultLabels[i]); label_freq[truthLabels[i]]++; } cerr.setf(std::ios::fixed); cerr.precision(2); cerr<<"\n\n"; cerr << "Total testing frames: " << truthLabels.size() << " with temporal window: " << w << "\n"; cerr << "Temporal accuracy : " << 100.0 * correct / truthLabels.size() << " %\n"; cerr << "\n=======================================================================================\n"; cerr << "\nConfusion Matrix - Truth (col) / Result(row)\n\n"; cerr << std::setw(5) << "T/R" << ": "; for (auto r_label : total_labels) cerr << std::setw(5) << r_label; cerr << "\n=======================================================================================\n"; for (auto t_label : total_labels) { int sum = 0; cerr << std::setw(5) << t_label << ": "; for (auto r_label : total_labels) { cerr << std::setw(5) << confusion_freq_maps[t_label][r_label]; sum += confusion_freq_maps[t_label][r_label]; } double percent = 0; if (label_freq[t_label] > 0) percent = 100.0 * confusion_freq_maps[t_label][t_label] / label_freq[t_label]; cerr << " \t=> Total Correct = " << std::setw(5) << confusion_freq_maps[t_label][t_label] << " / " << std::setw(5) << sum << " = " << percent << " %\n"; } cerr<<"\n\n"; cerr << std::setw(7) << "T/R" << ": "; for (auto r_label : total_labels) cerr << std::setw(7) << r_label; cerr << "\n=======================================================================================\n"; for (auto t_label : total_labels) { cerr << std::setw(7) << t_label << ": "; for (auto r_label : total_labels) { double percent = 0; if (label_freq[t_label] > 0) percent = 100.0 * confusion_freq_maps[t_label][r_label] / label_freq[t_label]; cerr << std::setw(7) << percent; } cerr<<"\n"; } cerr<<"\nTo get labels corresponding to IDs..see dataset loading logs\n"; } int getArgmax(vector &v) { int pos = 0; assert(v.size() > 0); for (int j = 1; j < (int) v.size(); ++j) { if (v[j] > v[pos]) pos = j; } return pos; } template void feature_extraction_pipeline(int &argc, 
char** &argv) { int frames_window = MostCV::consumeIntParam(argc, argv); LOG(ERROR)<< "Temporal Window = " << frames_window; string computation_mode = MostCV::consumeStringParam(argc, argv); if (strcmp(computation_mode.c_str(), "GPU") == 0) { uint device_id = MostCV::consumeIntParam(argc, argv); LOG(ERROR)<< "Using GPU"; LOG(ERROR)<< "Using Device_id = " << device_id; Caffe::SetDevice(device_id); Caffe::set_mode(Caffe::GPU); } else { LOG(ERROR)<< "Using CPU"; Caffe::set_mode(Caffe::CPU); } string pretrained_binary_proto(MostCV::consumeStringParam(argc, argv)); string feature_extraction_proto(MostCV::consumeStringParam(argc, argv)); LOG(ERROR)<<"Model: "< > feature_extraction_net(new Net(feature_extraction_proto, caffe::Phase::TEST)); LOG(ERROR)<<"Loading the Model\n"; feature_extraction_net->CopyTrainedLayersFrom(pretrained_binary_proto); string blob_name = MostCV::consumeStringParam(argc, argv); LOG(ERROR)<<"blob_name: "<has_blob(blob_name)) << "Unknown feature blob name " << blob_name << " in the network " << feature_extraction_proto; int num_mini_batches = MostCV::consumeIntParam(argc, argv); LOG(ERROR)<<"num_mini_batches: "<*> input_vec; int batch_size = -1; int dim_features = -1; std::set labels; // every (2w+1) * batch size MUST all have same label vector truthLabels; vector propAvgMaxResultLabels; for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index) { // e.g. 100 iterations. Probably roll on data if needed feature_extraction_net->Forward(input_vec); // Take one batch of data (e.g. 50 images), and pass them to end of network // Load the Labels const shared_ptr > label_blob = feature_extraction_net->blob_by_name("label"); batch_size = label_blob->num(); // e.g. 
50 batches assert(batch_size == frames_window); int current_label = -1; for (int n = 0; n < batch_size; ++n) { const Dtype* label_blob_data = label_blob->cpu_data() + label_blob->offset(n); // move offset to ith blob in batch current_label = label_blob_data[0]; // all will be same value labels.insert(current_label); if (n == 0) truthLabels.push_back(current_label); } if (labels.size() != 1) { // every 1 batch should have same value LOG(ERROR)<< "Something wrong. every 1 batch should have same value. New value at element " << batch_index + 1 << "\n"; assert(false); } labels.clear(); const shared_ptr > feature_blob = feature_extraction_net->blob_by_name(blob_name); // get e.g. fc7 blob for the batch dim_features = feature_blob->count() / batch_size; assert(dim_features > 1); const Dtype* feature_blob_data = nullptr; vector test_case_sum(dim_features); for (int n = 0; n < batch_size; ++n) { feature_blob_data = feature_blob->cpu_data() + feature_blob->offset(n); // move offset to ith blob in batch vector test_case; for (int j = 0; j < dim_features; ++j) { test_case.push_back(feature_blob_data[j]); test_case_sum[j] += feature_blob_data[j]; } } propAvgMaxResultLabels.push_back( getArgmax(test_case_sum) ); } evaluate(truthLabels, propAvgMaxResultLabels, 1); } int main(int argc, char** argv) { ::google::InitGoogleLogging(argv[0]); MostCV::consumeStringParam(argc, argv); // read program entry data if (argc < 6) { LOG(ERROR)<< "At least 6 parameters expected\n"; assert(false); } LOG(ERROR)<< "Make sure to have LD_LIBRARY_PATH pointing to LSTM implementation in case of LSTM\n\n"; feature_extraction_pipeline(argc, argv); return 0; } ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/src/custom-abbreviation.h ================================================ /* * custom-abbreviation.h * * Created on: 2015-06-08 * Author: Moustafa S. 
Ibrahim */ #ifndef CUSTOM_ABBREVIATION_H_ #define CUSTOM_ABBREVIATION_H_ #include namespace MostCV { typedef vector vi; typedef vector vd; typedef vector< vi > vvi; typedef vector< vd > vvd; typedef vector vs; typedef long long ll; typedef long double ld; //typedef unsigned char uchar; const ll OO = (ll)1e10; const double PI = std::acos(-1.0); const long double EPS = (1e-15); // 4 orthogonal directions, 4 diagonal directions and last is same position //int DR11[9] = {1, 0, 0, -1, 1, 1, -1, -1, 0}; //int DC11[9] = {0, 1, -1, 0, -1, 1, -1, 1, 0}; enum DIRS_ENUM {Left, Right, Bottpm, Top}; } #endif /* CUSTOM_ABBREVIATION_H_ */ ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/src/custom-images-macros.h ================================================ /* * custom-images-macros.h * * Created on: 2015-05-21 * Author: Moustafa S. Ibrahim */ #ifndef CUSTOM_IMAGES_MACROS_H_ #define CUSTOM_IMAGES_MACROS_H_ namespace MostCV { #define REPIMG2(y, x, img) for(int y=0;y<(int)(img.rows);++y) for(int x=0;x<(int)(img.cols);++x) #define REPIMG3(y, x, c, img) for(int y=0;y<(int)(img.rows);++y) for(int x=0;x<(int)(img.cols);++x) for(int c=0;c<(int)(img.channels());++c) #define REPIMG_JUMP(y, x, dy, dx, img) for(int y=0;y<(int)(img.rows);y+=dy) for(int x=0;x<(int)(img.cols);x+=dx) } #endif /* CUSTOM_IMAGES_MACROS_H_ */ ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/src/custom-macros.h ================================================ /* * custom-macros.h * * Created on: 2015-05-21 * Author: Moustafa S. 
Ibrahim */ #ifndef CUSTOM_MACROS_H_ #define CUSTOM_MACROS_H_ namespace MostCV { #define ALL(v) ((v).begin()), ((v).end()) #define RALL(v) ((v).rbegin()), ((v).rend()) #define SZ(v) ((int)((v).size())) #define CLR(v, d) memset(v, d, sizeof(v)) #define REP(i, v) for(int i=0;i=(int)(n);--i) #define REPA(v) lpi(i, 0, SZ(v)) lpi(j, 0, SZ(v[i])) // ToDo: http://www.quora.com/What-are-some-macros-that-are-used-in-programming-contests } #endif /* CUSTOM_MACROS_H_ */ ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/src/dlib-tracker-wrapper.cpp ================================================ /* * dlib-tracker-wrapper.cpp * * Created on: 2015-06-22 * Author: Moustafa S. Ibrahim */ #include "dlib-tracker-wrapper.h" #include "custom-images-macros.h" #include using std::cerr; namespace MostCV { DlibTrackerWrapper::DlibTrackerWrapper(Rect initial_location) { initial_location_ = initial_location; step_ = 0; } Rect DlibTrackerWrapper::UpdateTracker(Mat img) { Rect img_rect = Rect(0, 0, img.cols-1, img.rows-1); cv::Mat gray_img; if (CV_8U != img.type() || 1 != img.channels()) cv::cvtColor(img, gray_img, cv::COLOR_BGR2GRAY); else gray_img = img; dlib::array2d dlib_img(gray_img.rows, gray_img.cols); REPIMG2(y, x, gray_img) dlib_img[y][x] = gray_img.at (y, x); if (step_ == 0) { initial_location_ &= img_rect; // Fix first one in case if(initial_location_.area() == 0) { cerr<<"Dlib: Empty rectangle for tracking! 
Let's do workaround\n"; initial_location_ = Rect(0, 0, 1, 1); } tracker_.start_track(dlib_img, dlib::centered_rect(dlib::point(initial_location_.x + initial_location_.width / 2, initial_location_.y + initial_location_.height / 2), initial_location_.width, initial_location_.height)); ++step_; return initial_location_; } tracker_.update(dlib_img); int y1 = tracker_.get_position().top(); int x1 = tracker_.get_position().left(); int y2 = tracker_.get_position().bottom(); int x2 = tracker_.get_position().right(); ++step_; Rect rect = Rect(x1, y1, x2-x1, y2-y1); rect &= img_rect; if(rect.area() < 1) // zero areas usually cause problems. Let's give them 1 area box rect = Rect(0, 0, 1, 1); return rect; } // back like: 0 -1 -2 -3 and forward 0 1 2 3 4 5 6. Helps when tracker centered on frame pair, vector > DlibTrackerWrapper::Process(vector backwardImgs, vector forwardImgs) { vector ret; DlibTrackerWrapper backTracker(initial_location_); for(auto img: backwardImgs) ret.push_back( backTracker.UpdateTracker(img) ); if(forwardImgs.size() > 0) { std::reverse(ret.begin(), ret.end()); std::reverse(backwardImgs.begin(), backwardImgs.end()); backwardImgs.pop_back(); ret.pop_back(); // remove the middle, it will be added again. This is initial_location_ } for(auto img: forwardImgs) { ret.push_back( UpdateTracker(img) ); backwardImgs.push_back(img); } return std::make_pair(backwardImgs, ret); } } ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/src/dlib-tracker-wrapper.h ================================================ /* * dlib-tracker-wrapper.h * * Created on: 2015-06-22 * Author: Moustafa S. 
Ibrahim */ #ifndef DLIB_TRACKER_WRAPPER_H_ #define DLIB_TRACKER_WRAPPER_H_ #include "opencv2/core/core.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" using cv::Mat; using cv::Ptr; using cv::Scalar; using cv::Rect; using cv::Point; using cv::Size; #include #include #include #include #include using std::vector; using std::pair; namespace MostCV { class DlibTrackerWrapper { public: DlibTrackerWrapper(Rect initial_location); Rect UpdateTracker(Mat img); pair, vector > Process(vector backwardImgs, vector forwardImgs); private: dlib::correlation_tracker tracker_; Rect initial_location_; int step_; }; } #endif /* DLIB_TRACKER_WRAPPER_H_ */ ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/src/images-utilities.cpp ================================================ #include "images-utilities.h" #include using std::cout; #include "opencv2/core/core.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" #include "custom-images-macros.h" #include "custom-macros.h" namespace MostCV { void ShowImage(Mat image, int wait, bool bShow, string stringWindowName) { if (bShow) { cv::namedWindow(stringWindowName.c_str(), 1); cv::imshow(stringWindowName.c_str(), image); cv::waitKey(wait); } } void RemoveImagePixels(Mat img, Mat mask, bool is_mask_remove_pixel_black, Point shift) { REPIMG2(y, x, mask) { if (mask.at (y, x) == 0 && !is_mask_remove_pixel_black) continue; if (mask.at (y, x) > 0 && is_mask_remove_pixel_black) continue; if (img.channels() == 3) { for (int c = 0; c < 3; ++c) img.at (y + shift.y, x + shift.x)[c] = 0; } else img.at (y + shift.y, x + shift.x) = 0; } } void FixMask(Mat mask, int threshold) { int cnt = 0; REPIMG2(y, x, mask) { if (mask.at (y, x) >= threshold) { if (mask.at (y, x) != 255) cnt++; mask.at (y, x) = 255; } else { if (mask.at (y, x) != 0) cnt++; mask.at (y, x) = 0; } } //if(cnt) cout<<"FixMask: "< &rectsSoFar, Scalar color) { int lastY = 0; int 
lastX = 0; Rect imgRect = Rect(0, 0, controlsMat.cols - 1, controlsMat.rows - 1); if (rectsSoFar.size()) { Rect r = rectsSoFar.back(); lastY = r.y + r.height + 5; lastX = r.x; } Rect r(lastX, lastY, 100, 30); if ((r & imgRect) != r) { lastY = 0; lastX = r.x + r.width + 5; r = Rect(lastX, lastY, 100, 30); if ((r & imgRect) != r) return false; } cv::rectangle(controlsMat, r, Scalar(255, 255, 255), 2); cv::putText(controlsMat, buttonName, Point(r.x + 2, r.y + r.height / 2), cv::FONT_HERSHEY_SIMPLEX, 0.5, color); rectsSoFar.push_back(r); return true; } vector > GetConnectedComponenets(Mat img, int area_threshold, int pixels_threshold, Scalar lo_diff, Scalar up_diff, int flags) { assert(area_threshold > 0 && pixels_threshold > 0); Mat uchar_img; Rect img_rect(0, 0, img.cols - 1, img.rows - 1); vector > componenets; if (img.channels() > 1) cvtColor(img, uchar_img, CV_BGR2GRAY); else img.copyTo(uchar_img); REPIMG2(y, x, uchar_img) { int pixel_value = (int) uchar_img.at (y, x); if (pixel_value < 1) continue; Rect rect; Mat mask = Mat::zeros(uchar_img.rows + 2, uchar_img.cols + 2, CV_8UC1); int mask_pixels_cnt = floodFill(uchar_img, mask, Point(x, y), Scalar(0), &rect, lo_diff, up_diff, flags); rect &= img_rect; if (rect.area() >= area_threshold && mask_pixels_cnt >= pixels_threshold) { Ptr component = new CComponenets(); MostCV::FixMask(mask); componenets.push_back(component); component->mask = mask(Rect(1, 1, uchar_img.cols, uchar_img.rows)); component->mask_pixels_cnt = mask_pixels_cnt; component->rect = rect; component->flood_starting_point = Point(x, y); component->parent_mask_topleft_point = Point(0, 0); } } return componenets; } Rect GetInternalBlobRect(Mat mask) { assert(mask.type() == CV_8UC1); vector > comps = MostCV::GetConnectedComponenets(mask); if(comps.size() == 0) return Rect(0, 0, 1, 1); Rect union_rect = comps[0]->rect; REP(i, comps) union_rect |= comps[i]->rect; return union_rect; } vector GetCombinedContour(Mat mask) { vector > contours; vector 
hierarchy; Mat componentCpy; mask.copyTo(componentCpy); cv::findContours(componentCpy, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE); vector contoursInOne; REP(j, contours) contoursInOne.insert(contoursInOne.end(), contours[j].begin(), contours[j].end()); return contoursInOne; } Rect GetRect(Mat img) { return Rect(0, 0, img.cols-1, img.rows-1); } void CenterRect(Rect &target_rect, int width, int height) { if(width > target_rect.width) { target_rect.x -= (width - target_rect.width)/2; target_rect.width = width; } if(height > target_rect.height) { target_rect.y -= (height - target_rect.height)/2; target_rect.height = height; } } bool CmpRectTopLeft(const Rect &a, const Rect &b) { if(a.y != b.y) return a.y < b.y; return a.x < b.x; } void SaveVideo(vector images, string path, int fps) { if(images.empty()) { std::cerr<<"ERROR: Empty video\n"; return; } cv::VideoWriter videoObject; videoObject.open(path, CV_FOURCC('X','V','I','D'), fps, Size(images[0].cols, images[0].rows), true); if(!videoObject.isOpened()) { std::cerr<<"ERROR: Problem in out video path: "< #include using std::vector; using std::string; #include "opencv2/core/core.hpp" using cv::Mat; using cv::Ptr; using cv::Point; using cv::Rect; using cv::Scalar; using cv::Size; #include "custom-images-macros.h" namespace MostCV { struct CComponenets { Mat mask; int mask_pixels_cnt; Rect rect; Point flood_starting_point; Point parent_mask_topleft_point; }; void ShowImage(Mat image, int wait = 0, bool bShow = true, string stringWindowName = "Image"); void RemoveImagePixels(Mat img, Mat mask, bool is_mask_remove_pixel_black = false, Point shift = Point(0, 0)); void FixMask(Mat mask, int threshold = 10); void Morphology(Mat mask, bool do_open = true, bool do_close = true, int open_kernel_sz = 3, int close_kernel_sz = 15); vector > GetConnectedComponenets(Mat img, int area_threshold = 1, int pixels_threshold = 1, Scalar lo_diff = Scalar(1), Scalar up_diff = Scalar(1), int flags = 4 + (255 << 8)); Rect 
GetRect(Mat img); Rect GetInternalBlobRect(Mat mask); void CenterRect(Rect &target_rect, int width, int height); vector GetCombinedContour(Mat mask); bool AddButton(Mat controlsMat, string buttonName, vector &rectsSoFar, Scalar color = Scalar(255, 0, 0)); bool CmpRectTopLeft(const Rect &a, const Rect &b); void SaveVideo(vector images, string path, int fps = 25); //////////////////////////// template Mat ToRowMat(const vector &row) { if(row.size() == 0) return Mat(0, 0, cv::DataType::type); const Type *ptr = &row[0]; Mat mat = Mat(1, row.size(), cv::DataType::type); memcpy(mat.data, ptr, row.size()*sizeof(Type)); //Mat tempMat = Mat(featureVec).t(); return mat; } template Mat ToColMat(const vector &col) { if(col.size() == 0) return Mat(0, 0, cv::DataType::type); const Type *ptr = &col[0]; Mat mat = Mat(col.size(), 1, cv::DataType::type); memcpy(mat.data, ptr, col.size()*sizeof(Type)); return mat; } template Mat To2DMat(const vector> & vectors) { Mat mat; for(auto row : vectors) mat.push_back(ToRowMat(row)); return mat; } /* template void perform(function operation, Mat mat) { if(mat.channels() == 2) { REPIMG2(y, x, mat) mat.at<> } else { } } */ } #endif /* IMAGESHELPER_H_ */ ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/src/leveldb-reader.cpp ================================================ /* * leveldb-reader.cpp * * Created on: 2015-05-21 * Author: Moustafa S. 
Ibrahim */ #include #include #include #include "leveldb-reader.h" using std::ifstream; using std::ofstream; using std::endl; using std::cout; #include "utilities.h" MostCV::LevelDBReader::LevelDBReader(const string & database_path, const string & sorted_list_file) { record_idx_ = 0; cache_limit_ = 1000; database_path_ = database_path; is_caching = true; if (sorted_list_file == "") is_caching = false; if (is_caching) { ifstream ifs(sorted_list_file.c_str()); string line; assert(ifs.is_open()); while (getline(ifs, line)) { int pos = line.find(' '); if (pos != -1) line = line.substr(0, pos); pos = line.find_last_of('/'); if (pos != -1) line = line.substr(pos + 1); if (line != "") vectors_names_.push_back(line); } vector images_names_temp = vectors_names_; std::sort(images_names_temp.begin(), images_names_temp.end()); assert(images_names_temp == vectors_names_); } leveldb::Options options; options.create_if_missing = true; leveldb::Status status = leveldb::DB::Open(options, database_path_, &database_); assert(status.ok()); database_iter_ = database_->NewIterator(leveldb::ReadOptions()); assert(database_iter_ != NULL); database_iter_->SeekToFirst(); } MostCV::LevelDBReader::~LevelDBReader() { if (database_iter_ != NULL) delete database_iter_; if (database_ != NULL) delete database_; } bool MostCV::LevelDBReader::GetNextEntry(string &key, vector &retVec, int &label) { if (!database_iter_->Valid()) return false; Datum datum; datum.clear_float_data(); datum.clear_data(); datum.ParseFromString(database_iter_->value().ToString()); key = database_iter_->key().ToString(); label = datum.label(); int expected_data_size = std::max(datum.data().size(), datum.float_data_size()); const int datum_volume_size = datum.channels() * datum.height() * datum.width(); if (expected_data_size != datum_volume_size) { cout << "Something wrong in saved data."; assert(false); } retVec.resize(datum_volume_size); const string& data = datum.data(); if (data.size() != 0) { // Data stored in string, 
e.g. just pixel values of 196608 = 256 * 256 * 3 for (int i = 0; i < datum_volume_size; ++i) retVec[i] = data[i]; } else { // Data stored in real feature vector such as 4096 from feature extraction for (int i = 0; i < datum_volume_size; ++i) retVec[i] = datum.float_data(i); } database_iter_->Next(); ++record_idx_; return true; } bool MostCV::LevelDBReader::GetNextEntryByKey(const string & name, vector &retVec, int &label) { if (!is_caching) { cout << "A sorted file MUST be given. What are you trying to retrive!\n"; assert(false); } if (cache_.count(name)) { retVec = cache_[name]; return true; } string key; while (GetNextEntry(key, retVec, label)) { if ((int) cache_items_.size() == cache_limit_) { map >::iterator it = cache_.find(cache_items_.front()); assert(it != cache_.end()); cache_.erase(it); cache_items_.pop_front(); } cache_[vectors_names_[record_idx_ - 1]] = retVec; cache_items_.push_back(vectors_names_[record_idx_ - 1]); if (vectors_names_[record_idx_ - 1] == name) return true; } cout << "Reached end of data: Total Records: " << record_idx_ << "\n"; cout << "Failed to find data for: " << name << " in database path: " << database_path_ << "\n"; assert(false); // We failed to retrieve! 
return false; } void MostCV::LevelDBReader::Dump(const string & file_path, int featureVectorLimit) { record_idx_ = 0; database_iter_->SeekToFirst(); ofstream ofs(file_path.c_str()); vector retVec; string key; int label; while (GetNextEntry(key, retVec, label)) { ofs << "key=" << key << ", label=" << label << ", features length=" << retVec.size(); if (featureVectorLimit > 0) { ofs << ", truncated"; retVec.resize(featureVectorLimit); // To avoid writing much } ofs << ", feature vec= "; for (size_t i = 0; i < retVec.size(); ++i) ofs << retVec[i] << " "; ofs << "\n"; } ofs.close(); cout << "\nDump done: Total Records: " << record_idx_ << "\n"; } void MostCV::LevelDBReader::DumpSmall(const string &file_path, int featureVectorLimit, bool make_random) { record_idx_ = 0; database_iter_->SeekToFirst(); ofstream ofs(file_path.c_str()); vector retVec; string key; int label; for (int cnt = 0; cnt < 500 && GetNextEntry(key, retVec, label); ++cnt) { ofs << "key=" << key << ", label=" << label << ", features length=" << retVec.size(); if (make_random) std::random_shuffle(retVec.begin(), retVec.end()); if (featureVectorLimit > 0) { ofs << ", truncated"; retVec.resize(featureVectorLimit); // To avoid writing much } ofs << ", feature vec= "; for (size_t i = 0; i < retVec.size(); ++i) ofs << retVec[i] << " "; ofs << "\n"; } ofs.close(); cout << "\nDump done: Total Records: " << record_idx_ << "\n"; } void MostCV::LevelDBReader::ReadLabels(vector &labels, int max_rows) { record_idx_ = 0; database_iter_->SeekToFirst(); labels.clear(); string key; int label; vector retVec; for (int row = 0; GetNextEntry(key, retVec, label); ++row) { if(max_rows != -1 && max_rows == row) break; labels.push_back(label); } } int MostCV::LevelDBReader::GetRecordsCount() { record_idx_ = 0; database_iter_->SeekToFirst(); string key; int label; vector retVec; while (GetNextEntry(key, retVec, label)) ; return record_idx_; } void MostCV::LevelDBReader::SeekToHead() { record_idx_ = 0; 
database_iter_->SeekToFirst(); } ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/src/leveldb-reader.h ================================================ /* * leveldb-reader.h * * Created on: 2015-05-21 * Author: Moustafa S. Ibrahim */ /* * The file handles the reading of leveldb files. The database hold set of feature vectors of same length. */ #ifndef LEVELDB_READER_H_ #define LEVELDB_READER_H_ #include #include #include #include #include #include #include #include using std::map; using std::deque; using std::vector; using std::string; using std::endl; using std::cout; #include #include #include #include #include "caffe/proto/caffe.pb.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/net.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/db.hpp" #include "caffe/util/io.hpp" #include "caffe/vision_layers.hpp" using caffe::Blob; using caffe::Caffe; using caffe::Datum; using caffe::Net; namespace MostCV { /* * The class opens a leveldb directory, which has set of feature vectors (e.g. extracted by feature_extract tool from caffe tool). * In order, each feature vector has a name is a given in a "sorted" file. * User either can retrieve all feature vectors in order or filter based on name. * * User is expected to use one type only of the GetNextEntry methods. Similarly, if user used Dump method, shouldn't try to use other methods. * Reason behind such limitation: All the methods seek in the file. E.g., after dumping, no more rows to read. * * Usage Example: * * LevelDBReader reader(database_path, sorted_images_list_file); * vector feature_vector; * * while(reader.GetNextEntry()) * doSomething(feature_vector); * */ class LevelDBReader { public: /* * Open and prepare the database for reading. The database is allowed to have more rows than the file such that extra rows has no corresponding name. * * The file names should be sorted. Reason behind that is allowing efficient retrieval (e.g. 
using caching to last 200 rows). As a result, leveldb should be sorted too based on this key. * * In case no file given, then Just retrieve sequentially from DB. This is more suitable for dumping purposes. */ LevelDBReader(const string & database_path, const string & sorted_list_file = ""); ~LevelDBReader(); // Read the next entry from the file. If no more rows, return false. bool GetNextEntry(string &key, vector &retVec, int &label); // Given entry name from the sorted_images_list_file, return corresponding vector. Consecutive calls should be ordered in name. // If not so, it shouldn't be far from the last sorted element to be retrieved from caching. We cache last X elements. bool GetNextEntryByKey(const string & name, vector &retVec, int &label); // For debugging purposes, dump the database to a file. Truncate after the first "limit" elements. void Dump(const string &file_path, int featureVectorLimit = -1); void DumpSmall(const string &file_path, int featureVectorLimit = -1, bool make_random = true); void ReadLabels(vector &labels, int max_rows = -1); int GetRecordsCount(); void SeekToHead(); private: bool is_caching; vector vectors_names_; string database_path_; leveldb::DB* database_; leveldb::Iterator* database_iter_; // Caching Variables map > cache_; deque cache_items_; int cache_limit_; // Current row index in retrieval int record_idx_; }; } #endif /* LEVELDB_READER_H_ */ ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/src/leveldb-writer.cpp ================================================ /* * LeveldbWriter.cpp * * Created on: 2015-04-02 * Author: Moustafa S. 
Ibrahim */ #include #include "leveldb-writer.h" using std::cerr; using std::cout; #include "utilities.h" const int WRITING_LIMIT = 1000; namespace MostCV { LeveldbWriter::LeveldbWriter(string db_path_, int resize_height_, int resize_width_, int volumeSize, bool is_virtual_) { max_label_cnt = -1; db_path = db_path_; resize_height = resize_height_; resize_width = resize_width_; volume_size = volumeSize; is_virtual = is_virtual_; cerr<<"\n\nCreates a database at: "< 0) { // then something already defined for the shape datum.set_channels(volume_size); datum.set_height(resize_height); datum.set_width(resize_width); cerr<<"\t(H, W, C) = "<max_label_cnt = max_label_cnt; } void LeveldbWriter::setDatumLabel(int id) { assert(!is_closed); assert(id >= 0); if (max_label_cnt != -1 && id >= max_label_cnt) { cerr << "Wrong label! (Received, expected) = " << id << " - " << max_label_cnt << "\n"; assert(false); } datum.set_label(id); labels.insert(id); labelsVec.push_back(id); } void LeveldbWriter::addDatumToBatch(string key) { assert(!is_closed); if (key != "" && keys.insert(key).second == false) cerr << "Warning: key duplication: " << key << "\n"; if(is_virtual) return; string value; assert(datum.SerializeToString(&value)); string prefix = MostCV::toIntStr("0000000", internal_idx++) + "@"; batch->Put(prefix + key, value); if (++countId % WRITING_LIMIT == 0) writeBatch(); clearDatum(); } void LeveldbWriter::addDatumToBatch(caffe::Datum &datum, string key, int label) { assert(!is_closed); if (keys.insert(key).second == false) cerr << "Warning: Key duplication: " << key << "\n"; assert(label >= 0); string value; datum.set_label(label); labels.insert(label); labelsVec.push_back(label); if(is_virtual) return; assert(datum.SerializeToString(&value)); string prefix = MostCV::toIntStr("0000000", internal_idx++) + "@"; batch->Put(prefix + key, value); if (++countId % WRITING_LIMIT == 0) writeBatch(); clearDatum(); } bool LeveldbWriter::addVectorDatum(const vector &feature_vec) { 
assert(!is_closed); if(is_virtual) return true; clearDatum(); if (resize_height <= 0) { // use first vector to define the outline datum.set_height(resize_height = feature_vec.size()); datum.set_channels(1); datum.set_width(1); } else assert((int )feature_vec.size() == resize_height * resize_width * volume_size); for (int p = 0; p < (int) feature_vec.size(); ++p) datum.add_float_data(feature_vec[p]); return true; } bool LeveldbWriter::addImageToDatum(Mat imgMat_origin, int num_channels) { assert(!is_closed); if(is_virtual) return true; assert(resize_width > 0 && resize_height > 0); assert(imgMat_origin.channels() == num_channels); // Weird to send it :D Mat imgMat; cv::resize(imgMat_origin, imgMat, Size(resize_width, resize_height)); // add to db: 256 * 256 * 3 = 196608 string* datum_string = datum.mutable_data(); if (num_channels == 3) { for (int c = 0; c < num_channels; ++c) { for (int h = 0; h < imgMat.rows; ++h) { for (int w = 0; w < imgMat.cols; ++w) { datum_string->push_back(static_cast(imgMat.at(h, w)[c])); } } } } else { for (int h = 0; h < imgMat.rows; ++h) { for (int w = 0; w < imgMat.cols; ++w) { datum_string->push_back(static_cast(imgMat.at(h, w))); } } } return true; } bool LeveldbWriter::addImageToDatum(const string& filename, int num_channels) { assert(!is_closed); if(is_virtual) return true; int cv_read_flag = (num_channels == 3 ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE); Mat imgMat_origin = cv::imread(filename, cv_read_flag); if (!imgMat_origin.data) { LOG(ERROR)<< "Could not open or find file " << filename; return false; } return addImageToDatum(imgMat_origin, num_channels); } void LeveldbWriter::writeBatch() { if (is_closed) return; if(is_virtual) return; if (countId == lastCountId) // nothing changed return; leveldb::Status status = db->Write(leveldb::WriteOptions(), batch); CHECK(status.ok()) << "Failed to write the batch. 
Count id: " << countId << "\n"; delete batch; batch = new leveldb::WriteBatch(); LOG(ERROR)< #include #include using std::vector; using std::set; using std::string; #include #include #include #include "caffe/proto/caffe.pb.h" #include "opencv2/core/core.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" using cv::Mat; using cv::Size; namespace MostCV { class LeveldbWriter { public: // Using zero parameters would mean not interested to add addImageToDatum functionality. LeveldbWriter(string db_path, int resize_height = -1, int resize_width = 1, int volumeSize = 1, bool is_virtual = false); ~LeveldbWriter(); void clearDatum(); void setDatumLabel(int id); bool addImageToDatum(const string& filename, int num_channels); bool addImageToDatum(Mat img, int num_channels); bool addVectorDatum(const vector &feature_vec); void addDatumToBatch(string key = ""); void addDatumToBatch(caffe::Datum &datum, string key, int label); void setLabelsRange(int max_label_cnt); void forceFinalize(); private: void writeBatch(); leveldb::DB* db; leveldb::WriteBatch* batch; caffe::Datum datum; int countId; int lastCountId; string db_path; int resize_height; int resize_width; int volume_size; int internal_idx; set labels; //helps in verification. vector labelsVec; // print purposes int max_label_cnt; set keys; bool is_closed; bool is_virtual; }; } #endif /* LeveldbWriter_H_ */ ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/src/rect-helper.cpp ================================================ /* * RectHelper.cpp * * Created on: 2015-07-06 * Author: Moustafa S. 
Ibrahim */ #include "rect-helper.h" #include "images-utilities.h" #include "utilities.h" #include "custom-macros.h" namespace MostCV { RectHelper::RectHelper(Rect rect, double score) { r = rect; conf_score = score; color = Scalar(rand() % 256, rand() % 256, rand() % 256); // random color } vector RectHelper::ToRectHelpers(const vector &rectangles_vec) { vector ret; for(auto rect : rectangles_vec) ret.push_back(RectHelper(rect)); return ret; } vector RectHelper::ToRects(const vector &rectangles_vec) { vector ret; for(auto rect : rectangles_vec) ret.push_back(rect.r); return ret; } //////////////////////////// Static Methods ///////////////////////////// void RectHelper::DrawRects(Mat img, const vector &rectangles_vec, bool is_make_copy, bool is_show, Scalar color) { Mat imgTemp; if (is_make_copy) { img.copyTo(imgTemp); img = imgTemp; } for (auto rect_helper : rectangles_vec) cv::rectangle(img, rect_helper.r, (color[0] == -1) ? rect_helper.color : color, 2); int maxArea = 600 * 800; int dif = sqrt(img.rows * img.cols / maxArea); if(dif > 1) { Size size(img.cols / dif, img.rows / dif); Mat toImg; cv::resize(img, toImg, size); img = toImg; } MostCV::ShowImage(img, 0, is_show); } map > RectHelper::LoadImagesRectangles(string path_x1_y1_w_h){ map > retMap; ifstream ifs(path_x1_y1_w_h); int cnt; string image_name; while(ifs>>image_name>>cnt) { vector rectHelpers; while(cnt--) { double x, y, w, h; double score; ifs>>x>>y>>w>>h>>score; rectHelpers.push_back(RectHelper(Rect(x, y, w, h), score)); } retMap[image_name] = rectHelpers; } ifs.close(); return retMap; } void RectHelper::WriteImagesRectangles(const map > &image_rect_helpers_Map, string path_x1_y1_w_h) { ofstream ofs(path_x1_y1_w_h); for (auto img_rects_pair : image_rect_helpers_Map) { ofs< &rects, double conf_score_threshold) { for (size_t i = 0; i < rects.size(); ++i) { if(MostCV::dcmp(rects[i].conf_score, conf_score_threshold) < 0) { rects.erase(rects.begin() + i); --i; } } } bool __CmpSortByConfidence(const 
RectHelper &a, const RectHelper& b) { return MostCV::dcmp(a.conf_score, b.conf_score) < 0; } void RectHelper::SortByConfidence(vector &rects) { sort(RALL(rects), __CmpSortByConfidence); } bool __CmpSortByTopLeftPoint(const RectHelper &a, const RectHelper& b) { int d = MostCV::dcmp(a.r.x, b.r.x); if(d != 0) return d < 0; return MostCV::dcmp(a.r.y, b.r.y) < 0; } void RectHelper::SortByTopLeftPoint(vector &rects) { sort(RALL(rects), __CmpSortByTopLeftPoint); } } ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/src/rect-helper.h ================================================ /* * RectHelper.h * * Created on: 2015-07-06 * Author: Moustafa S. Ibrahim */ #ifndef RECTHELPER_H_ #define RECTHELPER_H_ #include #include #include #include #include using std::vector; using std::map; using std::string; using std::endl; using std::cout; using std::ifstream; using std::ofstream; #include "opencv2/core/core.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" using cv::Mat; using cv::Scalar; using cv::Rect; using cv::Point; using cv::Size; namespace MostCV { class RectHelper { public: RectHelper(Rect rect = Rect(0, 0, 0, 0), double score = -1); static vector ToRectHelpers(const vector &rectangles_vec); static vector ToRects(const vector &rectangles_vec); static void DrawRects(Mat img, const vector &rectangles_vec, bool is_make_copy = true, bool is_show = true, Scalar color = Scalar(-1, -1, -1)); static void SortByConfidence(vector &rects); static void SortByTopLeftPoint(vector &rects); static void FilterBelowConfidenceThreshold(vector &rects, double conf_score_threshold); static map > LoadImagesRectangles(string path_x1_y1_w_h); static void WriteImagesRectangles(const map > &imageRectHelpersMap, string path_x1_y1_w_h); Rect r; double conf_score; string category; // E.g. 
Car bbox int category_idx; Scalar color; // For drawing Mat image; // Image the rectangle belong to it string image_name; string image_path; string image_parent_path; }; } #endif /* RECTHELPER_H_ */ ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/src/utilities.cpp ================================================ /* * Utilities.cpp * * Created on: 2015-03-13 * Author: Moustafa S. Ibrahim */ #include "utilities.h" #include #include #include #include using std::memcpy; using std::fabs; #include #include #include #include #include namespace bst_fs = boost::filesystem; using namespace boost::filesystem; #include "custom-abbreviation.h" namespace MostCV { int dcmp(double x, double y) { return fabs(x - y) <= EPS ? 0 : x < y ? -1 : 1; } map BuildStringIdMap(set classes) { map classId; REPIT(strIt, classes) { string str = *strIt; if (classId.count(str) == 0) { int sz = classId.size(); classId[str] = sz; } } return classId; } map BuildStringIdMap(vector classesVec) { set classes(classesVec.begin(), classesVec.end()); return BuildStringIdMap(classes); } int UpdateStringIdMap(map &classId, string str) { if (classId.count(str) == 0) { int sz = classId.size(); classId[str] = sz; return sz; } return classId[str]; } double round(double d, int precision) { ostringstream oss; oss.setf(std::ios::fixed); oss.precision(precision); oss << d; istringstream iss(oss.str()); iss >> d; return d; } void fixDir(string &dir) { if (SZ(dir) == 0) return; if (dir[SZ(dir) - 1] != PATH_SEP) dir += PATH_SEP; } string getFileName(string dir) { int idx = dir.find_last_of(PATH_SEP); if (idx == -1) return dir; return dir.substr(idx + 1); } bool fileExist(string szFilePath, bool print) { ifstream fin(szFilePath.c_str()); if (!fin) { if (print) printf("fileExist: Failed to open file [%s]\n", szFilePath.c_str()); return false; } fin.close(); return true; } string trim(string str) { int s = 0, e = SZ(str) - 1; REP(i, str) { if (!isspace(str[i])) break; s++; 
} LPD(i, SZ(str)-1, 0) { if (!isspace(str[i])) break; e--; } if (s > e) return ""; return str.substr(s, e - s + 1); } string toLower(string str) { string ret = ""; REP(i, str) ret += tolower(str[i]); return ret; } string toUpper(string str) { string ret = ""; REP(i, str) ret += toupper(str[i]); return ret; } bool startsWith(string str, string pat) { return (int) str.find(pat) == 0; } int random(int range) { return rand() % range; } char* toCharArr(string str) { char *s = new char[SZ(str) + 1]; s[SZ(str)] = '\0'; memcpy(s, str.c_str(), SZ(str)); return s; } string toIntStr(string st, int add, bool append_zeros) { int val = toType(st, 1); val += add; string ret = toString(val); if (append_zeros && ret.size() < st.size()) ret = string(st.size() - ret.size(), '0') + ret; //pad zeros return ret; } string removeExt(string name) { int pos = name.find_last_of('.'); if (pos != -1) name = name.substr(0, pos); return name; } bool IsPathExist(string path) { return boost::filesystem::exists(path); } int CountFileLines(string path) { std::ifstream inFile(path); if(inFile.fail()) { cerr<<"Couldn't open path: "<(inFile), std::istreambuf_iterator(), '\n'); inFile.close(); return ans; } vector GetPerm(int length, int seed) { boost::mt19937 randGenerator(seed); boost::uniform_int<> uniform_int_dist; boost::variate_generator > rand_generator(randGenerator, uniform_int_dist); vector perm(length); for (int i = 0; i < (int) perm.size(); ++i) perm[i] = i; return perm; } string consumeStringParam(int &argc, char** &argv, string variable_name) { return consumeParam(argc, argv, string(""), variable_name); } int consumeIntParam(int &argc, char** &argv, string variable_name) { return consumeParam(argc, argv, 1, variable_name); } double consumeDoubleParam(int &argc, char** &argv, string variable_name) { return consumeParam(argc, argv, 1.0, variable_name); } vector GetDirs(string szRoot) { vector ret; for (bst_fs::directory_iterator itr(szRoot); itr != bst_fs::directory_iterator(); ++itr) { 
string path_str = itr->path().c_str(); if (bst_fs::is_directory((itr->status()))) ret.push_back(path_str); } sort(ret.begin(), ret.end()); return ret; } vector GetDirsNames(string szRoot) { vector ret; for (bst_fs::directory_iterator itr(szRoot); itr != bst_fs::directory_iterator(); ++itr) { string path_str = itr->path().c_str(); if (bst_fs::is_directory((itr->status()))) ret.push_back(itr->path().filename().c_str()); } sort(ret.begin(), ret.end()); return ret; } vector GetFiles(string szRoot, string endwith) { vector ret; for (bst_fs::directory_iterator itr(szRoot); itr != bst_fs::directory_iterator(); ++itr) { string path_str = itr->path().c_str(); if (bst_fs::is_regular_file((itr->status()))) { if(endwith == "" || boost::algorithm::ends_with(path_str, endwith)) ret.push_back(path_str); } } sort(ret.begin(), ret.end()); return ret; } vector GetFilesExt(string szRoot, string endwith) { vector ret; for (bst_fs::directory_iterator itr(szRoot); itr != bst_fs::directory_iterator(); ++itr) { string path_str = itr->path().c_str(); if (bst_fs::is_regular_file((itr->status()))) { if(endwith == "" || boost::algorithm::ends_with(path_str, endwith)) ret.push_back(itr->path().extension().c_str()); } } sort(ret.begin(), ret.end()); return ret; } vector GetFilesNames(string szRoot, string endwith) { vector ret; for (bst_fs::directory_iterator itr(szRoot); itr != bst_fs::directory_iterator(); ++itr) { string path_str = itr->path().c_str(); if (bst_fs::is_regular_file((itr->status()))) { if(endwith == "" || boost::algorithm::ends_with(path_str, endwith)) ret.push_back(itr->path().filename().c_str()); } } sort(ret.begin(), ret.end()); return ret; } } ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/src/utilities.h ================================================ /* * general_utilities.h * * Created on: 2015-03-11 * Author: Moustafa S. 
Ibrahim */ #ifndef GENERAL_UTILITIES_H_ #define GENERAL_UTILITIES_H_ #include "custom-macros.h" #include #include #include #include #include #include #include #include using std::string; using std::ostringstream; using std::istringstream; using std::ifstream; using std::set; using std::map; using std::vector; using std::cout; using std::cerr; using std::pair; namespace MostCV { const char PATH_SEP = '/'; int dcmp(double x, double y); double round(double d, int precision); void fixDir(string &dir); bool IsPathExist(string path); string getFileName(string dir); bool fileExist(string szFilePath, bool print = true); string trim(string str); string toLower(string str); string toUpper(string str); bool startsWith(string str, string pat); int random(int range); char* toCharArr(string str); string toIntStr(string st, int add, bool append_zeros = true); string removeExt(string name); map BuildStringIdMap(set classId); map BuildStringIdMap(vector classesVec); int UpdateStringIdMap(map &items_map, string str); int CountFileLines(string path); vector GetPerm(int length, int seed = 123); string consumeStringParam(int &argc, char** &argv, string variable_name = ""); int consumeIntParam(int &argc, char** &argv, string variable_name = ""); double consumeDoubleParam(int &argc, char** &argv, string variable_name = ""); vector GetDirs(string szRoot); vector GetDirsNames(string szRoot); vector GetFiles(string szRoot, string endwith = ""); vector GetFilesExt(string szRoot, string endwith = ""); vector GetFilesNames(string szRoot, string endwith = ""); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template Type toType(string data, Type indicator, string variable_name = "") { istringstream iss(data); Type item; iss >> item; if(iss.fail()) { if(variable_name != "") cerr<<"Problem in reading variable: "< Type consumeParam(int &argc, char** &argv, Type indicator, string variable_name = "") { assert(argc > 0); string 
ret = argv[0]; --argc, ++argv; return toType(ret, indicator, variable_name); } template char* toCharPtr(Type val) { ostringstream oss; oss << val; return toCharArr(oss.str()); } template string toString(Type val) { ostringstream oss; oss << val; return oss.str(); } template vector readStringItems(string data, Type indicator) { vector items; Type item; istringstream iss(data); while (iss >> item) items.push_back(item); return items; } template vector readFileItems(string filePath, Type indicator, bool print = true) { vector items; Type item; ifstream fin(filePath.c_str()); if (!fin) { if (print) printf("\n\tWARNING: readFileItems: Failed to open file [%s]\n", filePath.c_str()); fflush(stdout); return items; } while (fin >> item) items.push_back(item); fin.close(); return items; } template vector readFileItems(ifstream &fin, Type indicator, int length = -1) { Type item; vector items; if(length == -1) { while (items.sizefin >> item) items.push_back(item); } else { items.resize(length); for (int pos = 0; pos < items.size(); ++pos) { fin >> item; assert(!fin.fail()); items[pos] = item; } } return items; } template vector readStreamItems(istringstream &iss, Type indicator, int length = -1) { Type item; vector items; if(length == -1) { while (iss >> item) items.push_back(item); } else { items.resize(length); for (int pos = 0; pos < items.size(); ++pos) { iss >> item; assert(!iss.fail()); items[pos] = item; } } return items; } template vector > read2dFileItems(string filePath, Type indicator, bool print = true) { vector > items; ifstream fin(filePath.c_str()); if (fin.fail()) { printf("read2dFileItems: Failed to open file [%s]\n", filePath.c_str()); assert(false); } string line; while (getline(fin, line)) { if(line != "") items.push_back(readStringItems(line, indicator)); } return items; } // For every element that has max frequency, add its position. 
Total elements equal to # of unqiue elements // 2 3 2 2 2 2 4 4 => 0 6 1 template vector getMaxFrequentPositions(vector &vec) { vector retVec; map > freq_map; for (int i = 0; i < vec.size(); ++i) freq_map[vec[i]].push_back(i); set >, std::greater > > > freqs; for (auto kv : freq_map) freqs.insert(std::make_pair(kv.second.size(), kv.second)); for (auto group : freqs) retVec.push_back(group.second[0]); return retVec; } template Type getMaxFrequentLabel(vector &vec) { assert(vec.size() > 0); vector pos = getMaxFrequentPositions(vec); return vec[ pos[0] ]; } template map getFrequencyMap(const vector &vec, bool print = false) { map freq_map; for (int i = 0; i < vec.size(); ++i) freq_map[vec[i]]++; if (print) { for (auto kv : freq_map) cerr << "Key = "< Value " << kv.second << " instances\n"; } return freq_map; } template map getFrequencyMapPercent(vector &vec, bool print = false) { map freq_map; for (int i = 0; i < vec.size(); ++i) freq_map[vec[i]]++; if (print) { cerr.precision(1); cerr.setf(std::ios::fixed); for (auto kv : freq_map) cerr << "Key = "< Value " << 100.0 * kv.second / (double)vec.size()<< " %\n"; } return freq_map; } template vector castVector(const vector &row, Type2 indicator) { vector ret; ret.reserve(row.size()); for(auto val : row) ret.push_back((Type2)val); return ret; } template vector> cast2DVector(const vector> &matrix, Type2 indicator) { vector> ret; ret.reserve(matrix.size()); for(auto row : matrix) ret.push_back(castVector(row, indicator)); return ret; } } #endif /* GENERAL_UTILITIES_H_ */ ================================================ FILE: eclipse-project/ibrahim16-deep-act-rec-part/src/volleyball-dataset-mgr.cpp ================================================ /* * volleyball-dataset-mgr.cpp * * Created on: Nov 28, 2015 * Author: msibrahi */ #include "volleyball-dataset-mgr.h" #include namespace bst_fs = boost::filesystem; #include "utilities.h" #include "images-utilities.h" #include #include #include namespace MostCV { map 
global_video_id_frame_id_to_activityId; map > global_video_id_frame_id_to_persons; map persons_actions_ids_map; map scene_activities_ids_map; // statistics map scene_activities_freq_map; map players_activities_freq_map; VolleyballVideoData::VolleyballVideoData(string video_id, string video_dir) { MostCV::fixDir(video_dir); video_id_ = video_id; video_dir_ = video_dir; string annot_file = video_dir + "annotations.txt"; vector > data2dVec = MostCV::read2dFileItems(annot_file, string(""), false); // For every frame, read the players in it for (auto frame_data : data2dVec) { VolleyballPerson person; string frame_id = frame_data[0]; GetFramePath(frame_id); // verify on hard disk frame_data.erase(frame_data.begin()); // if (frame_data[0].find("win") == string::npos) // continue; scene_activities_freq_map[ frame_data[0] ]++; int frame_activity_id = MostCV::UpdateStringIdMap(scene_activities_ids_map, frame_data[0]); annot_frame_id_to_activity_id_map_[frame_id] = frame_activity_id; frame_data.erase(frame_data.begin()); pair min_max_persons_y = { 10000, 0 }; for (int k = 0; k < (int) frame_data.size(); k += 5) { int x = MostCV::toType(frame_data[k + 0], 0); int y = MostCV::toType(frame_data[k + 1], 0); int w = MostCV::toType(frame_data[k + 2], 0); int h = MostCV::toType(frame_data[k + 3], 0); string activity_str = frame_data[k + 4]; players_activities_freq_map[activity_str]++; min_max_persons_y.first = std::min(min_max_persons_y.first, y); min_max_persons_y.second = std::max(min_max_persons_y.second, y + h); person.bbox_ = RectHelper(Rect(x, y, w, h)); person.action_id_ = MostCV::UpdateStringIdMap(persons_actions_ids_map, activity_str); annot_frame_id_persons_map_[frame_id].push_back(person); } if (min_max_persons_y.first < 0) min_max_persons_y.first = 0; annot_frame_id_to_min_max_persons_y_map_[frame_id] = min_max_persons_y; annot_frame_id_vec_.push_back(frame_id); string video_id_frame_id = video_id + "#"+frame_id; global_video_id_frame_id_to_activityId[video_id_frame_id] 
= frame_activity_id; global_video_id_frame_id_to_persons[video_id_frame_id] = annot_frame_id_persons_map_[frame_id]; if (annot_frame_id_persons_map_[frame_id].size() < 7) { cerr<<"video "< 12) { cerr<<"video "< &persons = frame_persons_kv.second; sort(persons.begin(), persons.end(), [](const VolleyballPerson &a, const VolleyballPerson &b) { if(a.bbox_.r.x != b.bbox_.r.x) return a.bbox_.r.x < b.bbox_.r.x; return a.bbox_.r.y < b.bbox_.r.y; }); } } void VolleyballVideoData::ResetPersons(string img_name, vector rects) { annot_frame_id_persons_map_[img_name].clear(); for (auto rect : rects) { VolleyballPerson person; person.bbox_ = rect; person.action_id_ = 0; annot_frame_id_persons_map_[img_name].push_back(person); } } vector VolleyballVideoData::GetPersonsRect(string frame_id) { vector rects; for (auto person : annot_frame_id_persons_map_[frame_id]) rects.push_back(person.bbox_); return rects; } // Short Util string VolleyballVideoData::GetFramePath(string frame_id, int shift) { string frame_id_no_ext = frame_id.substr(0, frame_id.find_first_of('.')); string ext = frame_id.substr(frame_id.find_first_of('.')); string target_frame_id = MostCV::toIntStr(frame_id_no_ext, shift, false); string frame_new_path = video_dir_ + frame_id_no_ext + MostCV::PATH_SEP + target_frame_id + ext; assert(boost::filesystem::exists(frame_new_path)); return frame_new_path; } pair, vector > VolleyballVideoData::GetTemporalWindowPaths(string frame_id, int temporal_window, int step, bool is_use_expend_factor) { vector window_frames_after; vector window_frames_before; if (is_use_expend_factor) temporal_window = 2 * temporal_window + 1; LP(w, 1+temporal_window/2) { string path = GetFramePath(frame_id, -w * step); window_frames_before.push_back(path); } LP(w, (temporal_window+1)/2) { string path = GetFramePath(frame_id, w * step); window_frames_after.push_back(path); } return {window_frames_before, window_frames_after}; } vector VolleyballVideoData::GetTemporalWindowPathsMerged(string frame_id, 
int temporal_window, int step) { vector paths; int start = -temporal_window/2; LP(w, temporal_window) { string path = GetFramePath(frame_id, start * step); paths.push_back(path); ++start; } return paths; } void VolleyballVideoData::visualize() { for (auto frame_id : annot_frame_id_vec_) { string path = GetFramePath(frame_id); Mat img = cv::imread(path); cerr< video_ids) { for (int i = 0; i < (int) video_ids.size(); ++i) { for (int j = 0; j < (int) ids_.size(); ++j) { if (video_ids[i] != ids_[j]) continue; std::swap(ids_[i], ids_[j]); std::swap(videos_vec_[i], videos_vec_[j]); } } } vector > VolleyballDatasetPart::GetVideoFrameList(bool is_shuffled, int subset_percent) { vector > database_shuffled; return database_shuffled; boost::mt19937 generator(100); boost::uniform_int<> uni_dist; boost::variate_generator > rand_generator(generator, uni_dist); vector labels; for (auto video : videos_vec_) { int frame_pos = -1; for (auto frame_id : video.annot_frame_id_vec_) { ++frame_pos; database_shuffled.push_back(std::make_pair(video, frame_pos)); } } if (is_shuffled) { cerr << "Before: Total Shuffled Elements: " << database_shuffled.size() << " with 1st video" << database_shuffled.begin()->first.video_id_ << "\n"; std::random_shuffle(database_shuffled.begin(), database_shuffled.end(), rand_generator); cerr << "After: Total Shuffled Elements: " << database_shuffled.size() << " with 1st video" << database_shuffled.begin()->first.video_id_ << "\n"; } int max_size = subset_percent * database_shuffled.size(); database_shuffled.resize(max_size); return database_shuffled; } void VolleyballDatasetPart::visualize() { for (auto video : videos_vec_) video.visualize(); } //--------------------------------------------------------------- VolleyballDatasetMgr::VolleyballDatasetMgr(string config_dir_path, string videos_root_dir) { MostCV::fixDir(config_dir_path); dataset_division_.push_back(VolleyballDatasetPart("train", config_dir_path + "train.txt", videos_root_dir)); 
dataset_division_.push_back(VolleyballDatasetPart("val", config_dir_path + "val.txt", videos_root_dir)); dataset_division_.push_back(VolleyballDatasetPart("test", config_dir_path + "test.txt", videos_root_dir)); dataset_division_.push_back(VolleyballDatasetPart("trainval", config_dir_path + "trainval.txt", videos_root_dir)); total_videos_ = 0; total_frames_ = 0; // Remove empty datasets for (int i = 0; i < (int) dataset_division_.size(); ++i) { if (dataset_division_[i].videos_vec_.size() == 0) { cerr << dataset_division_[i].dataset_name_ << " dataset is EMPTY\n"; dataset_division_.erase(dataset_division_.begin() + i); --i; } } assert(dataset_division_.size() > 0); for (auto dataset : dataset_division_) { int current_fames = 0; for (auto video : dataset.videos_vec_) { total_frames_ += video.annot_frame_id_vec_.size(); current_fames += video.annot_frame_id_vec_.size(); } cerr << "Total frames for dataset " << dataset.dataset_name_ << " = " << current_fames << "\n"; total_videos_ += dataset.videos_vec_.size(); } total_scene_labels = scene_activities_ids_map.size(); total_persons_labels = persons_actions_ids_map.size(); cerr << "\nTotal videos = " << total_videos_ << " - total frames = " << total_frames_ << "\n"; cerr << "\nScenes Labels:\n"; for (auto scene_kv : scene_activities_ids_map) cerr << "\t" << scene_kv.first << " " << scene_kv.second << "\n"; cerr << "\nPersons Labels:\n"; for (auto persons_kv : persons_actions_ids_map) cerr << "\t" << persons_kv.first << " " << persons_kv.second << "\n"; cerr << "\nScenes Labels frequency:\n"; for (auto entry : scene_activities_freq_map) cerr << "\t" << entry.first << " " << entry.second << "\n"; cerr << "\nPlayers Labels frequency:\n"; for (auto entry : players_activities_freq_map) cerr << "\t" << entry.first << " " << entry.second << "\n"; } int VolleyballDatasetMgr::GetActivityId(string video_id, string frame_id) { string video_id_frame_id = video_id + "#"+frame_id; if 
(global_video_id_frame_id_to_activityId.count(video_id_frame_id) == 0) { cerr<<"problem with "< VolleyballDatasetMgr::GetPersons(string video_id, string frame_id) { string video_id_frame_id = video_id + "#"+frame_id; assert( global_video_id_frame_id_to_persons.count(video_id_frame_id) ); return global_video_id_frame_id_to_persons[video_id_frame_id]; } // verify 2*w+1 elements..e.g. centered around every frame void VolleyballDatasetMgr::VerifyDataAvailbility(int temporal_window) { for (auto dataset : dataset_division_) { cerr<<"Verifying dataset: "< #include #include using std::vector; using std::set; using std::string; using std::pair; #include "opencv2/core/core.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" using cv::Mat; using cv::Size; using cv::Ptr; #include "rect-helper.h" namespace MostCV { class VolleyballPerson { public: RectHelper bbox_; int action_id_; }; class VolleyballVideoData { public: VolleyballVideoData() {} VolleyballVideoData(string video_id, string video_dir); string GetFramePath(string frame_id, int shift = 0); pair< vector, vector > GetTemporalWindowPaths(string frame_id, int temporal_window, int step = 1, bool is_use_expend_factor = true); vector GetTemporalWindowPathsMerged(string frame_id, int temporal_window, int step = 1); void ResetPersons(string frame_id, vector rects); vector GetPersonsRect(string frame_id); void SortPersonsPerFrames(); void visualize(); string video_id_; string video_dir_; vector annot_frame_id_vec_; map annot_frame_id_to_activity_id_map_; map> annot_frame_id_to_min_max_persons_y_map_; map > annot_frame_id_persons_map_; }; class VolleyballDatasetPart { public: VolleyballDatasetPart() {} VolleyballDatasetPart(string dataset_name, string config_file, string videos_root_dir); void ReorderVideos(vector video_ids); vector > GetVideoFrameList(bool is_shuffled, int subset_percent); void visualize(); vector ids_; vector videos_vec_; string dataset_name_; string dataset_db_name_; string 
dataset_db_path_; }; class VolleyballDatasetMgr { public: VolleyballDatasetMgr(string config_dir_path, string videos_root_dir); void VerifyDataAvailbility(int temporal_window); int GetActivityId(string video_id, string frame_id); vector GetPersons(string video_id, string frame_id); vector dataset_division_; int total_videos_; int total_frames_; int total_scene_labels; int total_persons_labels; }; } #endif /* VOLLEYBALL_DATASET_MGR_H_FINAL_DATASET_ */ ================================================ FILE: ibrahim16-cvpr/p1-network1/clip_w5.txt ================================================ examples/deep-activity-rec/ibrahim16-cvpr/none.jpg 0 examples/deep-activity-rec/ibrahim16-cvpr/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr/none.jpg 1 ================================================ FILE: ibrahim16-cvpr/p1-network1/trainval-test-create-mean-script.sh ================================================ #!/usr/bin/env sh # This script converts the vollyball data into leveldb format. OUTDIR=examples/deep-activity-rec/ibrahim16-cvpr/p1-network1 echo "Computing image mean for trainval dataset: " $OUTDIR ./build/tools/compute_image_mean -backend=leveldb $OUTDIR/trainval-leveldb $OUTDIR/mean.binaryproto echo "Done." 
================================================ FILE: ibrahim16-cvpr/p1-network1/trainval-test-exe-script-resume.sh ================================================ #!/usr/bin/env sh OUTDIR=examples/deep-activity-rec/ibrahim16-cvpr/p1-network1 GPU_ID=0 ITER=15000 echo "Resuming Caffe using GPU" $GPU "In Directory " $OUTDIR "Starting from iteration " $ITER ./build/tools/caffe train 2> $OUTDIR/z_trainval-test-log-resume.txt \ --solver $OUTDIR/trainval-test-solver.prototxt --snapshot=$OUTDIR/z_snapshot_iter_$ITER.solverstate --gpu $GPU_ID ================================================ FILE: ibrahim16-cvpr/p1-network1/trainval-test-exe-script.sh ================================================ #!/usr/bin/env sh OUTDIR=examples/deep-activity-rec/ibrahim16-cvpr/p1-network1 GPU_ID=0 echo "Running Caffe using GPU" $GPU "In Directory " $OUTDIR ./build/tools/caffe train 2> $OUTDIR/z_trainval-test-log.txt \ --solver $OUTDIR/trainval-test-solver.prototxt --weights models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel --gpu $GPU_ID ================================================ FILE: ibrahim16-cvpr/p1-network1/trainval-test-network.prototxt ================================================ name: "volleyball_game_proto" layer { name: "clip_data" type: "ImageData" top: "dummy" top: "clip" image_data_param { source: "examples/deep-activity-rec/ibrahim16-cvpr/p1-network1/clip_w5.txt" batch_size: 250 } } layer { name: "Silence" type: "Silence" bottom: "dummy" } layer { name: "volleyball_game" type: "Data" top: "data" top: "label" include { phase: TRAIN } transform_param { mirror: true crop_size: 227 mean_file: "examples/deep-activity-rec/ibrahim16-cvpr/p1-network1/mean.binaryproto" } data_param { source: "examples/deep-activity-rec/ibrahim16-cvpr/p1-network1/trainval-leveldb" batch_size: 250 backend: LEVELDB } } layer { name: "volleyball_game" type: "Data" top: "data" top: "label" include { phase: TEST } transform_param { mirror: false crop_size: 227 mean_file: 
"examples/deep-activity-rec/ibrahim16-cvpr/p1-network1/mean.binaryproto" } data_param { source: "examples/deep-activity-rec/ibrahim16-cvpr/p1-network1/test-leveldb" batch_size: 250 backend: LEVELDB } } layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 96 kernel_size: 11 stride: 4 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1" } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "norm1" type: "LRN" bottom: "pool1" top: "norm1" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } } layer { name: "conv2" type: "Convolution" bottom: "norm1" top: "conv2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 2 kernel_size: 5 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2" } layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "norm2" type: "LRN" bottom: "pool2" top: "norm2" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } } layer { name: "conv3" type: "Convolution" bottom: "norm2" top: "conv3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu3" type: "ReLU" bottom: "conv3" top: "conv3" } layer { name: "conv4" type: "Convolution" bottom: "conv3" top: "conv4" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 group: 2 weight_filler { type: 
"gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu4" type: "ReLU" bottom: "conv4" top: "conv4" } layer { name: "conv5" type: "Convolution" bottom: "conv4" top: "conv5" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu5" type: "ReLU" bottom: "conv5" top: "conv5" } layer { name: "pool5" type: "Pooling" bottom: "conv5" top: "pool5" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "fc6" type: "InnerProduct" bottom: "pool5" top: "fc6" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu6" type: "ReLU" bottom: "fc6" top: "fc6" } layer { name: "drop6" type: "Dropout" bottom: "fc6" top: "fc6" dropout_param { dropout_ratio: 0.5 } } layer { name: "fc7" type: "InnerProduct" bottom: "fc6" top: "fc7" # Note that lr_mult can be set to 0 to disable any fine-tuning of this, and any other, layer param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu7" type: "ReLU" bottom: "fc7" top: "fc7" } layer { name: "drop7" type: "Dropout" bottom: "fc7" top: "fc7" dropout_param { dropout_ratio: 0.5 } } layer { name: "lstm1" type: "Lstm" bottom: "fc7" bottom: "clip" top: "lstm1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } lstm_param { num_output: 3000 clipping_threshold: 0.1 weight_filler { type: "gaussian" std: 0.1 } bias_filler { type: "constant" } } } layer { name: "fc8_volleyball" type: "InnerProduct" bottom: "lstm1" top: "fc8_volleyball" # lr_mult is set to higher than for other 
layers, because this layer is starting from random while the others are already trained param { lr_mult: 10 decay_mult: 1 } param { lr_mult: 20 decay_mult: 0 } inner_product_param { num_output: 9 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc8_volleyball" bottom: "label" } layer { name: "accuracy" type: "Accuracy" bottom: "fc8_volleyball" bottom: "label" top: "accuracy" include { phase: TEST } } ================================================ FILE: ibrahim16-cvpr/p1-network1/trainval-test-solver.prototxt ================================================ net: "examples/deep-activity-rec/ibrahim16-cvpr/p1-network1/trainval-test-network.prototxt" # testing examples are 77655 ~= 250 * 310 test_iter: 310 test_interval: 15000 display: 1000 base_lr: 0.00001 lr_policy: "step" gamma: 0.1 stepsize: 15000 max_iter: 15000 momentum: 0.9 weight_decay: 0.0005 random_seed: 750301 solver_mode: GPU snapshot: 5000 snapshot_prefix: "examples/deep-activity-rec/ibrahim16-cvpr/p1-network1/z_snapshot" snapshot_after_train: true ================================================ FILE: ibrahim16-cvpr/p3-extract-features-networks/test.prototxt ================================================ name: "volleyball_proto" layer { name: "volleyball" type: "Data" top: "data" top: "label" include { phase: TEST } transform_param { mirror: false crop_size: 227 mean_file: "examples/deep-activity-rec/ibrahim16-cvpr/p2-ready-fuse/mean.binaryproto" } data_param { source: "examples/deep-activity-rec/ibrahim16-cvpr/p2-ready-fuse/test-leveldb" batch_size: 120 backend: LEVELDB } } layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 96 kernel_size: 11 stride: 4 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu1" type: "ReLU" bottom: 
"conv1" top: "conv1" } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "norm1" type: "LRN" bottom: "pool1" top: "norm1" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } } layer { name: "conv2" type: "Convolution" bottom: "norm1" top: "conv2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 2 kernel_size: 5 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2" } layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "norm2" type: "LRN" bottom: "pool2" top: "norm2" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } } layer { name: "conv3" type: "Convolution" bottom: "norm2" top: "conv3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu3" type: "ReLU" bottom: "conv3" top: "conv3" } layer { name: "conv4" type: "Convolution" bottom: "conv3" top: "conv4" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu4" type: "ReLU" bottom: "conv4" top: "conv4" } layer { name: "conv5" type: "Convolution" bottom: "conv4" top: "conv5" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu5" type: "ReLU" bottom: "conv5" top: "conv5" } layer { name: "pool5" type: "Pooling" bottom: 
"conv5" top: "pool5" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "fc6" type: "InnerProduct" bottom: "pool5" top: "fc6" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu6" type: "ReLU" bottom: "fc6" top: "fc6" } layer { name: "drop6" type: "Dropout" bottom: "fc6" top: "fc6" dropout_param { dropout_ratio: 0.5 } } layer { name: "fc7" type: "InnerProduct" bottom: "fc6" top: "fc7" # Note that lr_mult can be set to 0 to disable any fine-tuning of this, and any other, layer param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu7" type: "ReLU" bottom: "fc7" top: "fc7" } layer { name: "drop7" type: "Dropout" bottom: "fc7" top: "fc7" dropout_param { dropout_ratio: 0.5 } } layer { name: "lstm1" type: "Lstm" bottom: "fc7" top: "lstm1" param { lr_mult: 10 decay_mult: 1 } param { lr_mult: 20 decay_mult: 0 } lstm_param { num_output: 3000 clipping_threshold: 0.1 weight_filler { type: "gaussian" std: 0.1 } bias_filler { type: "constant" } } } layer { name: "fc8_volleyball" type: "InnerProduct" bottom: "lstm1" top: "fc8_volleyball" # lr_mult is set to higher than for other layers, because this layer is starting from random while the others are already trained param { lr_mult: 10 decay_mult: 1 } param { lr_mult: 20 decay_mult: 0 } inner_product_param { num_output: 9 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc8_volleyball" bottom: "label" } layer { name: "accuracy" type: "Accuracy" bottom: "fc8_volleyball" bottom: "label" top: "accuracy" include { phase: TEST } } layer { name: "prop" type: "Softmax" bottom: "fc8_volleyball" 
top: "prop" } layer { name: "Silence" type: "Silence" bottom: "prop" } ================================================ FILE: ibrahim16-cvpr/p3-extract-features-networks/trainval.prototxt ================================================ name: "volleyball_proto" layer { name: "volleyball" type: "Data" top: "data" top: "label" include { phase: TEST } transform_param { mirror: false crop_size: 227 mean_file: "examples/deep-activity-rec/ibrahim16-cvpr/p2-ready-fuse/mean.binaryproto" } data_param { source: "examples/deep-activity-rec/ibrahim16-cvpr/p2-ready-fuse/trainval-leveldb" batch_size: 120 backend: LEVELDB } } layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 96 kernel_size: 11 stride: 4 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1" } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "norm1" type: "LRN" bottom: "pool1" top: "norm1" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } } layer { name: "conv2" type: "Convolution" bottom: "norm1" top: "conv2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 2 kernel_size: 5 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2" } layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "norm2" type: "LRN" bottom: "pool2" top: "norm2" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } } layer { name: "conv3" type: "Convolution" bottom: "norm2" top: "conv3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 384 
pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu3" type: "ReLU" bottom: "conv3" top: "conv3" } layer { name: "conv4" type: "Convolution" bottom: "conv3" top: "conv4" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu4" type: "ReLU" bottom: "conv4" top: "conv4" } layer { name: "conv5" type: "Convolution" bottom: "conv4" top: "conv5" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu5" type: "ReLU" bottom: "conv5" top: "conv5" } layer { name: "pool5" type: "Pooling" bottom: "conv5" top: "pool5" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "fc6" type: "InnerProduct" bottom: "pool5" top: "fc6" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu6" type: "ReLU" bottom: "fc6" top: "fc6" } layer { name: "drop6" type: "Dropout" bottom: "fc6" top: "fc6" dropout_param { dropout_ratio: 0.5 } } layer { name: "fc7" type: "InnerProduct" bottom: "fc6" top: "fc7" # Note that lr_mult can be set to 0 to disable any fine-tuning of this, and any other, layer param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu7" type: "ReLU" bottom: "fc7" top: "fc7" } layer { name: "drop7" type: "Dropout" bottom: "fc7" top: "fc7" dropout_param { dropout_ratio: 0.5 } } layer { 
name: "lstm1" type: "Lstm" bottom: "fc7" top: "lstm1" param { lr_mult: 10 decay_mult: 1 } param { lr_mult: 20 decay_mult: 0 } lstm_param { num_output: 3000 clipping_threshold: 0.1 weight_filler { type: "gaussian" std: 0.1 } bias_filler { type: "constant" } } } layer { name: "fc8_volleyball" type: "InnerProduct" bottom: "lstm1" top: "fc8_volleyball" # lr_mult is set to higher than for other layers, because this layer is starting from random while the others are already trained param { lr_mult: 10 decay_mult: 1 } param { lr_mult: 20 decay_mult: 0 } inner_product_param { num_output: 9 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc8_volleyball" bottom: "label" } layer { name: "accuracy" type: "Accuracy" bottom: "fc8_volleyball" bottom: "label" top: "accuracy" include { phase: TEST } } layer { name: "prop" type: "Softmax" bottom: "fc8_volleyball" top: "prop" } layer { name: "Silence" type: "Silence" bottom: "prop" } ================================================ FILE: ibrahim16-cvpr/p4-network2/clip_w10.txt ================================================ examples/deep-activity-rec/ibrahim16-cvpr/none.jpg 0 examples/deep-activity-rec/ibrahim16-cvpr/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr/none.jpg 1 ================================================ FILE: ibrahim16-cvpr/p4-network2/trainval-test-exe-script-resume.sh ================================================ #!/usr/bin/env sh OUTDIR=examples/deep-activity-rec/ibrahim16-cvpr/p4-network2 GPU_ID=0 ITER=10000 echo "Resuming Caffe using GPU" 
$GPU "In Directory " $OUTDIR "Starting from iteration " $ITER ./build/tools/caffe train 2> $OUTDIR/z_trainval-test-log-resume.txt \ --solver $OUTDIR/trainval-test-solver.prototxt --snapshot=$OUTDIR/z_snapshot_iter_$ITER.solverstate --gpu $GPU_ID ================================================ FILE: ibrahim16-cvpr/p4-network2/trainval-test-exe-script.sh ================================================ #!/usr/bin/env sh OUTDIR=examples/deep-activity-rec/ibrahim16-cvpr/p4-network2 GPU_ID=0 echo "Running Caffe using GPU" $GPU "In Directory " $OUTDIR ./build/tools/caffe train 2> $OUTDIR/z_trainval-test-log.txt \ --solver $OUTDIR/trainval-test-solver.prototxt --gpu $GPU_ID ================================================ FILE: ibrahim16-cvpr/p4-network2/trainval-test-network.prototxt ================================================ name: "volleyball_level2" layer { name: "clip_data" type: "ImageData" top: "dummy" top: "clip" image_data_param { source: "examples/deep-activity-rec/ibrahim16-cvpr/p4-network2/clip_w10.txt" batch_size: 250 } } layer { name: "Silence" type: "Silence" bottom: "dummy" } layer { name: "volleyball_level2" type: "Data" top: "data" top: "label" include { phase: TRAIN } data_param { source: "examples/deep-activity-rec/ibrahim16-cvpr/p4-network2/trainval-leveldb" batch_size: 250 backend: LEVELDB } } layer { name: "volleyball_level2" type: "Data" top: "data" top: "label" include { phase: TEST } data_param { source: "examples/deep-activity-rec/ibrahim16-cvpr/p4-network2/test-leveldb" batch_size: 10 backend: LEVELDB } } layer { name: "fc1" type: "InnerProduct" bottom: "data" top: "fc1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 3000 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu7" type: "ReLU" bottom: "fc1" top: "fc1" } layer { name: "drop7" type: "Dropout" bottom: "fc1" top: "fc1" dropout_param { dropout_ratio: 0.5 } } layer { 
name: "lstm1" type: "Lstm" bottom: "fc1" bottom: "clip" top: "lstm1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } lstm_param { num_output: 1000 clipping_threshold: 0.1 weight_filler { type: "gaussian" std: 0.1 } bias_filler { type: "constant" } } } layer { name: "fc_last" type: "InnerProduct" bottom: "lstm1" top: "fc_last" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 8 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc_last" bottom: "label" } layer { name: "accuracy" type: "Accuracy" bottom: "fc_last" bottom: "label" top: "accuracy" include { phase: TEST } } ================================================ FILE: ibrahim16-cvpr/p4-network2/trainval-test-solver.prototxt ================================================ net: "examples/deep-activity-rec/ibrahim16-cvpr/p4-network2/trainval-test-network.prototxt" # testing examples are 13370 = 1337 * 10. 
test_iter: 1337 test_interval: 2000 display: 2000 base_lr: 0.0001 lr_policy: "step" gamma: 0.1 stepsize: 10000 max_iter: 20000 momentum: 0.9 weight_decay: 0.0005 random_seed: 750301 solver_mode: GPU snapshot: 5000 snapshot_prefix: "examples/deep-activity-rec/ibrahim16-cvpr/p4-network2/z_snapshot" snapshot_after_train: true ================================================ FILE: ibrahim16-cvpr/p4-network2/trainval-test-window-evaluation-exe-script.sh ================================================ #!/usr/bin/env sh OUTDIR=examples/deep-activity-rec/ibrahim16-cvpr/p4-network2 WINDOW=10 GPU_ID=0 TEST_EXAMPLES=1337 ITER=20000 LAYER=prop examples/deep-activity-rec/exePhase4 \ $WINDOW \ GPU $GPU_ID \ $OUTDIR/z_snapshot_iter_$ITER.caffemodel \ $OUTDIR/trainval-test-window-evaluation-network.prototxt \ $LAYER \ $TEST_EXAMPLES \ 2>&1 | tee $OUTDIR/z_trainval-test-window-evaluation-log-prop.txt ================================================ FILE: ibrahim16-cvpr/p4-network2/trainval-test-window-evaluation-network.prototxt ================================================ name: "volleyball_level2" layer { name: "volleyball_data" type: "Data" top: "data" top: "label" include { phase: TEST } data_param { source: "examples/deep-activity-rec/ibrahim16-cvpr/p4-network2/test-leveldb" batch_size: 10 backend: LEVELDB } } layer { name: "fc1" type: "InnerProduct" bottom: "data" top: "fc1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 3000 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu7" type: "ReLU" bottom: "fc1" top: "fc1" } layer { name: "drop7" type: "Dropout" bottom: "fc1" top: "fc1" dropout_param { dropout_ratio: 0.5 } } layer { name: "lstm1" type: "Lstm" bottom: "fc1" top: "lstm1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } lstm_param { num_output: 1000 clipping_threshold: 0.1 weight_filler { type: "gaussian" std: 0.1 } bias_filler { 
type: "constant" } } } layer { name: "fc_last" type: "InnerProduct" bottom: "lstm1" top: "fc_last" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 8 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc_last" bottom: "label" } layer { name: "accuracy" type: "Accuracy" bottom: "fc_last" bottom: "label" top: "accuracy" include { phase: TEST } } layer { name: "prop" type: "Softmax" bottom: "fc_last" top: "prop" } layer { name: "argmax" type: "ArgMax" argmax_param { out_max_val: false top_k: 1 } bottom: "prop" top: "argmax" } ================================================ FILE: ibrahim16-cvpr/script-clean.sh ================================================ #!/usr/bin/env bash ROOT_DIR=examples/deep-activity-rec/ibrahim16-cvpr # Phase 1 artifacts rm -r $ROOT_DIR/p1-network1/test-leveldb rm -r $ROOT_DIR/p1-network1/trainval-leveldb rm $ROOT_DIR/p1-network1/mean.binaryproto rm $ROOT_DIR/p1-network1/z_log_dataset_net1.txt rm $ROOT_DIR/p1-network1/z_trainval-test-log.txt rm $ROOT_DIR/p1-network1/z_snapshot_iter_*.caffemodel rm $ROOT_DIR/p1-network1/z_snapshot_iter_*.solverstate # Phasse 2 rm -r $ROOT_DIR/p2-ready-fuse # Phasse 3 & 4 rm -r $ROOT_DIR/p4-network2/test-leveldb rm -r $ROOT_DIR/p4-network2/trainval-leveldb rm $ROOT_DIR/p4-network2/z_log_dataset_net2.txt rm $ROOT_DIR/p4-network2/z_trainval-test-log.txt rm $ROOT_DIR/p4-network2/z_trainval-test-window-evaluation-log-prop.txt rm $ROOT_DIR/p4-network2/z_snapshot_iter_*.caffemodel rm $ROOT_DIR/p4-network2/z_snapshot_iter_*.solverstate ================================================ FILE: ibrahim16-cvpr/script-p1-data.sh ================================================ #!/usr/bin/env sh CAFFE=/cs/vml2/msibrahi/workspaces/caffe-lstm GIT_PROJ_DIR=$CAFFE/examples/deep-activity-rec DATASET_VIDEOS=/cs/vml2/msibrahi/Datasets/Greg-Volleyball/volleyball 
DATASET_CONFIG=$GIT_PROJ_DIR/dataset-config OUTPUT_DIR=$GIT_PROJ_DIR/ibrahim16-cvpr TRAIN_SRC=trainval TEST_SRC=test WINDOW_NETWORK1=5 WINDOW_NETWORK2=10 STEP=1 GPU_ID=0 NETWORK1_HIDDEN=3000 NETWORK1_TRAIN_ITERS=15000 # Fusion Styles: Choose 0-7 # 0 => Conc / 1 group 1 => Max / 1 group 4 => Avg / 1 group 7 => sum / 1 group # 2 => Max / 2 groups 5 => Avg / 2 groups 3 => Max / 4 groups 6 => Avg / 4 groups FUSION_STYLE=2 FUSION_TRAIN_ITER=3493 FUSION_TEST_ITER=1337 VAR_FUSION_LAYERS_VAL="2 fc7 lstm1" VAR_FUSION_LAYERS="FUSION_LAYERS" declare "$VAR_FUSION_LAYERS=$VAR_FUSION_LAYERS_VAL" NETWORK1_DIR=$OUTPUT_DIR/p1-network1 NETWORK1_MODEL_PATH=$NETWORK1_DIR/z_snapshot_iter_$NETWORK1_TRAIN_ITERS.caffemodel NETWORK2_LEVELDB_FUSION_DIR=$OUTPUT_DIR/p2-ready-fuse NETWORK2_EXTRACTION_NETOWRK_DIR=$OUTPUT_DIR/p3-extract-features-networks NETWORK2_DIR=$OUTPUT_DIR/p4-network2 # Programs EXE_P1_NETWORK1=exePhase1_2 EXE_P2_FUSE=exePhase1_2 EXE_P4_NETWORK2=exePhase3 ########################################################################### echo ------------------------------------------------------ echo echo "START processing script" "$0" echo "OUTPUT Directory is " $OUTPUT_DIR echo echo Doing path VALIDATIONS ## Some directories / files validation [ -d $CAFFE ] || echo Directory $CAFFE NOOOT exist [ -d $OUTPUT_DIR ] || echo Directory $OUTPUT_DIR NOOOT exist [ -d $DATASET_VIDEOS ] || echo Directory $DATASET_VIDEOS NOOOT exist [ -d $DATASET_CONFIG ] || echo Directory $DATASET_CONFIG NOOOT exist [ -d $NETWORK1_DIR ] || echo Directory $NETWORK1_DIR NOOOT exist [ -d $NETWORK2_EXTRACTION_NETOWRK_DIR ] || echo Directory $NETWORK2_EXTRACTION_NETOWRK_DIR NOOOT exist [ -d $NETWORK2_DIR ] || echo Directory $NETWORK2_DIR NOOOT exist echo READY...STEADY...Gooo ? 
read -t 60 cd $CAFFE ########################################################################### echo ------------------------------------------------------ echo Phase 1 - Generating Network 1 Data - $NETWORK1_DIR # Clean if some previous wrong data [ -d $NETWORK1_DIR/$TRAIN_SRC-leveldb ] && [ ! -d $NETWORK1_DIR/$TEST_SRC-leveldb ] && rm -r $NETWORK1_DIR/$TRAIN_SRC-leveldb [ ! -d $NETWORK1_DIR/$TRAIN_SRC-leveldb ] && [ -d $NETWORK1_DIR/$TEST_SRC-leveldb ] && rm -r $NETWORK1_DIR/$TEST_SRC-leveldb $GIT_PROJ_DIR/$EXE_P1_NETWORK1 \ $DATASET_VIDEOS \ $DATASET_CONFIG \ $NETWORK1_DIR $WINDOW_NETWORK1 $STEP 1 \ 2>&1 | tee \ $NETWORK1_DIR/z_log_dataset_net1.txt echo ======================== echo Phase 1 - B - Computing Mean of Network 1 Training Data - $NETWORK1_DIR $NETWORK1_DIR/$TRAIN_SRC-$TEST_SRC-create-mean-script.sh read -t 10 ================================================ FILE: ibrahim16-cvpr/script-p1-train-p3-p4.sh ================================================ #!/usr/bin/env sh CAFFE=/cs/vml2/msibrahi/workspaces/caffe-lstm GIT_PROJ_DIR=$CAFFE/examples/deep-activity-rec DATASET_VIDEOS=/cs/vml2/msibrahi/Datasets/Greg-Volleyball/volleyball DATASET_CONFIG=$GIT_PROJ_DIR/dataset-config OUTPUT_DIR=$GIT_PROJ_DIR/ibrahim16-cvpr TRAIN_SRC=trainval TEST_SRC=test WINDOW_NETWORK1=5 WINDOW_NETWORK2=10 STEP=1 GPU_ID=0 NETWORK1_HIDDEN=3000 NETWORK1_TRAIN_ITERS=15000 # Fusion Styles: Choose 0-7 # 0 => Conc / 1 group 1 => Max / 1 group 4 => Avg / 1 group 7 => sum / 1 group # 2 => Max / 2 groups 5 => Avg / 2 groups 3 => Max / 4 groups 6 => Avg / 4 groups FUSION_STYLE=2 FUSION_TRAIN_ITER=3493 FUSION_TEST_ITER=1337 VAR_FUSION_LAYERS_VAL="2 fc7 lstm1" VAR_FUSION_LAYERS="FUSION_LAYERS" declare "$VAR_FUSION_LAYERS=$VAR_FUSION_LAYERS_VAL" NETWORK1_DIR=$OUTPUT_DIR/p1-network1 NETWORK1_MODEL_PATH=$NETWORK1_DIR/z_snapshot_iter_$NETWORK1_TRAIN_ITERS.caffemodel NETWORK2_LEVELDB_FUSION_DIR=$OUTPUT_DIR/p2-ready-fuse NETWORK2_EXTRACTION_NETOWRK_DIR=$OUTPUT_DIR/p3-extract-features-networks 
NETWORK2_DIR=$OUTPUT_DIR/p4-network2 # Programs EXE_P1_NETWORK1=exePhase1_2 EXE_P2_FUSE=exePhase1_2 EXE_P4_NETWORK2=exePhase3 ########################################################################### echo ------------------------------------------------------ echo echo "START processing script" "$0" echo "OUTPUT Directory is " $OUTPUT_DIR echo echo Doing path VALIDATIONS ## Some directories / files validation [ -d $CAFFE ] || echo Directory $CAFFE NOOOT exist [ -d $OUTPUT_DIR ] || echo Directory $OUTPUT_DIR NOOOT exist [ -d $DATASET_VIDEOS ] || echo Directory $DATASET_VIDEOS NOOOT exist [ -d $DATASET_CONFIG ] || echo Directory $DATASET_CONFIG NOOOT exist [ -d $NETWORK1_DIR ] || echo Directory $NETWORK1_DIR NOOOT exist [ -d $NETWORK2_EXTRACTION_NETOWRK_DIR ] || echo Directory $NETWORK2_EXTRACTION_NETOWRK_DIR NOOOT exist [ -d $NETWORK2_DIR ] || echo Directory $NETWORK2_DIR NOOOT exist echo READY...STEADY...Gooo ? read -t 60 cd $CAFFE ########################################################################### echo ======================== echo Phase 1 - C - Network 1 Training $NETWORK1_DIR/$TRAIN_SRC-$TEST_SRC-exe-script.sh read -t 10 ############### echo ------------------------------------------------------ # Info: # iterations (e.g. 10863 = 17 * 639. 639 is the # of test cases. # Info: Inside the prototxt, a batch # equal to # of persons (e.g. 5). 17 = 2 * 8 +1. 8 is the right temporal width echo Phase 3 - Generarting LSTM 2 Data - $NETWORK2_DIR # Clean if some previous wrong data [ -d $NETWORK2_DIR/$TRAIN_SRC-leveldb ] && [ ! -d $NETWORK2_DIR/$TEST_SRC-leveldb ] && rm -r $NETWORK2_DIR/$TRAIN_SRC-leveldb [ ! 
-d $NETWORK2_DIR/$TRAIN_SRC-leveldb ] && [ -d $NETWORK2_DIR/$TEST_SRC-leveldb ] && rm -r $NETWORK2_DIR/$TEST_SRC-leveldb $GIT_PROJ_DIR/$EXE_P4_NETWORK2 \ $FUSION_STYLE $WINDOW_NETWORK2 GPU $GPU_ID \ $NETWORK1_MODEL_PATH \ $NETWORK2_EXTRACTION_NETOWRK_DIR/$TRAIN_SRC.prototxt \ $FUSION_LAYERS \ $NETWORK2_DIR/$TRAIN_SRC-leveldb \ $FUSION_TRAIN_ITER \ $FUSION_STYLE $WINDOW_NETWORK2 GPU $GPU_ID \ $NETWORK1_MODEL_PATH \ $NETWORK2_EXTRACTION_NETOWRK_DIR/$TEST_SRC.prototxt \ $FUSION_LAYERS \ $NETWORK2_DIR/$TEST_SRC-leveldb \ $FUSION_TEST_ITER \ 2>&1 | tee \ $NETWORK2_DIR/z_log_dataset_net2.txt read -t 10 ############### echo ------------------------------------------------------ echo Phase 4 - A - LSTM 2 Training - $NETWORK2_DIR $NETWORK2_DIR/$TRAIN_SRC-$TEST_SRC-exe-script.sh echo ======================== echo Phase 4 - B - Temporal Evaluation $NETWORK2_DIR/$TRAIN_SRC-$TEST_SRC-window-evaluation-exe-script.sh ############### echo ------------------------------------------------------ echo echo DONE processing script "$0" echo echo ------------------------------------------------------ ================================================ FILE: ibrahim16-cvpr/script-p2-data-fuse.sh ================================================ #!/usr/bin/env sh CAFFE=/cs/vml2/msibrahi/workspaces/caffe-lstm GIT_PROJ_DIR=$CAFFE/examples/deep-activity-rec DATASET_VIDEOS=/cs/vml2/msibrahi/Datasets/Greg-Volleyball/volleyball DATASET_CONFIG=$GIT_PROJ_DIR/dataset-config OUTPUT_DIR=$GIT_PROJ_DIR/ibrahim16-cvpr TRAIN_SRC=trainval TEST_SRC=test WINDOW_NETWORK1=5 WINDOW_NETWORK2=10 STEP=1 GPU_ID=0 NETWORK1_HIDDEN=3000 NETWORK1_TRAIN_ITERS=15000 # Fusion Styles: Choose 0-7 # 0 => Conc / 1 group 1 => Max / 1 group 4 => Avg / 1 group 7 => sum / 1 group # 2 => Max / 2 groups 5 => Avg / 2 groups 3 => Max / 4 groups 6 => Avg / 4 groups FUSION_STYLE=2 FUSION_TRAIN_ITER=3493 FUSION_TEST_ITER=1337 VAR_FUSION_LAYERS_VAL="2 fc7 lstm1" VAR_FUSION_LAYERS="FUSION_LAYERS" declare 
"$VAR_FUSION_LAYERS=$VAR_FUSION_LAYERS_VAL" NETWORK1_DIR=$OUTPUT_DIR/p1-network1 NETWORK1_MODEL_PATH=$NETWORK1_DIR/z_snapshot_iter_$NETWORK1_TRAIN_ITERS.caffemodel NETWORK2_LEVELDB_FUSION_DIR=$OUTPUT_DIR/p2-ready-fuse NETWORK2_EXTRACTION_NETOWRK_DIR=$OUTPUT_DIR/p3-extract-features-networks NETWORK2_DIR=$OUTPUT_DIR/p4-network2 # Programs EXE_P1_NETWORK1=exePhase1_2 EXE_P2_FUSE=exePhase1_2 EXE_P4_NETWORK2=exePhase3 ########################################################################### echo ------------------------------------------------------ echo echo "START processing script" "$0" echo "OUTPUT Directory is " $OUTPUT_DIR echo echo Doing path VALIDATIONS ## Some directories / files validation [ -d $CAFFE ] || echo Directory $CAFFE NOOOT exist [ -d $OUTPUT_DIR ] || echo Directory $OUTPUT_DIR NOOOT exist [ -d $DATASET_VIDEOS ] || echo Directory $DATASET_VIDEOS NOOOT exist [ -d $DATASET_CONFIG ] || echo Directory $DATASET_CONFIG NOOOT exist [ -d $NETWORK1_DIR ] || echo Directory $NETWORK1_DIR NOOOT exist [ -d $NETWORK2_EXTRACTION_NETOWRK_DIR ] || echo Directory $NETWORK2_EXTRACTION_NETOWRK_DIR NOOOT exist [ -d $NETWORK2_DIR ] || echo Directory $NETWORK2_DIR NOOOT exist echo READY...STEADY...Gooo ? read -t 60 cd $CAFFE ############### echo ------------------------------------------------------ echo Phase 2 - A - Generating Data to be Fused - $NETWORK2_LEVELDB_FUSION_DIR mkdir -p $NETWORK2_LEVELDB_FUSION_DIR # Clean if some previous wrong data [ -d $NETWORK2_LEVELDB_FUSION_DIR/$TRAIN_SRC-leveldb ] && [ ! -d $NETWORK2_LEVELDB_FUSION_DIR/$TEST_SRC-leveldb ] && rm -r $NETWORK2_LEVELDB_FUSION_DIR/$TRAIN_SRC-leveldb [ ! 
-d $NETWORK2_LEVELDB_FUSION_DIR/$TRAIN_SRC-leveldb ] && [ -d $NETWORK2_LEVELDB_FUSION_DIR/$TEST_SRC-leveldb ] && rm -r $NETWORK2_LEVELDB_FUSION_DIR/$TEST_SRC-leveldb $GIT_PROJ_DIR/$EXE_P2_FUSE \ $DATASET_VIDEOS \ $DATASET_CONFIG \ $NETWORK2_LEVELDB_FUSION_DIR $WINDOW_NETWORK2 $STEP 0 \ 2>&1 | tee \ $NETWORK2_LEVELDB_FUSION_DIR/z_log_dataset_fuse.txt echo ======================== echo Phase 2 - B - Creating Mean File of Fused Data echo "Computing image mean for dataset: " $TRAIN_SRC $CAFFE/build/tools/compute_image_mean -backend=leveldb $NETWORK2_LEVELDB_FUSION_DIR/$TRAIN_SRC-leveldb $NETWORK2_LEVELDB_FUSION_DIR/mean.binaryproto read -t 10 ############### echo ------------------------------------------------------ echo echo DONE processing script "$0" echo echo ------------------------------------------------------ ================================================ FILE: ibrahim16-cvpr/script.sh ================================================ #!/usr/bin/env sh CAFFE=/cs/vml2/msibrahi/workspaces/caffe-lstm GIT_PROJ_DIR=$CAFFE/examples/deep-activity-rec DATASET_VIDEOS=/cs/vml2/msibrahi/Datasets/Greg-Volleyball/volleyball DATASET_CONFIG=$GIT_PROJ_DIR/dataset-config OUTPUT_DIR=$GIT_PROJ_DIR/ibrahim16-cvpr TRAIN_SRC=trainval TEST_SRC=test WINDOW_NETWORK1=5 WINDOW_NETWORK2=10 STEP=1 GPU_ID=0 NETWORK1_HIDDEN=3000 NETWORK1_TRAIN_ITERS=15000 # Fusion Styles: Choose 0-7 # 0 => Conc / 1 group 1 => Max / 1 group 4 => Avg / 1 group 7 => sum / 1 group # 2 => Max / 2 groups 5 => Avg / 2 groups 3 => Max / 4 groups 6 => Avg / 4 groups FUSION_STYLE=2 FUSION_TRAIN_ITER=3493 FUSION_TEST_ITER=1337 VAR_FUSION_LAYERS_VAL="2 fc7 lstm1" VAR_FUSION_LAYERS="FUSION_LAYERS" declare "$VAR_FUSION_LAYERS=$VAR_FUSION_LAYERS_VAL" NETWORK1_DIR=$OUTPUT_DIR/p1-network1 NETWORK1_MODEL_PATH=$NETWORK1_DIR/z_snapshot_iter_$NETWORK1_TRAIN_ITERS.caffemodel NETWORK2_LEVELDB_FUSION_DIR=$OUTPUT_DIR/p2-ready-fuse NETWORK2_EXTRACTION_NETOWRK_DIR=$OUTPUT_DIR/p3-extract-features-networks
NETWORK2_DIR=$OUTPUT_DIR/p4-network2 # Programs EXE_P1_NETWORK1=exePhase1_2 EXE_P2_FUSE=exePhase1_2 EXE_P4_NETWORK2=exePhase3 ########################################################################### echo ------------------------------------------------------ echo echo "START processing script" "$0" echo "OUTPUT Directory is " $OUTPUT_DIR echo echo Doing path VALIDATIONS ## Some directories / files validation [ -d $CAFFE ] || echo Directory $CAFFE NOOOT exist [ -d $OUTPUT_DIR ] || echo Directory $OUTPUT_DIR NOOOT exist [ -d $DATASET_VIDEOS ] || echo Directory $DATASET_VIDEOS NOOOT exist [ -d $DATASET_CONFIG ] || echo Directory $DATASET_CONFIG NOOOT exist [ -d $NETWORK1_DIR ] || echo Directory $NETWORK1_DIR NOOOT exist [ -d $NETWORK2_EXTRACTION_NETOWRK_DIR ] || echo Directory $NETWORK2_EXTRACTION_NETOWRK_DIR NOOOT exist [ -d $NETWORK2_DIR ] || echo Directory $NETWORK2_DIR NOOOT exist echo READY...STEADY...Gooo ? read -t 60 cd $CAFFE ########################################################################### echo ------------------------------------------------------ echo Phase 1 - Generating Network 1 Data - $NETWORK1_DIR # Clean if some previous wrong data [ -d $NETWORK1_DIR/$TRAIN_SRC-leveldb ] && [ ! -d $NETWORK1_DIR/$TEST_SRC-leveldb ] && rm -r $NETWORK1_DIR/$TRAIN_SRC-leveldb [ ! 
-d $NETWORK1_DIR/$TRAIN_SRC-leveldb ] && [ -d $NETWORK1_DIR/$TEST_SRC-leveldb ] && rm -r $NETWORK1_DIR/$TEST_SRC-leveldb $GIT_PROJ_DIR/$EXE_P1_NETWORK1 \ $DATASET_VIDEOS \ $DATASET_CONFIG \ $NETWORK1_DIR $WINDOW_NETWORK1 $STEP 1 \ 2>&1 | tee \ $NETWORK1_DIR/z_log_dataset_net1.txt echo ======================== echo Phase 1 - B - Computing Mean of Network 1 Training Data - $NETWORK1_DIR $NETWORK1_DIR/$TRAIN_SRC-$TEST_SRC-create-mean-script.sh read -t 10 echo ======================== echo Phase 1 - C - Network 1 Training $NETWORK1_DIR/$TRAIN_SRC-$TEST_SRC-exe-script.sh read -t 10 ############### echo ------------------------------------------------------ echo Phase 2 - A - Generating Data to be Fused - $NETWORK2_LEVELDB_FUSION_DIR mkdir -p $NETWORK2_LEVELDB_FUSION_DIR # Clean if some previous wrong data [ -d $NETWORK2_LEVELDB_FUSION_DIR/$TRAIN_SRC-leveldb ] && [ ! -d $NETWORK2_LEVELDB_FUSION_DIR/$TEST_SRC-leveldb ] && rm -r $NETWORK2_LEVELDB_FUSION_DIR/$TRAIN_SRC-leveldb [ ! -d $NETWORK2_LEVELDB_FUSION_DIR/$TRAIN_SRC-leveldb ] && [ -d $NETWORK2_LEVELDB_FUSION_DIR/$TEST_SRC-leveldb ] && rm -r $NETWORK2_LEVELDB_FUSION_DIR/$TEST_SRC-leveldb $GIT_PROJ_DIR/$EXE_P2_FUSE \ $DATASET_VIDEOS \ $DATASET_CONFIG \ $NETWORK2_LEVELDB_FUSION_DIR $WINDOW_NETWORK2 $STEP 0 \ 2>&1 | tee \ $NETWORK2_LEVELDB_FUSION_DIR/z_log_dataset_fuse.txt echo ======================== echo Phase 2 - B - Creating Mean File of Fused Data echo "Computing image mean for dataset: " $TRAIN_SRC $CAFFE/build/tools/compute_image_mean -backend=leveldb $NETWORK2_LEVELDB_FUSION_DIR/$TRAIN_SRC-leveldb $NETWORK2_LEVELDB_FUSION_DIR/mean.binaryproto read -t 10 ############### echo ------------------------------------------------------ # Info: # iterations (e.g. 10863 = 17 * 639. 639 is the # of test cases. # Info: Inside the prototxt, a batch # equal to # of persons (e.g. 5). 17 = 2 * 8 +1. 
8 is the right temporal width echo Phase 3 - Generarting LSTM 2 Data - $NETWORK2_DIR # Clean if some previous wrong data [ -d $NETWORK2_DIR/$TRAIN_SRC-leveldb ] && [ ! -d $NETWORK2_DIR/$TEST_SRC-leveldb ] && rm -r $NETWORK2_DIR/$TRAIN_SRC-leveldb [ ! -d $NETWORK2_DIR/$TRAIN_SRC-leveldb ] && [ -d $NETWORK2_DIR/$TEST_SRC-leveldb ] && rm -r $NETWORK2_DIR/$TEST_SRC-leveldb $GIT_PROJ_DIR/$EXE_P4_NETWORK2 \ $FUSION_STYLE $WINDOW_NETWORK2 GPU $GPU_ID \ $NETWORK1_MODEL_PATH \ $NETWORK2_EXTRACTION_NETOWRK_DIR/$TRAIN_SRC.prototxt \ $FUSION_LAYERS \ $NETWORK2_DIR/$TRAIN_SRC-leveldb \ $FUSION_TRAIN_ITER \ $FUSION_STYLE $WINDOW_NETWORK2 GPU $GPU_ID \ $NETWORK1_MODEL_PATH \ $NETWORK2_EXTRACTION_NETOWRK_DIR/$TEST_SRC.prototxt \ $FUSION_LAYERS \ $NETWORK2_DIR/$TEST_SRC-leveldb \ $FUSION_TEST_ITER \ 2>&1 | tee \ $NETWORK2_DIR/z_log_dataset_net2.txt read -t 10 ############### echo ------------------------------------------------------ echo Phase 4 - A - LSTM 2 Training - $NETWORK2_DIR $NETWORK2_DIR/$TRAIN_SRC-$TEST_SRC-exe-script.sh echo ======================== echo Phase 4 - B - Temporal Evaluation $NETWORK2_DIR/$TRAIN_SRC-$TEST_SRC-window-evaluation-exe-script.sh ############### echo ------------------------------------------------------ echo echo DONE processing script "$0" echo echo ------------------------------------------------------ ================================================ FILE: ibrahim16-cvpr-simple/p1-network1/clip_w5.txt ================================================ examples/deep-activity-rec/ibrahim16-cvpr-simple/none.jpg 0 examples/deep-activity-rec/ibrahim16-cvpr-simple/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr-simple/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr-simple/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr-simple/none.jpg 1 ================================================ FILE: ibrahim16-cvpr-simple/p1-network1/trainval-test-create-mean-script.sh ================================================ #!/usr/bin/env sh # This 
script converts the volleyball data into leveldb format. OUTDIR=examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1 echo "Computing image mean for trainval dataset: " $OUTDIR ./build/tools/compute_image_mean -backend=leveldb $OUTDIR/trainval-leveldb $OUTDIR/mean.binaryproto echo "Done." ================================================ FILE: ibrahim16-cvpr-simple/p1-network1/trainval-test-exe-script-resume.sh ================================================ #!/usr/bin/env sh OUTDIR=examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1 GPU_ID=0 ITER=15000 echo "Resuming Caffe using GPU" $GPU_ID "In Directory " $OUTDIR "Starting from iteration " $ITER ./build/tools/caffe train 2> $OUTDIR/z_trainval-test-log-resume.txt \ --solver $OUTDIR/trainval-test-solver.prototxt --snapshot=$OUTDIR/z_snapshot_iter_$ITER.solverstate --gpu $GPU_ID ================================================ FILE: ibrahim16-cvpr-simple/p1-network1/trainval-test-exe-script.sh ================================================ #!/usr/bin/env sh OUTDIR=examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1 GPU_ID=0 echo "Running Caffe using GPU" $GPU_ID "In Directory " $OUTDIR ./build/tools/caffe train 2> $OUTDIR/z_trainval-test-log.txt \ --solver $OUTDIR/trainval-test-solver.prototxt --weights models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel --gpu $GPU_ID ================================================ FILE: ibrahim16-cvpr-simple/p1-network1/trainval-test-network.prototxt ================================================ name: "volleyball_game_proto" layer { name: "clip_data" type: "ImageData" top: "dummy" top: "clip" image_data_param { source: "examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1/clip_w5.txt" batch_size: 250 } } layer { name: "Silence" type: "Silence" bottom: "dummy" } layer { name: "volleyball_game" type: "Data" top: "data" top: "label" include { phase: TRAIN } transform_param { mirror: true crop_size: 227 mean_file:
"examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1/mean.binaryproto" } data_param { source: "examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1/trainval-leveldb" batch_size: 10 backend: LEVELDB } } layer { name: "volleyball_game" type: "Data" top: "data" top: "label" include { phase: TEST } transform_param { mirror: false crop_size: 227 mean_file: "examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1/mean.binaryproto" } data_param { source: "examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1/test-leveldb" batch_size: 10 backend: LEVELDB } } layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 96 kernel_size: 11 stride: 4 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1" } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "norm1" type: "LRN" bottom: "pool1" top: "norm1" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } } layer { name: "conv2" type: "Convolution" bottom: "norm1" top: "conv2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 2 kernel_size: 5 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2" } layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "norm2" type: "LRN" bottom: "pool2" top: "norm2" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } } layer { name: "conv3" type: "Convolution" bottom: "norm2" top: "conv3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 
weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu3" type: "ReLU" bottom: "conv3" top: "conv3" } layer { name: "conv4" type: "Convolution" bottom: "conv3" top: "conv4" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu4" type: "ReLU" bottom: "conv4" top: "conv4" } layer { name: "conv5" type: "Convolution" bottom: "conv4" top: "conv5" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu5" type: "ReLU" bottom: "conv5" top: "conv5" } layer { name: "pool5" type: "Pooling" bottom: "conv5" top: "pool5" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "fc6" type: "InnerProduct" bottom: "pool5" top: "fc6" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu6" type: "ReLU" bottom: "fc6" top: "fc6" } layer { name: "drop6" type: "Dropout" bottom: "fc6" top: "fc6" dropout_param { dropout_ratio: 0.5 } } layer { name: "fc7" type: "InnerProduct" bottom: "fc6" top: "fc7" # Note that lr_mult can be set to 0 to disable any fine-tuning of this, and any other, layer param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu7" type: "ReLU" bottom: "fc7" top: "fc7" } layer { name: "drop7" type: "Dropout" bottom: "fc7" top: "fc7" dropout_param { dropout_ratio: 0.5 } } layer { name: "lstm1" type: 
"Lstm" bottom: "fc7" bottom: "clip" top: "lstm1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } lstm_param { num_output: 3000 clipping_threshold: 0.1 weight_filler { type: "gaussian" std: 0.1 } bias_filler { type: "constant" } } } layer { name: "fc8_volleyball" type: "InnerProduct" bottom: "lstm1" top: "fc8_volleyball" # lr_mult is set to higher than for other layers, because this layer is starting from random while the others are already trained param { lr_mult: 10 decay_mult: 1 } param { lr_mult: 20 decay_mult: 0 } inner_product_param { num_output: 9 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc8_volleyball" bottom: "label" } layer { name: "accuracy" type: "Accuracy" bottom: "fc8_volleyball" bottom: "label" top: "accuracy" include { phase: TEST } } ================================================ FILE: ibrahim16-cvpr-simple/p1-network1/trainval-test-solver.prototxt ================================================ net: "examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1/trainval-test-network.prototxt" # testing examples are 77655 ~= 250 * 310 test_iter: 1 test_interval: 1 display: 1 base_lr: 0.00001 lr_policy: "step" gamma: 0.1 stepsize: 1 max_iter: 1 momentum: 0.9 weight_decay: 0.0005 random_seed: 750301 solver_mode: GPU snapshot: 1 snapshot_prefix: "examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1/z_snapshot" snapshot_after_train: true ================================================ FILE: ibrahim16-cvpr-simple/p3-extract-features-networks/test.prototxt ================================================ name: "volleyball_proto" layer { name: "volleyball" type: "Data" top: "data" top: "label" include { phase: TEST } transform_param { mirror: false crop_size: 227 mean_file: "examples/deep-activity-rec/ibrahim16-cvpr-simple/p2-ready-fuse/mean.binaryproto" } data_param { source: 
"examples/deep-activity-rec/ibrahim16-cvpr-simple/p2-ready-fuse/test-leveldb" batch_size: 120 backend: LEVELDB } } layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 96 kernel_size: 11 stride: 4 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1" } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "norm1" type: "LRN" bottom: "pool1" top: "norm1" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } } layer { name: "conv2" type: "Convolution" bottom: "norm1" top: "conv2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 2 kernel_size: 5 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2" } layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "norm2" type: "LRN" bottom: "pool2" top: "norm2" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } } layer { name: "conv3" type: "Convolution" bottom: "norm2" top: "conv3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu3" type: "ReLU" bottom: "conv3" top: "conv3" } layer { name: "conv4" type: "Convolution" bottom: "conv3" top: "conv4" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu4" 
type: "ReLU" bottom: "conv4" top: "conv4" } layer { name: "conv5" type: "Convolution" bottom: "conv4" top: "conv5" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu5" type: "ReLU" bottom: "conv5" top: "conv5" } layer { name: "pool5" type: "Pooling" bottom: "conv5" top: "pool5" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "fc6" type: "InnerProduct" bottom: "pool5" top: "fc6" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu6" type: "ReLU" bottom: "fc6" top: "fc6" } layer { name: "drop6" type: "Dropout" bottom: "fc6" top: "fc6" dropout_param { dropout_ratio: 0.5 } } layer { name: "fc7" type: "InnerProduct" bottom: "fc6" top: "fc7" # Note that lr_mult can be set to 0 to disable any fine-tuning of this, and any other, layer param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu7" type: "ReLU" bottom: "fc7" top: "fc7" } layer { name: "drop7" type: "Dropout" bottom: "fc7" top: "fc7" dropout_param { dropout_ratio: 0.5 } } layer { name: "lstm1" type: "Lstm" bottom: "fc7" top: "lstm1" param { lr_mult: 10 decay_mult: 1 } param { lr_mult: 20 decay_mult: 0 } lstm_param { num_output: 3000 clipping_threshold: 0.1 weight_filler { type: "gaussian" std: 0.1 } bias_filler { type: "constant" } } } layer { name: "fc8_volleyball" type: "InnerProduct" bottom: "lstm1" top: "fc8_volleyball" # lr_mult is set to higher than for other layers, because this layer is starting from random while the others are already trained param { lr_mult: 10 
decay_mult: 1 } param { lr_mult: 20 decay_mult: 0 } inner_product_param { num_output: 9 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc8_volleyball" bottom: "label" } layer { name: "accuracy" type: "Accuracy" bottom: "fc8_volleyball" bottom: "label" top: "accuracy" include { phase: TEST } } layer { name: "prop" type: "Softmax" bottom: "fc8_volleyball" top: "prop" } layer { name: "Silence" type: "Silence" bottom: "prop" } ================================================ FILE: ibrahim16-cvpr-simple/p3-extract-features-networks/trainval.prototxt ================================================ name: "volleyball_proto" layer { name: "volleyball" type: "Data" top: "data" top: "label" include { phase: TEST } transform_param { mirror: false crop_size: 227 mean_file: "examples/deep-activity-rec/ibrahim16-cvpr-simple/p2-ready-fuse/mean.binaryproto" } data_param { source: "examples/deep-activity-rec/ibrahim16-cvpr-simple/p2-ready-fuse/trainval-leveldb" batch_size: 120 backend: LEVELDB } } layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 96 kernel_size: 11 stride: 4 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1" } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "norm1" type: "LRN" bottom: "pool1" top: "norm1" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } } layer { name: "conv2" type: "Convolution" bottom: "norm1" top: "conv2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 2 kernel_size: 5 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } } } layer { 
name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2" } layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "norm2" type: "LRN" bottom: "pool2" top: "norm2" lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } } layer { name: "conv3" type: "Convolution" bottom: "norm2" top: "conv3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu3" type: "ReLU" bottom: "conv3" top: "conv3" } layer { name: "conv4" type: "Convolution" bottom: "conv3" top: "conv4" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu4" type: "ReLU" bottom: "conv4" top: "conv4" } layer { name: "conv5" type: "Convolution" bottom: "conv4" top: "conv5" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu5" type: "ReLU" bottom: "conv5" top: "conv5" } layer { name: "pool5" type: "Pooling" bottom: "conv5" top: "pool5" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "fc6" type: "InnerProduct" bottom: "pool5" top: "fc6" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu6" type: "ReLU" bottom: "fc6" top: "fc6" } layer { name: "drop6" type: "Dropout" bottom: "fc6" top: "fc6" dropout_param { dropout_ratio: 0.5 } } layer { name: "fc7" type: "InnerProduct" bottom: "fc6" top: 
"fc7" # Note that lr_mult can be set to 0 to disable any fine-tuning of this, and any other, layer param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 1 } } } layer { name: "relu7" type: "ReLU" bottom: "fc7" top: "fc7" } layer { name: "drop7" type: "Dropout" bottom: "fc7" top: "fc7" dropout_param { dropout_ratio: 0.5 } } layer { name: "lstm1" type: "Lstm" bottom: "fc7" top: "lstm1" param { lr_mult: 10 decay_mult: 1 } param { lr_mult: 20 decay_mult: 0 } lstm_param { num_output: 3000 clipping_threshold: 0.1 weight_filler { type: "gaussian" std: 0.1 } bias_filler { type: "constant" } } } layer { name: "fc8_volleyball" type: "InnerProduct" bottom: "lstm1" top: "fc8_volleyball" # lr_mult is set to higher than for other layers, because this layer is starting from random while the others are already trained param { lr_mult: 10 decay_mult: 1 } param { lr_mult: 20 decay_mult: 0 } inner_product_param { num_output: 9 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc8_volleyball" bottom: "label" } layer { name: "accuracy" type: "Accuracy" bottom: "fc8_volleyball" bottom: "label" top: "accuracy" include { phase: TEST } } layer { name: "prop" type: "Softmax" bottom: "fc8_volleyball" top: "prop" } layer { name: "Silence" type: "Silence" bottom: "prop" } ================================================ FILE: ibrahim16-cvpr-simple/p4-network2/clip_w10.txt ================================================ examples/deep-activity-rec/ibrahim16-cvpr-simple/none.jpg 0 examples/deep-activity-rec/ibrahim16-cvpr-simple/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr-simple/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr-simple/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr-simple/none.jpg 1 
examples/deep-activity-rec/ibrahim16-cvpr-simple/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr-simple/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr-simple/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr-simple/none.jpg 1 examples/deep-activity-rec/ibrahim16-cvpr-simple/none.jpg 1 ================================================ FILE: ibrahim16-cvpr-simple/p4-network2/trainval-test-exe-script-resume.sh ================================================ #!/usr/bin/env sh OUTDIR=examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2 GPU_ID=0 ITER=10000 echo "Resuming Caffe using GPU" $GPU "In Directory " $OUTDIR "Starting from iteration " $ITER ./build/tools/caffe train 2> $OUTDIR/z_trainval-test-log-resume.txt \ --solver $OUTDIR/trainval-test-solver.prototxt --snapshot=$OUTDIR/z_snapshot_iter_$ITER.solverstate --gpu $GPU_ID ================================================ FILE: ibrahim16-cvpr-simple/p4-network2/trainval-test-exe-script.sh ================================================ #!/usr/bin/env sh OUTDIR=examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2 GPU_ID=0 echo "Running Caffe using GPU" $GPU "In Directory " $OUTDIR ./build/tools/caffe train 2> $OUTDIR/z_trainval-test-log.txt \ --solver $OUTDIR/trainval-test-solver.prototxt --gpu $GPU_ID ================================================ FILE: ibrahim16-cvpr-simple/p4-network2/trainval-test-network.prototxt ================================================ name: "volleyball_level2" layer { name: "clip_data" type: "ImageData" top: "dummy" top: "clip" image_data_param { source: "examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2/clip_w10.txt" batch_size: 10 } } layer { name: "Silence" type: "Silence" bottom: "dummy" } layer { name: "volleyball_level2" type: "Data" top: "data" top: "label" include { phase: TRAIN } data_param { source: "examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2/trainval-leveldb" batch_size: 10 backend: LEVELDB } } layer { name: 
"volleyball_level2" type: "Data" top: "data" top: "label" include { phase: TEST } data_param { source: "examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2/test-leveldb" batch_size: 10 backend: LEVELDB } } layer { name: "fc1" type: "InnerProduct" bottom: "data" top: "fc1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 3000 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu7" type: "ReLU" bottom: "fc1" top: "fc1" } layer { name: "drop7" type: "Dropout" bottom: "fc1" top: "fc1" dropout_param { dropout_ratio: 0.5 } } layer { name: "lstm1" type: "Lstm" bottom: "fc1" bottom: "clip" top: "lstm1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } lstm_param { num_output: 1000 clipping_threshold: 0.1 weight_filler { type: "gaussian" std: 0.1 } bias_filler { type: "constant" } } } layer { name: "fc_last" type: "InnerProduct" bottom: "lstm1" top: "fc_last" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 8 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc_last" bottom: "label" } layer { name: "accuracy" type: "Accuracy" bottom: "fc_last" bottom: "label" top: "accuracy" include { phase: TEST } } ================================================ FILE: ibrahim16-cvpr-simple/p4-network2/trainval-test-solver.prototxt ================================================ net: "examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2/trainval-test-network.prototxt" # testing examples are 13370 = 1337 * 10. 
test_iter: 2 test_interval: 2 display: 2 base_lr: 0.0001 lr_policy: "step" gamma: 0.1 stepsize: 2 max_iter: 2 momentum: 0.9 weight_decay: 0.0005 random_seed: 750301 solver_mode: GPU snapshot: 2 snapshot_prefix: "examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2/z_snapshot" snapshot_after_train: true ================================================ FILE: ibrahim16-cvpr-simple/p4-network2/trainval-test-window-evaluation-exe-script.sh ================================================ #!/usr/bin/env sh OUTDIR=examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2 WINDOW=10 GPU_ID=0 TEST_EXAMPLES=7 ITER=2 LAYER=prop examples/deep-activity-rec/exePhase4 \ $WINDOW \ GPU $GPU_ID \ $OUTDIR/z_snapshot_iter_$ITER.caffemodel \ $OUTDIR/trainval-test-window-evaluation-network.prototxt \ $LAYER \ $TEST_EXAMPLES \ 2>&1 | tee $OUTDIR/z_trainval-test-window-evaluation-log-prop.txt ================================================ FILE: ibrahim16-cvpr-simple/p4-network2/trainval-test-window-evaluation-network.prototxt ================================================ name: "volleyball_level2" layer { name: "volleyball_data" type: "Data" top: "data" top: "label" include { phase: TEST } data_param { source: "examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2/test-leveldb" batch_size: 10 backend: LEVELDB } } layer { name: "fc1" type: "InnerProduct" bottom: "data" top: "fc1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 3000 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu7" type: "ReLU" bottom: "fc1" top: "fc1" } layer { name: "drop7" type: "Dropout" bottom: "fc1" top: "fc1" dropout_param { dropout_ratio: 0.5 } } layer { name: "lstm1" type: "Lstm" bottom: "fc1" top: "lstm1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } lstm_param { num_output: 1000 clipping_threshold: 0.1 weight_filler { type: "gaussian" std: 0.1 } 
bias_filler { type: "constant" } } } layer { name: "fc_last" type: "InnerProduct" bottom: "lstm1" top: "fc_last" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 8 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc_last" bottom: "label" } layer { name: "accuracy" type: "Accuracy" bottom: "fc_last" bottom: "label" top: "accuracy" include { phase: TEST } } layer { name: "prop" type: "Softmax" bottom: "fc_last" top: "prop" } layer { name: "argmax" type: "ArgMax" argmax_param { out_max_val: false top_k: 1 } bottom: "prop" top: "argmax" } ================================================ FILE: ibrahim16-cvpr-simple/script-clean.sh ================================================ #!/usr/bin/env bash ROOT_DIR=examples/deep-activity-rec/ibrahim16-cvpr-simple # Phase 1 artifacts rm -r $ROOT_DIR/p1-network1/test-leveldb rm -r $ROOT_DIR/p1-network1/trainval-leveldb rm $ROOT_DIR/p1-network1/mean.binaryproto rm $ROOT_DIR/p1-network1/z_log_dataset_net1.txt rm $ROOT_DIR/p1-network1/z_trainval-test-log.txt rm $ROOT_DIR/p1-network1/z_snapshot_iter_*.caffemodel rm $ROOT_DIR/p1-network1/z_snapshot_iter_*.solverstate # Phasse 2 rm -r $ROOT_DIR/p2-ready-fuse # Phasse 3 & 4 rm -r $ROOT_DIR/p4-network2/test-leveldb rm -r $ROOT_DIR/p4-network2/trainval-leveldb rm $ROOT_DIR/p4-network2/z_log_dataset_net2.txt rm $ROOT_DIR/p4-network2/z_trainval-test-log.txt rm $ROOT_DIR/p4-network2/z_trainval-test-window-evaluation-log-prop.txt rm $ROOT_DIR/p4-network2/z_snapshot_iter_*.caffemodel rm $ROOT_DIR/p4-network2/z_snapshot_iter_*.solverstate ================================================ FILE: ibrahim16-cvpr-simple/script-simple-expected-log.txt ================================================ mostafa@mostafa:~/workspaces/git/caffe-lstm$ examples/deep-activity-rec/ibrahim16-cvpr-simple/script-simple.sh 
------------------------------------------------------ START processing script examples/deep-activity-rec/ibrahim16-cvpr-simple/script-simple.sh OUTPUT Directory is /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple Doing path VALIDATIONS READY...STEADY...Gooo ? ------------------------------------------------------ Phase 1 - Generating Network 1 Data - /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1 Start: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/exePhase1_2 LSTM 1 preparation Loading the dataset... Preparing Dataset: train from config file: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/dataset-config-simple/train.txt ************************ Preparing Dataset: val from config file: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/dataset-config-simple/val.txt ************************ Preparing Dataset: test from config file: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/dataset-config-simple/test.txt 41 is processed ************************ Preparing Dataset: trainval from config file: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/dataset-config-simple/trainval.txt 39 is processed ************************ train dataset is EMPTY val dataset is EMPTY Total frames for dataset test = 2 Total frames for dataset trainval = 2 Total videos = 2 - total frames = 4 Scenes Labels: l-pass 0 r_spike 1 Persons Labels: blocking 5 digging 1 falling 7 moving 2 setting 3 spiking 6 standing 0 waiting 4 Scenes Labels frequency: l-pass 2 r_spike 2 Players Labels frequency: blocking 1 digging 2 falling 2 moving 5 setting 1 spiking 1 standing 33 waiting 3 Temporal window = 5 with step = 1 Creating a new dataset Creates a database at: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1/test-leveldb/ (H, W, C) = 256 256 3 WARNING: Logging before 
InitGoogleLogging() is written to STDERR I0612 16:35:57.784998 14753 leveldb-writer.cpp:52] Opening leveldb /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1/test-leveldb/ Creating a new dataset Creates a database at: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1/trainval-leveldb/ (H, W, C) = 256 256 3 I0612 16:35:57.907624 14753 leveldb-writer.cpp:52] Opening leveldb /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1/trainval-leveldb/ Extracting shuffled elements from test Data Set. Total videos = 1 Total images for current data set is 2. Overall entries will be <= 120 E0612 16:36:01.151640 14753 leveldb-writer.cpp:223] /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1/test-leveldb/: Processed 120 files. Labels Statistics for db /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1/test-leveldb/ Total Records 120 ********************************************************* Key = 0 => Value 100 instances Key = 1 => Value 5 instances Key = 2 => Value 10 instances Key = 3 => Value 5 instances ********************************************************* Key = 0 => Value 83.3 % Key = 1 => Value 4.2 % Key = 2 => Value 8.3 % Key = 3 => Value 4.2 % Extracting shuffled elements from trainval Data Set. Total videos = 1 Total images for current data set is 2. Overall entries will be <= 120 E0612 16:36:04.130539 14753 leveldb-writer.cpp:223] /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1/trainval-leveldb/: Processed 120 files. 
Labels Statistics for db /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1/trainval-leveldb/ Total Records 120 ********************************************************* Key = 0 => Value 65 instances Key = 1 => Value 5 instances Key = 2 => Value 15 instances Key = 4 => Value 15 instances Key = 5 => Value 5 instances Key = 6 => Value 5 instances Key = 7 => Value 10 instances ********************************************************* Key = 0 => Value 54.2 % Key = 1 => Value 4.2 % Key = 2 => Value 12.5 % Key = 4 => Value 12.5 % Key = 5 => Value 4.2 % Key = 6 => Value 4.2 % Key = 7 => Value 8.3 % Bye: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/exePhase1_2 ======================== Phase 1 - B - Computing Mean of Network 1 Training Data - /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1 Computing image mean for trainval dataset: examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1 Done. ======================== Phase 1 - C - Network 1 Training Running Caffe using GPU In Directory examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1 ------------------------------------------------------ Phase 2 - A - Generating Data to be Fused - /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p2-ready-fuse Start: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/exePhase1_2 Data Fusion for LSTM 2 Loading the dataset... 
Preparing Dataset: train from config file: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/dataset-config-simple/train.txt ************************ Preparing Dataset: val from config file: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/dataset-config-simple/val.txt ************************ Preparing Dataset: test from config file: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/dataset-config-simple/test.txt 41 is processed ************************ Preparing Dataset: trainval from config file: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/dataset-config-simple/trainval.txt 39 is processed ************************ train dataset is EMPTY val dataset is EMPTY Total frames for dataset test = 2 Total frames for dataset trainval = 2 Total videos = 2 - total frames = 4 Scenes Labels: l-pass 0 r_spike 1 Persons Labels: blocking 5 digging 1 falling 7 moving 2 setting 3 spiking 6 standing 0 waiting 4 Scenes Labels frequency: l-pass 2 r_spike 2 Players Labels frequency: blocking 1 digging 2 falling 2 moving 5 setting 1 spiking 1 standing 33 waiting 3 Temporal window = 10 with step = 1 Creating a new dataset Creates a database at: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p2-ready-fuse/test-leveldb/ (H, W, C) = 256 256 3 WARNING: Logging before InitGoogleLogging() is written to STDERR I0612 16:38:30.898288 14832 leveldb-writer.cpp:52] Opening leveldb /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p2-ready-fuse/test-leveldb/ Creating a new dataset Creates a database at: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p2-ready-fuse/trainval-leveldb/ (H, W, C) = 256 256 3 I0612 16:38:31.156790 14832 leveldb-writer.cpp:52] Opening leveldb /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p2-ready-fuse/trainval-leveldb/ Extracting 
shuffled elements from test Data Set. Total videos = 1 Total images for current data set is 2. Overall entries will be = 240 E0612 16:38:38.010151 14832 leveldb-writer.cpp:223] /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p2-ready-fuse/test-leveldb/: Processed 240 files. Labels Statistics for db /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p2-ready-fuse/test-leveldb/ Total Records 240 ********************************************************* Key = 0 => Value 120 instances Key = 1 => Value 120 instances ********************************************************* Key = 0 => Value 50.0 % Key = 1 => Value 50.0 % Extracting shuffled elements from trainval Data Set. Total videos = 1 Total images for current data set is 2. Overall entries will be = 240 E0612 16:38:44.260620 14832 leveldb-writer.cpp:223] /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p2-ready-fuse/trainval-leveldb/: Processed 240 files. 
Labels Statistics for db /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p2-ready-fuse/trainval-leveldb/ Total Records 240 ********************************************************* Key = 0 => Value 120 instances Key = 1 => Value 120 instances ********************************************************* Key = 0 => Value 50.0 % Key = 1 => Value 50.0 % Bye: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/exePhase1_2 ======================== Phase 2 - B - Creating Mean File of Fused Data Computing image mean for dataset: trainval ------------------------------------------------------ Phase 3 - Generarting LSTM 2 Data - /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2 E0612 16:38:56.145875 14855 exePhase3.cpp:346] Make sure to have LD_LIBRARY_PATH pointing to LSTM implementation in case of LSTM E0612 16:38:56.303081 14855 exePhase3.cpp:166] Fusing style = max_pool_players_2 E0612 16:38:56.303119 14855 exePhase3.cpp:169] frames_window = 10 E0612 16:38:56.303136 14855 exePhase3.cpp:171] Expected batch size = 120 E0612 16:38:56.303158 14855 exePhase3.cpp:184] Using CPU E0612 16:38:56.303218 14855 exePhase3.cpp:191] Model: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1/z_snapshot_iter_1.caffemodel E0612 16:38:56.303242 14855 exePhase3.cpp:192] Proto: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p3-extract-features-networks/trainval.prototxt E0612 16:38:56.303267 14855 exePhase3.cpp:194] Creating the test network E0612 16:39:02.240625 14855 exePhase3.cpp:197] Loading the Model [libprotobuf WARNING google/protobuf/io/coded_stream.cc:505] Reading dangerously large protocol message. If the message turns out to be larger than 2147483647 bytes, parsing will be halted for security reasons. 
To increase the limit (or to disable these warnings), see CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h. [libprotobuf WARNING google/protobuf/io/coded_stream.cc:78] The total number of bytes read was 568239168 E0612 16:39:17.920809 14855 exePhase3.cpp:206] # of blobs is 2 E0612 16:39:17.920907 14855 exePhase3.cpp:212] blob_name: fc7 E0612 16:39:18.012258 14855 exePhase3.cpp:212] blob_name: lstm1 E0612 16:39:18.012333 14855 exePhase3.cpp:223] num_mini_batches: 2 Creates a database at: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2/trainval-leveldb E0612 16:39:43.161676 14855 exePhase3.cpp:278] E0612 16:39:43.161788 14855 exePhase3.cpp:285] ith Vector Length = 4096 E0612 16:39:43.161806 14855 exePhase3.cpp:285] ith Vector Length = 3000 E0612 16:39:43.165956 14855 exePhase3.cpp:311] Fused Vector Length = 14192 E0612 16:40:10.215795 14855 leveldb-writer.cpp:223] /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2/trainval-leveldb: Processed 20 files. Labels Statistics for db /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2/trainval-leveldb Total Records 20 ********************************************************* Key = 0 => Value 10 instances Key = 1 => Value 10 instances ********************************************************* Key = 0 => Value 50.0 % Key = 1 => Value 50.0 % E0612 16:40:10.326062 14855 exePhase3.cpp:356] Successfully extracted the features! 
E0612 16:40:10.326143 14855 exePhase3.cpp:166] Fusing style = max_pool_players_2 E0612 16:40:10.326162 14855 exePhase3.cpp:169] frames_window = 10 E0612 16:40:10.326179 14855 exePhase3.cpp:171] Expected batch size = 120 E0612 16:40:10.326197 14855 exePhase3.cpp:184] Using CPU E0612 16:40:10.326225 14855 exePhase3.cpp:191] Model: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p1-network1/z_snapshot_iter_1.caffemodel E0612 16:40:10.326256 14855 exePhase3.cpp:192] Proto: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p3-extract-features-networks/test.prototxt E0612 16:40:10.326279 14855 exePhase3.cpp:194] Creating the test network E0612 16:40:18.722841 14855 exePhase3.cpp:197] Loading the Model [libprotobuf WARNING google/protobuf/io/coded_stream.cc:505] Reading dangerously large protocol message. If the message turns out to be larger than 2147483647 bytes, parsing will be halted for security reasons. To increase the limit (or to disable these warnings), see CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h. [libprotobuf WARNING google/protobuf/io/coded_stream.cc:78] The total number of bytes read was 568239168 E0612 16:40:22.197417 14855 exePhase3.cpp:206] # of blobs is 2 E0612 16:40:22.197490 14855 exePhase3.cpp:212] blob_name: fc7 E0612 16:40:22.197540 14855 exePhase3.cpp:212] blob_name: lstm1 E0612 16:40:22.197576 14855 exePhase3.cpp:223] num_mini_batches: 2 Creates a database at: /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2/test-leveldb E0612 16:41:19.411211 14855 leveldb-writer.cpp:223] /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2/test-leveldb: Processed 20 files. 
Labels Statistics for db /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2/test-leveldb Total Records 20 ********************************************************* Key = 0 => Value 10 instances Key = 1 => Value 10 instances ********************************************************* Key = 0 => Value 50.0 % Key = 1 => Value 50.0 % E0612 16:41:19.490344 14855 exePhase3.cpp:356] Successfully extracted the features! ------------------------------------------------------ Phase 4 - A - LSTM 2 Training - /home/mostafa/workspaces/git/caffe-lstm/examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2 Running Caffe using GPU In Directory examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2 ======================== Phase 4 - B - Temporal Evaluation E0612 16:41:46.152066 15089 exePhase4.cpp:252] Make sure to have LD_LIBRARY_PATH pointing to LSTM implementation in case of LSTM E0612 16:41:46.169050 15089 exePhase4.cpp:146] Temporal Window = 10 E0612 16:41:46.169098 15089 exePhase4.cpp:159] Using CPU E0612 16:41:46.169178 15089 exePhase4.cpp:166] Model: examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2/z_snapshot_iter_2.caffemodel E0612 16:41:46.169193 15089 exePhase4.cpp:167] Proto: examples/deep-activity-rec/ibrahim16-cvpr-simple/p4-network2/trainval-test-window-evaluation-network.prototxt E0612 16:41:46.169231 15089 exePhase4.cpp:169] Creating the test network E0612 16:41:48.182070 15089 exePhase4.cpp:172] Loading the Model E0612 16:41:48.637673 15089 exePhase4.cpp:176] blob_name: prop E0612 16:41:48.637748 15089 exePhase4.cpp:181] num_mini_batches: 7 Test 1: Result = 0 GroundTruth = 0 Test 2: Result = 7 GroundTruth = 1 Test 3: Result = 0 GroundTruth = 0 Test 4: Result = 7 GroundTruth = 1 Test 5: Result = 0 GroundTruth = 0 Test 6: Result = 7 GroundTruth = 1 Test 7: Result = 0 GroundTruth = 0 Total testing frames: 7 with temporal window: 1 Temporal accuracy : 57.14 % 
=======================================================================================
Confusion Matrix - Truth (col) / Result(row)
T/R:  0  1  7
=======================================================================================
0:  4  0  0  => Total Correct = 4 / 4 = 100.00 %
1:  0  0  3  => Total Correct = 0 / 3 = 0.00 %
7:  0  0  0  => Total Correct = 0 / 0 = 0.00 %

T/R:  0  1  7
=======================================================================================
0:  100.00  0.00  0.00
1:  0.00  0.00  100.00
7:  0.00  0.00  0.00

To get labels corresponding to IDs..see dataset loading logs

------------------------------------------------------

DONE processing script examples/deep-activity-rec/ibrahim16-cvpr-simple/script-simple.sh

================================================
FILE: ibrahim16-cvpr-simple/script-simple.sh
================================================
#!/usr/bin/env bash

# End-to-end driver for the simplified two-stage activity-recognition pipeline:
# Phase 1 builds + trains network 1, Phase 2 generates the data to fuse,
# Phase 3 extracts fused LSTM-2 features, Phase 4 trains + evaluates network 2.

CAFFE=/cs/vml2/msibrahi/workspaces/caffe-lstm
GIT_PROJ_DIR=$CAFFE/examples/deep-activity-rec
DATASET_VIDEOS=$GIT_PROJ_DIR/volleyball-simple
DATASET_CONFIG=$GIT_PROJ_DIR/dataset-config-simple
OUTPUT_DIR=$GIT_PROJ_DIR/ibrahim16-cvpr-simple

TRAIN_SRC=trainval
TEST_SRC=test
WINDOW_NETWORK1=5
WINDOW_NETWORK2=10
STEP=1
GPU_ID=0
NETWORK1_HIDDEN=3000
NETWORK1_TRAIN_ITERS=1

# Fusion Styles: Choose 0-7
# 0 => Conc / 1 group    1 => Max / 1 group    4 => Avg / 1 group    7 => sum / 1 group
# 2 => Max / 2 groups    5 => Avg / 2 groups   3 => Max / 4 groups   6 => Avg / 4 groups
FUSION_STYLE=2
FUSION_TRAIN_ITER=2
FUSION_TEST_ITER=2
VAR_FUSION_LAYERS_VAL="2 fc7 lstm1"
VAR_FUSION_LAYERS="FUSION_LAYERS"
declare "$VAR_FUSION_LAYERS=$VAR_FUSION_LAYERS_VAL"

NETWORK1_DIR=$OUTPUT_DIR/p1-network1
NETWORK1_MODEL_PATH=$NETWORK1_DIR/z_snapshot_iter_$NETWORK1_TRAIN_ITERS.caffemodel
NETWORK2_LEVELDB_FUSION_DIR=$OUTPUT_DIR/p2-ready-fuse
# Fix: variable was misspelled NETWORK2_EXTRACTION_NETOWRK_DIR; renamed
# consistently at every use inside this script (script-local, no external users).
NETWORK2_EXTRACTION_NETWORK_DIR=$OUTPUT_DIR/p3-extract-features-networks
NETWORK2_DIR=$OUTPUT_DIR/p4-network2

# Programs
EXE_P1_NETWORK1=exePhase1_2
EXE_P2_FUSE=exePhase1_2
EXE_P4_NETWORK2=exePhase3

###########################################################################
echo ------------------------------------------------------
echo
echo "START processing script" "$0"
echo "OUTPUT Directory is " $OUTPUT_DIR
echo
echo Doing path VALIDATIONS

## Some directories / files validation
[ -d $CAFFE ] || echo Directory $CAFFE NOOOT exist
[ -d $OUTPUT_DIR ] || echo Directory $OUTPUT_DIR NOOOT exist
[ -d $DATASET_VIDEOS ] || echo Directory $DATASET_VIDEOS NOOOT exist
[ -d $DATASET_CONFIG ] || echo Directory $DATASET_CONFIG NOOOT exist
[ -d $NETWORK1_DIR ] || echo Directory $NETWORK1_DIR NOOOT exist
[ -d $NETWORK2_EXTRACTION_NETWORK_DIR ] || echo Directory $NETWORK2_EXTRACTION_NETWORK_DIR NOOOT exist
[ -d $NETWORK2_DIR ] || echo Directory $NETWORK2_DIR NOOOT exist

echo READY...STEADY...Gooo ?
read -t 60

cd $CAFFE

###########################################################################
echo ------------------------------------------------------
echo Phase 1 - Generating Network 1 Data - $NETWORK1_DIR

# Clean if some previous wrong data
[ -d $NETWORK1_DIR/$TRAIN_SRC-leveldb ] && [ ! -d $NETWORK1_DIR/$TEST_SRC-leveldb ] && rm -r $NETWORK1_DIR/$TRAIN_SRC-leveldb
[ ! -d $NETWORK1_DIR/$TRAIN_SRC-leveldb ] && [ -d $NETWORK1_DIR/$TEST_SRC-leveldb ] && rm -r $NETWORK1_DIR/$TEST_SRC-leveldb

$GIT_PROJ_DIR/$EXE_P1_NETWORK1 \
  $DATASET_VIDEOS \
  $DATASET_CONFIG \
  $NETWORK1_DIR $WINDOW_NETWORK1 $STEP 1 \
  2>&1 | tee \
  $NETWORK1_DIR/z_log_dataset_net1.txt

echo ========================
echo Phase 1 - B - Computing Mean of Network 1 Training Data - $NETWORK1_DIR
$NETWORK1_DIR/$TRAIN_SRC-$TEST_SRC-create-mean-script.sh
read -t 10

echo ========================
echo Phase 1 - C - Network 1 Training
$NETWORK1_DIR/$TRAIN_SRC-$TEST_SRC-exe-script.sh
read -t 10

###############
echo ------------------------------------------------------
echo Phase 2 - A - Generating Data to be Fused - $NETWORK2_LEVELDB_FUSION_DIR
mkdir -p $NETWORK2_LEVELDB_FUSION_DIR

# Clean if some previous wrong data
[ -d $NETWORK2_LEVELDB_FUSION_DIR/$TRAIN_SRC-leveldb ] && [ ! -d $NETWORK2_LEVELDB_FUSION_DIR/$TEST_SRC-leveldb ] && rm -r $NETWORK2_LEVELDB_FUSION_DIR/$TRAIN_SRC-leveldb
[ ! -d $NETWORK2_LEVELDB_FUSION_DIR/$TRAIN_SRC-leveldb ] && [ -d $NETWORK2_LEVELDB_FUSION_DIR/$TEST_SRC-leveldb ] && rm -r $NETWORK2_LEVELDB_FUSION_DIR/$TEST_SRC-leveldb

$GIT_PROJ_DIR/$EXE_P2_FUSE \
  $DATASET_VIDEOS \
  $DATASET_CONFIG \
  $NETWORK2_LEVELDB_FUSION_DIR $WINDOW_NETWORK2 $STEP 0 \
  2>&1 | tee \
  $NETWORK2_LEVELDB_FUSION_DIR/z_log_dataset_fuse.txt

echo ========================
echo Phase 2 - B - Creating Mean File of Fused Data
echo "Computing image mean for dataset: " $TRAIN_SRC
$CAFFE/build/tools/compute_image_mean -backend=leveldb $NETWORK2_LEVELDB_FUSION_DIR/$TRAIN_SRC-leveldb $NETWORK2_LEVELDB_FUSION_DIR/mean.binaryproto
read -t 10

###############
echo ------------------------------------------------------
# Info: # iterations (e.g. 10863 = 17 * 639. 639 is the # of test cases.
# Info: Inside the prototxt, a batch # equal to # of persons (e.g. 5). 17 = 2 * 8 +1. 8 is the right temporal width
# (typo "Generarting" fixed below; the expected-log artifact still shows the old spelling)
echo Phase 3 - Generating LSTM 2 Data - $NETWORK2_DIR

# Clean if some previous wrong data
[ -d $NETWORK2_DIR/$TRAIN_SRC-leveldb ] && [ ! -d $NETWORK2_DIR/$TEST_SRC-leveldb ] && rm -r $NETWORK2_DIR/$TRAIN_SRC-leveldb
[ ! -d $NETWORK2_DIR/$TRAIN_SRC-leveldb ] && [ -d $NETWORK2_DIR/$TEST_SRC-leveldb ] && rm -r $NETWORK2_DIR/$TEST_SRC-leveldb

$GIT_PROJ_DIR/$EXE_P4_NETWORK2 \
  $FUSION_STYLE $WINDOW_NETWORK2 GPU $GPU_ID \
  $NETWORK1_MODEL_PATH \
  $NETWORK2_EXTRACTION_NETWORK_DIR/$TRAIN_SRC.prototxt \
  $FUSION_LAYERS \
  $NETWORK2_DIR/$TRAIN_SRC-leveldb \
  $FUSION_TRAIN_ITER \
  $FUSION_STYLE $WINDOW_NETWORK2 GPU $GPU_ID \
  $NETWORK1_MODEL_PATH \
  $NETWORK2_EXTRACTION_NETWORK_DIR/$TEST_SRC.prototxt \
  $FUSION_LAYERS \
  $NETWORK2_DIR/$TEST_SRC-leveldb \
  $FUSION_TEST_ITER \
  2>&1 | tee \
  $NETWORK2_DIR/z_log_dataset_net2.txt
read -t 10

###############
echo ------------------------------------------------------
echo Phase 4 - A - LSTM 2 Training - $NETWORK2_DIR
$NETWORK2_DIR/$TRAIN_SRC-$TEST_SRC-exe-script.sh

echo ========================
echo Phase 4 - B - Temporal Evaluation
$NETWORK2_DIR/$TRAIN_SRC-$TEST_SRC-window-evaluation-exe-script.sh

###############
echo ------------------------------------------------------
echo
echo DONE processing script "$0"
echo
echo ------------------------------------------------------

================================================
FILE: src/custom-abbreviation.h
================================================
/*
 * custom-abbreviation.h
 *
 * Created on: 2015-06-08
 * Author: Moustafa S.
Ibrahim */ #ifndef CUSTOM_ABBREVIATION_H_ #define CUSTOM_ABBREVIATION_H_ #include namespace MostCV { typedef vector vi; typedef vector vd; typedef vector< vi > vvi; typedef vector< vd > vvd; typedef vector vs; typedef long long ll; typedef long double ld; //typedef unsigned char uchar; const ll OO = (ll)1e10; const double PI = std::acos(-1.0); const long double EPS = (1e-15); // 4 orthogonal directions, 4 diagonal directions and last is same position //int DR11[9] = {1, 0, 0, -1, 1, 1, -1, -1, 0}; //int DC11[9] = {0, 1, -1, 0, -1, 1, -1, 1, 0}; enum DIRS_ENUM {Left, Right, Bottpm, Top}; } #endif /* CUSTOM_ABBREVIATION_H_ */ ================================================ FILE: src/custom-images-macros.h ================================================ /* * custom-images-macros.h * * Created on: 2015-05-21 * Author: Moustafa S. Ibrahim */ #ifndef CUSTOM_IMAGES_MACROS_H_ #define CUSTOM_IMAGES_MACROS_H_ namespace MostCV { #define REPIMG2(y, x, img) for(int y=0;y<(int)(img.rows);++y) for(int x=0;x<(int)(img.cols);++x) #define REPIMG3(y, x, c, img) for(int y=0;y<(int)(img.rows);++y) for(int x=0;x<(int)(img.cols);++x) for(int c=0;c<(int)(img.channels());++c) #define REPIMG_JUMP(y, x, dy, dx, img) for(int y=0;y<(int)(img.rows);y+=dy) for(int x=0;x<(int)(img.cols);x+=dx) } #endif /* CUSTOM_IMAGES_MACROS_H_ */ ================================================ FILE: src/custom-macros.h ================================================ /* * custom-macros.h * * Created on: 2015-05-21 * Author: Moustafa S. 
Ibrahim */ #ifndef CUSTOM_MACROS_H_ #define CUSTOM_MACROS_H_ namespace MostCV { #define ALL(v) ((v).begin()), ((v).end()) #define RALL(v) ((v).rbegin()), ((v).rend()) #define SZ(v) ((int)((v).size())) #define CLR(v, d) memset(v, d, sizeof(v)) #define REP(i, v) for(int i=0;i=(int)(n);--i) #define REPA(v) lpi(i, 0, SZ(v)) lpi(j, 0, SZ(v[i])) // ToDo: http://www.quora.com/What-are-some-macros-that-are-used-in-programming-contests } #endif /* CUSTOM_MACROS_H_ */ ================================================ FILE: src/dlib-tracker-wrapper.cpp ================================================ /* * dlib-tracker-wrapper.cpp * * Created on: 2015-06-22 * Author: Moustafa S. Ibrahim */ #include "dlib-tracker-wrapper.h" #include "custom-images-macros.h" #include using std::cerr; namespace MostCV { DlibTrackerWrapper::DlibTrackerWrapper(Rect initial_location) { initial_location_ = initial_location; step_ = 0; } Rect DlibTrackerWrapper::UpdateTracker(Mat img) { Rect img_rect = Rect(0, 0, img.cols-1, img.rows-1); cv::Mat gray_img; if (CV_8U != img.type() || 1 != img.channels()) cv::cvtColor(img, gray_img, cv::COLOR_BGR2GRAY); else gray_img = img; dlib::array2d dlib_img(gray_img.rows, gray_img.cols); REPIMG2(y, x, gray_img) dlib_img[y][x] = gray_img.at (y, x); if (step_ == 0) { initial_location_ &= img_rect; // Fix first one in case if(initial_location_.area() == 0) { cerr<<"Dlib: Empty rectangle for tracking! 
Let's do workaround\n"; initial_location_ = Rect(0, 0, 1, 1); } tracker_.start_track(dlib_img, dlib::centered_rect(dlib::point(initial_location_.x + initial_location_.width / 2, initial_location_.y + initial_location_.height / 2), initial_location_.width, initial_location_.height)); ++step_; return initial_location_; } tracker_.update(dlib_img); int y1 = tracker_.get_position().top(); int x1 = tracker_.get_position().left(); int y2 = tracker_.get_position().bottom(); int x2 = tracker_.get_position().right(); ++step_; Rect rect = Rect(x1, y1, x2-x1, y2-y1); rect &= img_rect; if(rect.area() < 1) // zero areas usually cause problems. Let's give them 1 area box rect = Rect(0, 0, 1, 1); return rect; } // back like: 0 -1 -2 -3 and forward 0 1 2 3 4 5 6. Helps when tracker centered on frame pair, vector > DlibTrackerWrapper::Process(vector backwardImgs, vector forwardImgs) { vector ret; DlibTrackerWrapper backTracker(initial_location_); for(auto img: backwardImgs) ret.push_back( backTracker.UpdateTracker(img) ); if(forwardImgs.size() > 0) { std::reverse(ret.begin(), ret.end()); std::reverse(backwardImgs.begin(), backwardImgs.end()); backwardImgs.pop_back(); ret.pop_back(); // remove the middle, it will be added again. This is initial_location_ } for(auto img: forwardImgs) { ret.push_back( UpdateTracker(img) ); backwardImgs.push_back(img); } return std::make_pair(backwardImgs, ret); } } ================================================ FILE: src/dlib-tracker-wrapper.h ================================================ /* * dlib-tracker-wrapper.h * * Created on: 2015-06-22 * Author: Moustafa S. 
Ibrahim */ #ifndef DLIB_TRACKER_WRAPPER_H_ #define DLIB_TRACKER_WRAPPER_H_ #include "opencv2/core/core.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" using cv::Mat; using cv::Ptr; using cv::Scalar; using cv::Rect; using cv::Point; using cv::Size; #include #include #include #include #include using std::vector; using std::pair; namespace MostCV { class DlibTrackerWrapper { public: DlibTrackerWrapper(Rect initial_location); Rect UpdateTracker(Mat img); pair, vector > Process(vector backwardImgs, vector forwardImgs); private: dlib::correlation_tracker tracker_; Rect initial_location_; int step_; }; } #endif /* DLIB_TRACKER_WRAPPER_H_ */ ================================================ FILE: src/images-utilities.cpp ================================================ #include "images-utilities.h" #include using std::cout; #include "opencv2/core/core.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" #include "custom-images-macros.h" #include "custom-macros.h" namespace MostCV { void ShowImage(Mat image, int wait, bool bShow, string stringWindowName) { if (bShow) { cv::namedWindow(stringWindowName.c_str(), 1); cv::imshow(stringWindowName.c_str(), image); cv::waitKey(wait); } } void RemoveImagePixels(Mat img, Mat mask, bool is_mask_remove_pixel_black, Point shift) { REPIMG2(y, x, mask) { if (mask.at (y, x) == 0 && !is_mask_remove_pixel_black) continue; if (mask.at (y, x) > 0 && is_mask_remove_pixel_black) continue; if (img.channels() == 3) { for (int c = 0; c < 3; ++c) img.at (y + shift.y, x + shift.x)[c] = 0; } else img.at (y + shift.y, x + shift.x) = 0; } } void FixMask(Mat mask, int threshold) { int cnt = 0; REPIMG2(y, x, mask) { if (mask.at (y, x) >= threshold) { if (mask.at (y, x) != 255) cnt++; mask.at (y, x) = 255; } else { if (mask.at (y, x) != 0) cnt++; mask.at (y, x) = 0; } } //if(cnt) cout<<"FixMask: "< &rectsSoFar, Scalar color) { int lastY = 0; int lastX = 0; Rect imgRect = Rect(0, 0, 
controlsMat.cols - 1, controlsMat.rows - 1); if (rectsSoFar.size()) { Rect r = rectsSoFar.back(); lastY = r.y + r.height + 5; lastX = r.x; } Rect r(lastX, lastY, 100, 30); if ((r & imgRect) != r) { lastY = 0; lastX = r.x + r.width + 5; r = Rect(lastX, lastY, 100, 30); if ((r & imgRect) != r) return false; } cv::rectangle(controlsMat, r, Scalar(255, 255, 255), 2); cv::putText(controlsMat, buttonName, Point(r.x + 2, r.y + r.height / 2), cv::FONT_HERSHEY_SIMPLEX, 0.5, color); rectsSoFar.push_back(r); return true; } vector > GetConnectedComponenets(Mat img, int area_threshold, int pixels_threshold, Scalar lo_diff, Scalar up_diff, int flags) { assert(area_threshold > 0 && pixels_threshold > 0); Mat uchar_img; Rect img_rect(0, 0, img.cols - 1, img.rows - 1); vector > componenets; if (img.channels() > 1) cvtColor(img, uchar_img, CV_BGR2GRAY); else img.copyTo(uchar_img); REPIMG2(y, x, uchar_img) { int pixel_value = (int) uchar_img.at (y, x); if (pixel_value < 1) continue; Rect rect; Mat mask = Mat::zeros(uchar_img.rows + 2, uchar_img.cols + 2, CV_8UC1); int mask_pixels_cnt = floodFill(uchar_img, mask, Point(x, y), Scalar(0), &rect, lo_diff, up_diff, flags); rect &= img_rect; if (rect.area() >= area_threshold && mask_pixels_cnt >= pixels_threshold) { Ptr component = new CComponenets(); MostCV::FixMask(mask); componenets.push_back(component); component->mask = mask(Rect(1, 1, uchar_img.cols, uchar_img.rows)); component->mask_pixels_cnt = mask_pixels_cnt; component->rect = rect; component->flood_starting_point = Point(x, y); component->parent_mask_topleft_point = Point(0, 0); } } return componenets; } Rect GetInternalBlobRect(Mat mask) { assert(mask.type() == CV_8UC1); vector > comps = MostCV::GetConnectedComponenets(mask); if(comps.size() == 0) return Rect(0, 0, 1, 1); Rect union_rect = comps[0]->rect; REP(i, comps) union_rect |= comps[i]->rect; return union_rect; } vector GetCombinedContour(Mat mask) { vector > contours; vector hierarchy; Mat componentCpy; 
mask.copyTo(componentCpy); cv::findContours(componentCpy, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE); vector contoursInOne; REP(j, contours) contoursInOne.insert(contoursInOne.end(), contours[j].begin(), contours[j].end()); return contoursInOne; } Rect GetRect(Mat img) { return Rect(0, 0, img.cols-1, img.rows-1); } void CenterRect(Rect &target_rect, int width, int height) { if(width > target_rect.width) { target_rect.x -= (width - target_rect.width)/2; target_rect.width = width; } if(height > target_rect.height) { target_rect.y -= (height - target_rect.height)/2; target_rect.height = height; } } bool CmpRectTopLeft(const Rect &a, const Rect &b) { if(a.y != b.y) return a.y < b.y; return a.x < b.x; } void SaveVideo(vector images, string path, int fps) { if(images.empty()) { std::cerr<<"ERROR: Empty video\n"; return; } cv::VideoWriter videoObject; videoObject.open(path, CV_FOURCC('X','V','I','D'), fps, Size(images[0].cols, images[0].rows), true); if(!videoObject.isOpened()) { std::cerr<<"ERROR: Problem in out video path: "< #include using std::vector; using std::string; #include "opencv2/core/core.hpp" using cv::Mat; using cv::Ptr; using cv::Point; using cv::Rect; using cv::Scalar; using cv::Size; #include "custom-images-macros.h" namespace MostCV { struct CComponenets { Mat mask; int mask_pixels_cnt; Rect rect; Point flood_starting_point; Point parent_mask_topleft_point; }; void ShowImage(Mat image, int wait = 0, bool bShow = true, string stringWindowName = "Image"); void RemoveImagePixels(Mat img, Mat mask, bool is_mask_remove_pixel_black = false, Point shift = Point(0, 0)); void FixMask(Mat mask, int threshold = 10); void Morphology(Mat mask, bool do_open = true, bool do_close = true, int open_kernel_sz = 3, int close_kernel_sz = 15); vector > GetConnectedComponenets(Mat img, int area_threshold = 1, int pixels_threshold = 1, Scalar lo_diff = Scalar(1), Scalar up_diff = Scalar(1), int flags = 4 + (255 << 8)); Rect GetRect(Mat img); Rect 
GetInternalBlobRect(Mat mask); void CenterRect(Rect &target_rect, int width, int height); vector GetCombinedContour(Mat mask); bool AddButton(Mat controlsMat, string buttonName, vector &rectsSoFar, Scalar color = Scalar(255, 0, 0)); bool CmpRectTopLeft(const Rect &a, const Rect &b); void SaveVideo(vector images, string path, int fps = 25); //////////////////////////// template Mat ToRowMat(const vector &row) { if(row.size() == 0) return Mat(0, 0, cv::DataType::type); const Type *ptr = &row[0]; Mat mat = Mat(1, row.size(), cv::DataType::type); memcpy(mat.data, ptr, row.size()*sizeof(Type)); //Mat tempMat = Mat(featureVec).t(); return mat; } template Mat ToColMat(const vector &col) { if(col.size() == 0) return Mat(0, 0, cv::DataType::type); const Type *ptr = &col[0]; Mat mat = Mat(col.size(), 1, cv::DataType::type); memcpy(mat.data, ptr, col.size()*sizeof(Type)); return mat; } template Mat To2DMat(const vector> & vectors) { Mat mat; for(auto row : vectors) mat.push_back(ToRowMat(row)); return mat; } /* template void perform(function operation, Mat mat) { if(mat.channels() == 2) { REPIMG2(y, x, mat) mat.at<> } else { } } */ } #endif /* IMAGESHELPER_H_ */ ================================================ FILE: src/leveldb-reader.cpp ================================================ /* * leveldb-reader.cpp * * Created on: 2015-05-21 * Author: Moustafa S. 
Ibrahim */ #include #include #include #include "leveldb-reader.h" using std::ifstream; using std::ofstream; using std::endl; using std::cout; #include "utilities.h" MostCV::LevelDBReader::LevelDBReader(const string & database_path, const string & sorted_list_file) { record_idx_ = 0; cache_limit_ = 1000; database_path_ = database_path; is_caching = true; if (sorted_list_file == "") is_caching = false; if (is_caching) { ifstream ifs(sorted_list_file.c_str()); string line; assert(ifs.is_open()); while (getline(ifs, line)) { int pos = line.find(' '); if (pos != -1) line = line.substr(0, pos); pos = line.find_last_of('/'); if (pos != -1) line = line.substr(pos + 1); if (line != "") vectors_names_.push_back(line); } vector images_names_temp = vectors_names_; std::sort(images_names_temp.begin(), images_names_temp.end()); assert(images_names_temp == vectors_names_); } leveldb::Options options; options.create_if_missing = true; leveldb::Status status = leveldb::DB::Open(options, database_path_, &database_); assert(status.ok()); database_iter_ = database_->NewIterator(leveldb::ReadOptions()); assert(database_iter_ != NULL); database_iter_->SeekToFirst(); } MostCV::LevelDBReader::~LevelDBReader() { if (database_iter_ != NULL) delete database_iter_; if (database_ != NULL) delete database_; } bool MostCV::LevelDBReader::GetNextEntry(string &key, vector &retVec, int &label) { if (!database_iter_->Valid()) return false; Datum datum; datum.clear_float_data(); datum.clear_data(); datum.ParseFromString(database_iter_->value().ToString()); key = database_iter_->key().ToString(); label = datum.label(); int expected_data_size = std::max(datum.data().size(), datum.float_data_size()); const int datum_volume_size = datum.channels() * datum.height() * datum.width(); if (expected_data_size != datum_volume_size) { cout << "Something wrong in saved data."; assert(false); } retVec.resize(datum_volume_size); const string& data = datum.data(); if (data.size() != 0) { // Data stored in string, 
e.g. just pixel values of 196608 = 256 * 256 * 3 for (int i = 0; i < datum_volume_size; ++i) retVec[i] = data[i]; } else { // Data stored in real feature vector such as 4096 from feature extraction for (int i = 0; i < datum_volume_size; ++i) retVec[i] = datum.float_data(i); } database_iter_->Next(); ++record_idx_; return true; } bool MostCV::LevelDBReader::GetNextEntryByKey(const string & name, vector &retVec, int &label) { if (!is_caching) { cout << "A sorted file MUST be given. What are you trying to retrive!\n"; assert(false); } if (cache_.count(name)) { retVec = cache_[name]; return true; } string key; while (GetNextEntry(key, retVec, label)) { if ((int) cache_items_.size() == cache_limit_) { map >::iterator it = cache_.find(cache_items_.front()); assert(it != cache_.end()); cache_.erase(it); cache_items_.pop_front(); } cache_[vectors_names_[record_idx_ - 1]] = retVec; cache_items_.push_back(vectors_names_[record_idx_ - 1]); if (vectors_names_[record_idx_ - 1] == name) return true; } cout << "Reached end of data: Total Records: " << record_idx_ << "\n"; cout << "Failed to find data for: " << name << " in database path: " << database_path_ << "\n"; assert(false); // We failed to retrieve! 
return false; } void MostCV::LevelDBReader::Dump(const string & file_path, int featureVectorLimit) { record_idx_ = 0; database_iter_->SeekToFirst(); ofstream ofs(file_path.c_str()); vector retVec; string key; int label; while (GetNextEntry(key, retVec, label)) { ofs << "key=" << key << ", label=" << label << ", features length=" << retVec.size(); if (featureVectorLimit > 0) { ofs << ", truncated"; retVec.resize(featureVectorLimit); // To avoid writing much } ofs << ", feature vec= "; for (size_t i = 0; i < retVec.size(); ++i) ofs << retVec[i] << " "; ofs << "\n"; } ofs.close(); cout << "\nDump done: Total Records: " << record_idx_ << "\n"; } void MostCV::LevelDBReader::DumpSmall(const string &file_path, int featureVectorLimit, bool make_random) { record_idx_ = 0; database_iter_->SeekToFirst(); ofstream ofs(file_path.c_str()); vector retVec; string key; int label; for (int cnt = 0; cnt < 500 && GetNextEntry(key, retVec, label); ++cnt) { ofs << "key=" << key << ", label=" << label << ", features length=" << retVec.size(); if (make_random) std::random_shuffle(retVec.begin(), retVec.end()); if (featureVectorLimit > 0) { ofs << ", truncated"; retVec.resize(featureVectorLimit); // To avoid writing much } ofs << ", feature vec= "; for (size_t i = 0; i < retVec.size(); ++i) ofs << retVec[i] << " "; ofs << "\n"; } ofs.close(); cout << "\nDump done: Total Records: " << record_idx_ << "\n"; } void MostCV::LevelDBReader::ReadLabels(vector &labels, int max_rows) { record_idx_ = 0; database_iter_->SeekToFirst(); labels.clear(); string key; int label; vector retVec; for (int row = 0; GetNextEntry(key, retVec, label); ++row) { if(max_rows != -1 && max_rows == row) break; labels.push_back(label); } } int MostCV::LevelDBReader::GetRecordsCount() { record_idx_ = 0; database_iter_->SeekToFirst(); string key; int label; vector retVec; while (GetNextEntry(key, retVec, label)) ; return record_idx_; } void MostCV::LevelDBReader::SeekToHead() { record_idx_ = 0; 
database_iter_->SeekToFirst(); } ================================================ FILE: src/leveldb-reader.h ================================================ /* * leveldb-reader.h * * Created on: 2015-05-21 * Author: Moustafa S. Ibrahim */ /* * The file handles the reading of leveldb files. The database hold set of feature vectors of same length. */ #ifndef LEVELDB_READER_H_ #define LEVELDB_READER_H_ #include #include #include #include #include #include #include #include using std::map; using std::deque; using std::vector; using std::string; using std::endl; using std::cout; #include #include #include #include #include "caffe/proto/caffe.pb.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/net.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/db.hpp" #include "caffe/util/io.hpp" #include "caffe/vision_layers.hpp" using caffe::Blob; using caffe::Caffe; using caffe::Datum; using caffe::Net; namespace MostCV { /* * The class opens a leveldb directory, which has set of feature vectors (e.g. extracted by feature_extract tool from caffe tool). * In order, each feature vector has a name is a given in a "sorted" file. * User either can retrieve all feature vectors in order or filter based on name. * * User is expected to use one type only of the GetNextEntry methods. Similarly, if user used Dump method, shouldn't try to use other methods. * Reason behind such limitation: All the methods seek in the file. E.g., after dumping, no more rows to read. * * Usage Example: * * LevelDBReader reader(database_path, sorted_images_list_file); * vector feature_vector; * * while(reader.GetNextEntry()) * doSomething(feature_vector); * */ class LevelDBReader { public: /* * Open and prepare the database for reading. The database is allowed to have more rows than the file such that extra rows has no corresponding name. * * The file names should be sorted. Reason behind that is allowing efficient retrieval (e.g. using caching to last 200 rows). 
As a result, leveldb should be sorted too based on this key. * * In case no file given, then Just retrieve sequentially from DB. This is more suitable for dumping purposes. */ LevelDBReader(const string & database_path, const string & sorted_list_file = ""); ~LevelDBReader(); // Read the next entry from the file. If no more rows, return false. bool GetNextEntry(string &key, vector &retVec, int &label); // Given entry name from the sorted_images_list_file, return corresponding vector. Consecutive calls should be ordered in name. // If not so, it shouldn't be far from the last sorted element to be retrieved from caching. We cache last X elements. bool GetNextEntryByKey(const string & name, vector &retVec, int &label); // For debugging purposes, dump the database to a file. Truncate after the first "limit" elements. void Dump(const string &file_path, int featureVectorLimit = -1); void DumpSmall(const string &file_path, int featureVectorLimit = -1, bool make_random = true); void ReadLabels(vector &labels, int max_rows = -1); int GetRecordsCount(); void SeekToHead(); private: bool is_caching; vector vectors_names_; string database_path_; leveldb::DB* database_; leveldb::Iterator* database_iter_; // Caching Variables map > cache_; deque cache_items_; int cache_limit_; // Current row index in retrieval int record_idx_; }; } #endif /* LEVELDB_READER_H_ */ ================================================ FILE: src/leveldb-writer.cpp ================================================ /* * LeveldbWriter.cpp * * Created on: 2015-04-02 * Author: Moustafa S. 
Ibrahim */ #include #include "leveldb-writer.h" using std::cerr; using std::cout; #include "utilities.h" const int WRITING_LIMIT = 1000; namespace MostCV { LeveldbWriter::LeveldbWriter(string db_path_, int resize_height_, int resize_width_, int volumeSize, bool is_virtual_) { max_label_cnt = -1; db_path = db_path_; resize_height = resize_height_; resize_width = resize_width_; volume_size = volumeSize; is_virtual = is_virtual_; cerr<<"\n\nCreates a database at: "< 0) { // then something already defined for the shape datum.set_channels(volume_size); datum.set_height(resize_height); datum.set_width(resize_width); cerr<<"\t(H, W, C) = "<max_label_cnt = max_label_cnt; } void LeveldbWriter::setDatumLabel(int id) { assert(!is_closed); assert(id >= 0); if (max_label_cnt != -1 && id >= max_label_cnt) { cerr << "Wrong label! (Received, expected) = " << id << " - " << max_label_cnt << "\n"; assert(false); } datum.set_label(id); labels.insert(id); labelsVec.push_back(id); } void LeveldbWriter::addDatumToBatch(string key) { assert(!is_closed); if (key != "" && keys.insert(key).second == false) cerr << "Warning: key duplication: " << key << "\n"; if(is_virtual) return; string value; assert(datum.SerializeToString(&value)); string prefix = MostCV::toIntStr("0000000", internal_idx++) + "@"; batch->Put(prefix + key, value); if (++countId % WRITING_LIMIT == 0) writeBatch(); clearDatum(); } void LeveldbWriter::addDatumToBatch(caffe::Datum &datum, string key, int label) { assert(!is_closed); if (keys.insert(key).second == false) cerr << "Warning: Key duplication: " << key << "\n"; assert(label >= 0); string value; datum.set_label(label); labels.insert(label); labelsVec.push_back(label); if(is_virtual) return; assert(datum.SerializeToString(&value)); string prefix = MostCV::toIntStr("0000000", internal_idx++) + "@"; batch->Put(prefix + key, value); if (++countId % WRITING_LIMIT == 0) writeBatch(); clearDatum(); } bool LeveldbWriter::addVectorDatum(const vector &feature_vec) { 
assert(!is_closed); if(is_virtual) return true; clearDatum(); if (resize_height <= 0) { // use first vector to define the outline datum.set_height(resize_height = feature_vec.size()); datum.set_channels(1); datum.set_width(1); } else assert((int )feature_vec.size() == resize_height * resize_width * volume_size); for (int p = 0; p < (int) feature_vec.size(); ++p) datum.add_float_data(feature_vec[p]); return true; } bool LeveldbWriter::addImageToDatum(Mat imgMat_origin, int num_channels) { assert(!is_closed); if(is_virtual) return true; assert(resize_width > 0 && resize_height > 0); assert(imgMat_origin.channels() == num_channels); // Weird to send it :D Mat imgMat; cv::resize(imgMat_origin, imgMat, Size(resize_width, resize_height)); // add to db: 256 * 256 * 3 = 196608 string* datum_string = datum.mutable_data(); if (num_channels == 3) { for (int c = 0; c < num_channels; ++c) { for (int h = 0; h < imgMat.rows; ++h) { for (int w = 0; w < imgMat.cols; ++w) { datum_string->push_back(static_cast(imgMat.at(h, w)[c])); } } } } else { for (int h = 0; h < imgMat.rows; ++h) { for (int w = 0; w < imgMat.cols; ++w) { datum_string->push_back(static_cast(imgMat.at(h, w))); } } } return true; } bool LeveldbWriter::addImageToDatum(const string& filename, int num_channels) { assert(!is_closed); if(is_virtual) return true; int cv_read_flag = (num_channels == 3 ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE); Mat imgMat_origin = cv::imread(filename, cv_read_flag); if (!imgMat_origin.data) { LOG(ERROR)<< "Could not open or find file " << filename; return false; } return addImageToDatum(imgMat_origin, num_channels); } void LeveldbWriter::writeBatch() { if (is_closed) return; if(is_virtual) return; if (countId == lastCountId) // nothing changed return; leveldb::Status status = db->Write(leveldb::WriteOptions(), batch); CHECK(status.ok()) << "Failed to write the batch. 
Count id: " << countId << "\n"; delete batch; batch = new leveldb::WriteBatch(); LOG(ERROR)< #include #include using std::vector; using std::set; using std::string; #include #include #include #include "caffe/proto/caffe.pb.h" #include "opencv2/core/core.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" using cv::Mat; using cv::Size; namespace MostCV { class LeveldbWriter { public: // Using zero parameters would mean not interested to add addImageToDatum functionality. LeveldbWriter(string db_path, int resize_height = -1, int resize_width = 1, int volumeSize = 1, bool is_virtual = false); ~LeveldbWriter(); void clearDatum(); void setDatumLabel(int id); bool addImageToDatum(const string& filename, int num_channels); bool addImageToDatum(Mat img, int num_channels); bool addVectorDatum(const vector &feature_vec); void addDatumToBatch(string key = ""); void addDatumToBatch(caffe::Datum &datum, string key, int label); void setLabelsRange(int max_label_cnt); void forceFinalize(); private: void writeBatch(); leveldb::DB* db; leveldb::WriteBatch* batch; caffe::Datum datum; int countId; int lastCountId; string db_path; int resize_height; int resize_width; int volume_size; int internal_idx; set labels; //helps in verification. vector labelsVec; // print purposes int max_label_cnt; set keys; bool is_closed; bool is_virtual; }; } #endif /* LeveldbWriter_H_ */ ================================================ FILE: src/rect-helper.cpp ================================================ /* * RectHelper.cpp * * Created on: 2015-07-06 * Author: Moustafa S. 
Ibrahim */ #include "rect-helper.h" #include "images-utilities.h" #include "utilities.h" #include "custom-macros.h" namespace MostCV { RectHelper::RectHelper(Rect rect, double score) { r = rect; conf_score = score; color = Scalar(rand() % 256, rand() % 256, rand() % 256); // random color } vector RectHelper::ToRectHelpers(const vector &rectangles_vec) { vector ret; for(auto rect : rectangles_vec) ret.push_back(RectHelper(rect)); return ret; } vector RectHelper::ToRects(const vector &rectangles_vec) { vector ret; for(auto rect : rectangles_vec) ret.push_back(rect.r); return ret; } //////////////////////////// Static Methods ///////////////////////////// void RectHelper::DrawRects(Mat img, const vector &rectangles_vec, bool is_make_copy, bool is_show, Scalar color) { Mat imgTemp; if (is_make_copy) { img.copyTo(imgTemp); img = imgTemp; } for (auto rect_helper : rectangles_vec) cv::rectangle(img, rect_helper.r, (color[0] == -1) ? rect_helper.color : color, 2); int maxArea = 600 * 800; int dif = sqrt(img.rows * img.cols / maxArea); if(dif > 1) { Size size(img.cols / dif, img.rows / dif); Mat toImg; cv::resize(img, toImg, size); img = toImg; } MostCV::ShowImage(img, 0, is_show); } map > RectHelper::LoadImagesRectangles(string path_x1_y1_w_h){ map > retMap; ifstream ifs(path_x1_y1_w_h); int cnt; string image_name; while(ifs>>image_name>>cnt) { vector rectHelpers; while(cnt--) { double x, y, w, h; double score; ifs>>x>>y>>w>>h>>score; rectHelpers.push_back(RectHelper(Rect(x, y, w, h), score)); } retMap[image_name] = rectHelpers; } ifs.close(); return retMap; } void RectHelper::WriteImagesRectangles(const map > &image_rect_helpers_Map, string path_x1_y1_w_h) { ofstream ofs(path_x1_y1_w_h); for (auto img_rects_pair : image_rect_helpers_Map) { ofs< &rects, double conf_score_threshold) { for (size_t i = 0; i < rects.size(); ++i) { if(MostCV::dcmp(rects[i].conf_score, conf_score_threshold) < 0) { rects.erase(rects.begin() + i); --i; } } } bool __CmpSortByConfidence(const 
RectHelper &a, const RectHelper& b) { return MostCV::dcmp(a.conf_score, b.conf_score) < 0; } void RectHelper::SortByConfidence(vector &rects) { sort(RALL(rects), __CmpSortByConfidence); } bool __CmpSortByTopLeftPoint(const RectHelper &a, const RectHelper& b) { int d = MostCV::dcmp(a.r.x, b.r.x); if(d != 0) return d < 0; return MostCV::dcmp(a.r.y, b.r.y) < 0; } void RectHelper::SortByTopLeftPoint(vector &rects) { sort(RALL(rects), __CmpSortByTopLeftPoint); } } ================================================ FILE: src/rect-helper.h ================================================ /* * RectHelper.h * * Created on: 2015-07-06 * Author: Moustafa S. Ibrahim */ #ifndef RECTHELPER_H_ #define RECTHELPER_H_ #include #include #include #include #include using std::vector; using std::map; using std::string; using std::endl; using std::cout; using std::ifstream; using std::ofstream; #include "opencv2/core/core.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" using cv::Mat; using cv::Scalar; using cv::Rect; using cv::Point; using cv::Size; namespace MostCV { class RectHelper { public: RectHelper(Rect rect = Rect(0, 0, 0, 0), double score = -1); static vector ToRectHelpers(const vector &rectangles_vec); static vector ToRects(const vector &rectangles_vec); static void DrawRects(Mat img, const vector &rectangles_vec, bool is_make_copy = true, bool is_show = true, Scalar color = Scalar(-1, -1, -1)); static void SortByConfidence(vector &rects); static void SortByTopLeftPoint(vector &rects); static void FilterBelowConfidenceThreshold(vector &rects, double conf_score_threshold); static map > LoadImagesRectangles(string path_x1_y1_w_h); static void WriteImagesRectangles(const map > &imageRectHelpersMap, string path_x1_y1_w_h); Rect r; double conf_score; string category; // E.g. 
Car bbox int category_idx; Scalar color; // For drawing Mat image; // Image the rectangle belong to it string image_name; string image_path; string image_parent_path; }; } #endif /* RECTHELPER_H_ */ ================================================ FILE: src/utilities.cpp ================================================ /* * Utilities.cpp * * Created on: 2015-03-13 * Author: Moustafa S. Ibrahim */ #include "utilities.h" #include #include #include #include using std::memcpy; using std::fabs; #include #include #include #include #include namespace bst_fs = boost::filesystem; using namespace boost::filesystem; #include "custom-abbreviation.h" namespace MostCV { int dcmp(double x, double y) { return fabs(x - y) <= EPS ? 0 : x < y ? -1 : 1; } map BuildStringIdMap(set classes) { map classId; REPIT(strIt, classes) { string str = *strIt; if (classId.count(str) == 0) { int sz = classId.size(); classId[str] = sz; } } return classId; } map BuildStringIdMap(vector classesVec) { set classes(classesVec.begin(), classesVec.end()); return BuildStringIdMap(classes); } int UpdateStringIdMap(map &classId, string str) { if (classId.count(str) == 0) { int sz = classId.size(); classId[str] = sz; return sz; } return classId[str]; } double round(double d, int precision) { ostringstream oss; oss.setf(std::ios::fixed); oss.precision(precision); oss << d; istringstream iss(oss.str()); iss >> d; return d; } void fixDir(string &dir) { if (SZ(dir) == 0) return; if (dir[SZ(dir) - 1] != PATH_SEP) dir += PATH_SEP; } string getFileName(string dir) { int idx = dir.find_last_of(PATH_SEP); if (idx == -1) return dir; return dir.substr(idx + 1); } bool fileExist(string szFilePath, bool print) { ifstream fin(szFilePath.c_str()); if (!fin) { if (print) printf("fileExist: Failed to open file [%s]\n", szFilePath.c_str()); return false; } fin.close(); return true; } string trim(string str) { int s = 0, e = SZ(str) - 1; REP(i, str) { if (!isspace(str[i])) break; s++; } LPD(i, SZ(str)-1, 0) { if 
(!isspace(str[i])) break; e--; } if (s > e) return ""; return str.substr(s, e - s + 1); } string toLower(string str) { string ret = ""; REP(i, str) ret += tolower(str[i]); return ret; } string toUpper(string str) { string ret = ""; REP(i, str) ret += toupper(str[i]); return ret; } bool startsWith(string str, string pat) { return (int) str.find(pat) == 0; } int random(int range) { return rand() % range; } char* toCharArr(string str) { char *s = new char[SZ(str) + 1]; s[SZ(str)] = '\0'; memcpy(s, str.c_str(), SZ(str)); return s; } string toIntStr(string st, int add, bool append_zeros) { int val = toType(st, 1); val += add; string ret = toString(val); if (append_zeros && ret.size() < st.size()) ret = string(st.size() - ret.size(), '0') + ret; //pad zeros return ret; } string removeExt(string name) { int pos = name.find_last_of('.'); if (pos != -1) name = name.substr(0, pos); return name; } bool IsPathExist(string path) { return boost::filesystem::exists(path); } int CountFileLines(string path) { std::ifstream inFile(path); if(inFile.fail()) { cerr<<"Couldn't open path: "<(inFile), std::istreambuf_iterator(), '\n'); inFile.close(); return ans; } vector GetPerm(int length, int seed) { boost::mt19937 randGenerator(seed); boost::uniform_int<> uniform_int_dist; boost::variate_generator > rand_generator(randGenerator, uniform_int_dist); vector perm(length); for (int i = 0; i < (int) perm.size(); ++i) perm[i] = i; return perm; } string consumeStringParam(int &argc, char** &argv, string variable_name) { return consumeParam(argc, argv, string(""), variable_name); } int consumeIntParam(int &argc, char** &argv, string variable_name) { return consumeParam(argc, argv, 1, variable_name); } double consumeDoubleParam(int &argc, char** &argv, string variable_name) { return consumeParam(argc, argv, 1.0, variable_name); } vector GetDirs(string szRoot) { vector ret; for (bst_fs::directory_iterator itr(szRoot); itr != bst_fs::directory_iterator(); ++itr) { string path_str = 
itr->path().c_str(); if (bst_fs::is_directory((itr->status()))) ret.push_back(path_str); } sort(ret.begin(), ret.end()); return ret; } vector GetDirsNames(string szRoot) { vector ret; for (bst_fs::directory_iterator itr(szRoot); itr != bst_fs::directory_iterator(); ++itr) { string path_str = itr->path().c_str(); if (bst_fs::is_directory((itr->status()))) ret.push_back(itr->path().filename().c_str()); } sort(ret.begin(), ret.end()); return ret; } vector GetFiles(string szRoot, string endwith) { vector ret; for (bst_fs::directory_iterator itr(szRoot); itr != bst_fs::directory_iterator(); ++itr) { string path_str = itr->path().c_str(); if (bst_fs::is_regular_file((itr->status()))) { if(endwith == "" || boost::algorithm::ends_with(path_str, endwith)) ret.push_back(path_str); } } sort(ret.begin(), ret.end()); return ret; } vector GetFilesExt(string szRoot, string endwith) { vector ret; for (bst_fs::directory_iterator itr(szRoot); itr != bst_fs::directory_iterator(); ++itr) { string path_str = itr->path().c_str(); if (bst_fs::is_regular_file((itr->status()))) { if(endwith == "" || boost::algorithm::ends_with(path_str, endwith)) ret.push_back(itr->path().extension().c_str()); } } sort(ret.begin(), ret.end()); return ret; } vector GetFilesNames(string szRoot, string endwith) { vector ret; for (bst_fs::directory_iterator itr(szRoot); itr != bst_fs::directory_iterator(); ++itr) { string path_str = itr->path().c_str(); if (bst_fs::is_regular_file((itr->status()))) { if(endwith == "" || boost::algorithm::ends_with(path_str, endwith)) ret.push_back(itr->path().filename().c_str()); } } sort(ret.begin(), ret.end()); return ret; } } ================================================ FILE: src/utilities.h ================================================ /* * general_utilities.h * * Created on: 2015-03-11 * Author: Moustafa S. 
Ibrahim */ #ifndef GENERAL_UTILITIES_H_ #define GENERAL_UTILITIES_H_ #include "custom-macros.h" #include #include #include #include #include #include #include #include using std::string; using std::ostringstream; using std::istringstream; using std::ifstream; using std::set; using std::map; using std::vector; using std::cout; using std::cerr; using std::pair; namespace MostCV { const char PATH_SEP = '/'; int dcmp(double x, double y); double round(double d, int precision); void fixDir(string &dir); bool IsPathExist(string path); string getFileName(string dir); bool fileExist(string szFilePath, bool print = true); string trim(string str); string toLower(string str); string toUpper(string str); bool startsWith(string str, string pat); int random(int range); char* toCharArr(string str); string toIntStr(string st, int add, bool append_zeros = true); string removeExt(string name); map BuildStringIdMap(set classId); map BuildStringIdMap(vector classesVec); int UpdateStringIdMap(map &items_map, string str); int CountFileLines(string path); vector GetPerm(int length, int seed = 123); string consumeStringParam(int &argc, char** &argv, string variable_name = ""); int consumeIntParam(int &argc, char** &argv, string variable_name = ""); double consumeDoubleParam(int &argc, char** &argv, string variable_name = ""); vector GetDirs(string szRoot); vector GetDirsNames(string szRoot); vector GetFiles(string szRoot, string endwith = ""); vector GetFilesExt(string szRoot, string endwith = ""); vector GetFilesNames(string szRoot, string endwith = ""); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template Type toType(string data, Type indicator, string variable_name = "") { istringstream iss(data); Type item; iss >> item; if(iss.fail()) { if(variable_name != "") cerr<<"Problem in reading variable: "< Type consumeParam(int &argc, char** &argv, Type indicator, string variable_name = "") { assert(argc > 0); string 
ret = argv[0]; --argc, ++argv; return toType(ret, indicator, variable_name); } template char* toCharPtr(Type val) { ostringstream oss; oss << val; return toCharArr(oss.str()); } template string toString(Type val) { ostringstream oss; oss << val; return oss.str(); } template vector readStringItems(string data, Type indicator) { vector items; Type item; istringstream iss(data); while (iss >> item) items.push_back(item); return items; } template vector readFileItems(string filePath, Type indicator, bool print = true) { vector items; Type item; ifstream fin(filePath.c_str()); if (!fin) { if (print) printf("\n\tWARNING: readFileItems: Failed to open file [%s]\n", filePath.c_str()); fflush(stdout); return items; } while (fin >> item) items.push_back(item); fin.close(); return items; } template vector readFileItems(ifstream &fin, Type indicator, int length = -1) { Type item; vector items; if(length == -1) { while (items.sizefin >> item) items.push_back(item); } else { items.resize(length); for (int pos = 0; pos < items.size(); ++pos) { fin >> item; assert(!fin.fail()); items[pos] = item; } } return items; } template vector readStreamItems(istringstream &iss, Type indicator, int length = -1) { Type item; vector items; if(length == -1) { while (iss >> item) items.push_back(item); } else { items.resize(length); for (int pos = 0; pos < items.size(); ++pos) { iss >> item; assert(!iss.fail()); items[pos] = item; } } return items; } template vector > read2dFileItems(string filePath, Type indicator, bool print = true) { vector > items; ifstream fin(filePath.c_str()); if (fin.fail()) { printf("read2dFileItems: Failed to open file [%s]\n", filePath.c_str()); assert(false); } string line; while (getline(fin, line)) { if(line != "") items.push_back(readStringItems(line, indicator)); } return items; } // For every element that has max frequency, add its position. 
Total elements equal to # of unqiue elements // 2 3 2 2 2 2 4 4 => 0 6 1 template vector getMaxFrequentPositions(vector &vec) { vector retVec; map > freq_map; for (int i = 0; i < vec.size(); ++i) freq_map[vec[i]].push_back(i); set >, std::greater > > > freqs; for (auto kv : freq_map) freqs.insert(std::make_pair(kv.second.size(), kv.second)); for (auto group : freqs) retVec.push_back(group.second[0]); return retVec; } template Type getMaxFrequentLabel(vector &vec) { assert(vec.size() > 0); vector pos = getMaxFrequentPositions(vec); return vec[ pos[0] ]; } template map getFrequencyMap(const vector &vec, bool print = false) { map freq_map; for (int i = 0; i < vec.size(); ++i) freq_map[vec[i]]++; if (print) { for (auto kv : freq_map) cerr << "Key = "< Value " << kv.second << " instances\n"; } return freq_map; } template map getFrequencyMapPercent(vector &vec, bool print = false) { map freq_map; for (int i = 0; i < vec.size(); ++i) freq_map[vec[i]]++; if (print) { cerr.precision(1); cerr.setf(std::ios::fixed); for (auto kv : freq_map) cerr << "Key = "< Value " << 100.0 * kv.second / (double)vec.size()<< " %\n"; } return freq_map; } template vector castVector(const vector &row, Type2 indicator) { vector ret; ret.reserve(row.size()); for(auto val : row) ret.push_back((Type2)val); return ret; } template vector> cast2DVector(const vector> &matrix, Type2 indicator) { vector> ret; ret.reserve(matrix.size()); for(auto row : matrix) ret.push_back(castVector(row, indicator)); return ret; } } #endif /* GENERAL_UTILITIES_H_ */ ================================================ FILE: src/volleyball-dataset-mgr.cpp ================================================ /* * volleyball-dataset-mgr.cpp * * Created on: Nov 28, 2015 * Author: msibrahi */ #include "volleyball-dataset-mgr.h" #include namespace bst_fs = boost::filesystem; #include "utilities.h" #include "images-utilities.h" #include #include #include namespace MostCV { map global_video_id_frame_id_to_activityId; map > 
global_video_id_frame_id_to_persons; map persons_actions_ids_map; map scene_activities_ids_map; // statistics map scene_activities_freq_map; map players_activities_freq_map; VolleyballVideoData::VolleyballVideoData(string video_id, string video_dir) { MostCV::fixDir(video_dir); video_id_ = video_id; video_dir_ = video_dir; string annot_file = video_dir + "annotations.txt"; vector > data2dVec = MostCV::read2dFileItems(annot_file, string(""), false); // For every frame, read the players in it for (auto frame_data : data2dVec) { VolleyballPerson person; string frame_id = frame_data[0]; GetFramePath(frame_id); // verify on hard disk frame_data.erase(frame_data.begin()); // if (frame_data[0].find("win") == string::npos) // continue; scene_activities_freq_map[ frame_data[0] ]++; int frame_activity_id = MostCV::UpdateStringIdMap(scene_activities_ids_map, frame_data[0]); annot_frame_id_to_activity_id_map_[frame_id] = frame_activity_id; frame_data.erase(frame_data.begin()); pair min_max_persons_y = { 10000, 0 }; for (int k = 0; k < (int) frame_data.size(); k += 5) { int x = MostCV::toType(frame_data[k + 0], 0); int y = MostCV::toType(frame_data[k + 1], 0); int w = MostCV::toType(frame_data[k + 2], 0); int h = MostCV::toType(frame_data[k + 3], 0); string activity_str = frame_data[k + 4]; players_activities_freq_map[activity_str]++; min_max_persons_y.first = std::min(min_max_persons_y.first, y); min_max_persons_y.second = std::max(min_max_persons_y.second, y + h); person.bbox_ = RectHelper(Rect(x, y, w, h)); person.action_id_ = MostCV::UpdateStringIdMap(persons_actions_ids_map, activity_str); annot_frame_id_persons_map_[frame_id].push_back(person); } if (min_max_persons_y.first < 0) min_max_persons_y.first = 0; annot_frame_id_to_min_max_persons_y_map_[frame_id] = min_max_persons_y; annot_frame_id_vec_.push_back(frame_id); string video_id_frame_id = video_id + "#"+frame_id; global_video_id_frame_id_to_activityId[video_id_frame_id] = frame_activity_id; 
global_video_id_frame_id_to_persons[video_id_frame_id] = annot_frame_id_persons_map_[frame_id]; if (annot_frame_id_persons_map_[frame_id].size() < 7) { cerr<<"video "< &persons = frame_persons_kv.second; sort(persons.begin(), persons.end(), [](const VolleyballPerson &a, const VolleyballPerson &b) { if(a.bbox_.r.x != b.bbox_.r.x) return a.bbox_.r.x < b.bbox_.r.x; return a.bbox_.r.y < b.bbox_.r.y; }); } } void VolleyballVideoData::ResetPersons(string img_name, vector rects) { annot_frame_id_persons_map_[img_name].clear(); for (auto rect : rects) { VolleyballPerson person; person.bbox_ = rect; person.action_id_ = 0; annot_frame_id_persons_map_[img_name].push_back(person); } } vector VolleyballVideoData::GetPersonsRect(string frame_id) { vector rects; for (auto person : annot_frame_id_persons_map_[frame_id]) rects.push_back(person.bbox_); return rects; } // Short Util string VolleyballVideoData::GetFramePath(string frame_id, int shift) { string frame_id_no_ext = frame_id.substr(0, frame_id.find_first_of('.')); string ext = frame_id.substr(frame_id.find_first_of('.')); string target_frame_id = MostCV::toIntStr(frame_id_no_ext, shift, false); string frame_new_path = video_dir_ + frame_id_no_ext + MostCV::PATH_SEP + target_frame_id + ext; assert(boost::filesystem::exists(frame_new_path)); return frame_new_path; } pair, vector > VolleyballVideoData::GetTemporalWindowPaths(string frame_id, int temporal_window, int step, bool is_use_expend_factor) { vector window_frames_after; vector window_frames_before; if (is_use_expend_factor) temporal_window = 2 * temporal_window + 1; LP(w, 1+temporal_window/2) { string path = GetFramePath(frame_id, -w * step); window_frames_before.push_back(path); } LP(w, (temporal_window+1)/2) { string path = GetFramePath(frame_id, w * step); window_frames_after.push_back(path); } return {window_frames_before, window_frames_after}; } vector VolleyballVideoData::GetTemporalWindowPathsMerged(string frame_id, int temporal_window, int step) { vector 
paths; int start = -temporal_window/2; LP(w, temporal_window) { string path = GetFramePath(frame_id, start * step); paths.push_back(path); ++start; } return paths; } void VolleyballVideoData::visualize() { for (auto frame_id : annot_frame_id_vec_) { string path = GetFramePath(frame_id); Mat img = cv::imread(path); cerr< video_ids) { for (int i = 0; i < (int) video_ids.size(); ++i) { for (int j = 0; j < (int) ids_.size(); ++j) { if (video_ids[i] != ids_[j]) continue; std::swap(ids_[i], ids_[j]); std::swap(videos_vec_[i], videos_vec_[j]); } } } vector > VolleyballDatasetPart::GetVideoFrameList(bool is_shuffled, int subset_percent) { vector > database_shuffled; return database_shuffled; boost::mt19937 generator(100); boost::uniform_int<> uni_dist; boost::variate_generator > rand_generator(generator, uni_dist); vector labels; for (auto video : videos_vec_) { int frame_pos = -1; for (auto frame_id : video.annot_frame_id_vec_) { ++frame_pos; database_shuffled.push_back(std::make_pair(video, frame_pos)); } } if (is_shuffled) { cerr << "Before: Total Shuffled Elements: " << database_shuffled.size() << " with 1st video" << database_shuffled.begin()->first.video_id_ << "\n"; std::random_shuffle(database_shuffled.begin(), database_shuffled.end(), rand_generator); cerr << "After: Total Shuffled Elements: " << database_shuffled.size() << " with 1st video" << database_shuffled.begin()->first.video_id_ << "\n"; } int max_size = subset_percent * database_shuffled.size(); database_shuffled.resize(max_size); return database_shuffled; } void VolleyballDatasetPart::visualize() { for (auto video : videos_vec_) video.visualize(); } //--------------------------------------------------------------- VolleyballDatasetMgr::VolleyballDatasetMgr(string config_dir_path, string videos_root_dir) { MostCV::fixDir(config_dir_path); dataset_division_.push_back(VolleyballDatasetPart("train", config_dir_path + "train.txt", videos_root_dir)); dataset_division_.push_back(VolleyballDatasetPart("val", 
config_dir_path + "val.txt", videos_root_dir)); dataset_division_.push_back(VolleyballDatasetPart("test", config_dir_path + "test.txt", videos_root_dir)); dataset_division_.push_back(VolleyballDatasetPart("trainval", config_dir_path + "trainval.txt", videos_root_dir)); total_videos_ = 0; total_frames_ = 0; // Remove empty datasets for (int i = 0; i < (int) dataset_division_.size(); ++i) { if (dataset_division_[i].videos_vec_.size() == 0) { cerr << dataset_division_[i].dataset_name_ << " dataset is EMPTY\n"; dataset_division_.erase(dataset_division_.begin() + i); --i; } } assert(dataset_division_.size() > 0); for (auto dataset : dataset_division_) { int current_fames = 0; for (auto video : dataset.videos_vec_) { total_frames_ += video.annot_frame_id_vec_.size(); current_fames += video.annot_frame_id_vec_.size(); } cerr << "Total frames for dataset " << dataset.dataset_name_ << " = " << current_fames << "\n"; total_videos_ += dataset.videos_vec_.size(); } total_scene_labels = scene_activities_ids_map.size(); total_persons_labels = persons_actions_ids_map.size(); cerr << "\nTotal videos = " << total_videos_ << " - total frames = " << total_frames_ << "\n"; cerr << "\nScenes Labels:\n"; for (auto scene_kv : scene_activities_ids_map) cerr << "\t" << scene_kv.first << " " << scene_kv.second << "\n"; cerr << "\nPersons Labels:\n"; for (auto persons_kv : persons_actions_ids_map) cerr << "\t" << persons_kv.first << " " << persons_kv.second << "\n"; cerr << "\nScenes Labels frequency:\n"; for (auto entry : scene_activities_freq_map) cerr << "\t" << entry.first << " " << entry.second << "\n"; cerr << "\nPlayers Labels frequency:\n"; for (auto entry : players_activities_freq_map) cerr << "\t" << entry.first << " " << entry.second << "\n"; } int VolleyballDatasetMgr::GetActivityId(string video_id, string frame_id) { string video_id_frame_id = video_id + "#"+frame_id; if (global_video_id_frame_id_to_activityId.count(video_id_frame_id) == 0) { cerr<<"problem with "< 
VolleyballDatasetMgr::GetPersons(string video_id, string frame_id) { string video_id_frame_id = video_id + "#"+frame_id; assert( global_video_id_frame_id_to_persons.count(video_id_frame_id) ); return global_video_id_frame_id_to_persons[video_id_frame_id]; } // verify 2*w+1 elements..e.g. centered around every frame void VolleyballDatasetMgr::VerifyDataAvailbility(int temporal_window) { for (auto dataset : dataset_division_) { cerr<<"Verifying dataset: "< #include #include using std::vector; using std::set; using std::string; using std::pair; #include "opencv2/core/core.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" using cv::Mat; using cv::Size; using cv::Ptr; #include "rect-helper.h" namespace MostCV { class VolleyballPerson { public: RectHelper bbox_; int action_id_; }; class VolleyballVideoData { public: VolleyballVideoData() {} VolleyballVideoData(string video_id, string video_dir); string GetFramePath(string frame_id, int shift = 0); pair< vector, vector > GetTemporalWindowPaths(string frame_id, int temporal_window, int step = 1, bool is_use_expend_factor = true); vector GetTemporalWindowPathsMerged(string frame_id, int temporal_window, int step = 1); void ResetPersons(string frame_id, vector rects); vector GetPersonsRect(string frame_id); void SortPersonsPerFrames(); void visualize(); string video_id_; string video_dir_; vector annot_frame_id_vec_; map annot_frame_id_to_activity_id_map_; map> annot_frame_id_to_min_max_persons_y_map_; map > annot_frame_id_persons_map_; }; class VolleyballDatasetPart { public: VolleyballDatasetPart() {} VolleyballDatasetPart(string dataset_name, string config_file, string videos_root_dir); void ReorderVideos(vector video_ids); vector > GetVideoFrameList(bool is_shuffled, int subset_percent); void visualize(); vector ids_; vector videos_vec_; string dataset_name_; string dataset_db_name_; string dataset_db_path_; }; class VolleyballDatasetMgr { public: VolleyballDatasetMgr(string 
config_dir_path, string videos_root_dir); void VerifyDataAvailbility(int temporal_window); int GetActivityId(string video_id, string frame_id); vector GetPersons(string video_id, string frame_id); vector dataset_division_; int total_videos_; int total_frames_; int total_scene_labels; int total_persons_labels; }; } #endif /* VOLLEYBALL_DATASET_MGR_H_FINAL_DATASET_ */ ================================================ FILE: volleyball-simple/39/annotations.txt ================================================ 29885.jpg r_spike 430 575 82 170 waiting 563 491 82 177 waiting 585 645 94 190 digging 990 452 54 168 standing 1003 501 50 240 blocking 957 662 75 189 standing 1203 415 94 150 moving 1137 490 93 180 standing 1135 488 108 218 spiking 1272 718 121 187 moving 1402 539 96 168 moving 1648 535 70 167 standing 29905.jpg l-pass 619 582 86 131 falling 784 581 122 205 falling 740 505 107 139 waiting 1099 609 103 207 standing 1149 455 58 144 standing 1315 678 87 198 standing 1268 401 72 166 standing 1175 576 54 193 standing 1231 543 68 190 standing 1336 485 92 172 standing 1389 549 84 182 standing 1723 556 60 175 standing ================================================ FILE: volleyball-simple/41/annotations.txt ================================================ 19515.jpg l-pass 1487 607 61 146 standing 1399 685 48 157 standing 1367 555 78 128 standing 1031 641 61 158 standing 1022 584 42 151 standing 999 556 63 133 standing 901 594 55 154 standing 722 577 51 128 standing 649 615 60 155 digging 573 669 96 124 standing 510 537 60 111 moving 694 493 55 110 standing 19560.jpg r_spike 1383 574 65 133 standing 1359 610 53 140 standing 1063 611 59 158 standing 1101 565 64 155 standing 1117 547 48 133 standing 1123 522 53 119 standing 899 579 53 154 standing 800 634 53 171 standing 726 550 52 138 standing 760 496 64 137 moving 560 496 42 179 setting 502 616 61 153 standing