first commit. include both dpmv5 and coco tools.

tylin · Mar 5, 2015 · 40c741e · 40c741e
1 parent e242259
commit 40c741e
Show file tree

Hide file tree

Showing 321 changed files with 21,542 additions and 0 deletions.
diff --git a/000034.jpg b/000034.jpg
diff --git a/000061.jpg b/000061.jpg
diff --git a/000084.jpg b/000084.jpg
diff --git a/COPYING b/COPYING
@@ -0,0 +1,22 @@
+Copyright (C) 2011, 2012 Ross Girshick, Pedro Felzenszwalb
+Copyright (C) 2008, 2009, 2010 Pedro Felzenszwalb, Ross Girshick
+Copyright (C) 2007 Pedro Felzenszwalb, Deva Ramanan
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/INRIA/README b/INRIA/README
@@ -0,0 +1,29 @@
+This model was trained using the PASCAL VOC compatible INRIA annotations and 
+devkit. You can download them from:
+http://people.cs.uchicago.edu/~rbg/INRIA_PASCAL.tgz.
+
+This archive does not include the images -- it's just the annotations and a 
+PASCAL-compatible directory structure for holding the VOCdevkit, images, and 
+annotations. To get the images, download 
+http://pascal.inrialpes.fr/data/human/INRIAPerson.tar and then copy the 
+following files:
+
+cp INRIAPerson/Test/neg/* INRIA_PASCAL/VOCdevkit/VOC2007/Images/
+cp INRIAPerson/Test/pos/* INRIA_PASCAL/VOCdevkit/VOC2007/Images/
+cp INRIAPerson/Train/neg/* INRIA_PASCAL/VOCdevkit/VOC2007/Images/
+cp INRIAPerson/Train/pos/* INRIA_PASCAL/VOCdevkit/VOC2007/Images/
+
+To train an INRIA pedestrian model, change the pascal.dev_kit configuration 
+in voc_config.m.
+
+conf = cv(conf, 'pascal.dev_kit', [conf.paths.base_dir '/INRIA/VOCdevkit/']);
+
+(Make sure you've extracted the archive so it's in the location above.)
+
+Then train the model by running:
+
+>> global VOC_CONFIG_OVERRIDE;
+>> VOC_CONFIG_OVERRIDE = @voc_config_inriaperson;
+>> pascal('inriaperson', 1);
+
+That should reproduce the released model.
diff --git a/INRIA/inriaperson_final.mat b/INRIA/inriaperson_final.mat
diff --git a/Makefile b/Makefile
@@ -0,0 +1,5 @@
+.PHONY: all clean  # command targets, not files: run them even if such a file exists
+all:  # run the MATLAB 'compile' function from a -nodesktop session, then quit
+	matlab -nodesktop -r "disp('building...'); compile; disp('done!'); quit;"
+clean:  # delete the MEX binaries under bin/
+	rm -rf bin/*.mex*
diff --git a/README b/README
@@ -0,0 +1,216 @@
+Information
+===========
+
+Welcome to voc-release5.
+
+This is the companion code-release for my Ph.D. dissertation ("Appendix
+C").
+
+Project webpage: http://www.cs.uchicago.edu/~rbg/latent/.
+
+Release highlights (see docs/changelog for more details)
+ * Weak-label structural SVM (wl-ssvm) [4]
+ * Person grammar model (NIPS 2011) [4]
+ * Optimization improvements (faster convergence)
+ * Code cleanup, reorganization, and speed improvements
+ * Training is done in memory (no more large temp files on disk!)
+ * Scale prior
+ * Star-cascade included
+ * Bug fixes
+
+This is an implementation of our object detection system based on mixtures
+of deformable part models. This release extends the system in [2], and is
+described in my dissertation [5]. The models in this implementation are
+represented using the grammar formalism presented in [3,4,5]. The learning
+framework supports both binary latent SVM and weak-label structural SVM
+(WL-SSVM), which is presented in [4,5]. The code also supports the person
+object detection grammar described in [4].
+
+The distribution contains object detection and model learning code,
+as well as models trained on the PASCAL and INRIA Person datasets. This
+release also includes code for rescoring detections based on contextual
+information and the star-cascade detection algorithm of [6].
+
+The system is implemented in MATLAB, with various helper functions
+written in MEX C++ for efficiency reasons.
+
+More details, especially about the learning algorithm and model structure,
+can be found in my dissertation [5].
+
+For questions concerning the code please contact Ross Girshick at
+<ross.girshick AT gmail DOT com>.
+
+This project has been supported by the National Science Foundation under Grant
+No. 0534820, 0746569 and 0811340.
+
+
+How to Cite
+===========
+If you use this code or the pretrained models in your research, please cite
+[2] and this specific release:
+
+  @misc{voc-release5,
+    author       = "Girshick, R. B. and Felzenszwalb, P. F. and McAllester, D.",
+    title        = "Discriminatively Trained Deformable Part Models, Release 5",
+    howpublished = "http://people.cs.uchicago.edu/~rbg/latent-release5/"
+  }
+
+You may also want to cite some of the following depending on what aspects
+of this system you are using or comparing against:
+ * [4] for the NIPS 2011 person grammar model and/or Weak-Label
+       Structural SVM
+ * [6] for the cascade detection algorithm
+ * [5] if you discuss specific parts of the system that are not published
+       elsewhere
+
+
+References
+==========
+
+[1] P. Felzenszwalb, D. McAllester, D. Ramanan.  
+A Discriminatively Trained, Multiscale, Deformable Part Model.  
+Proceedings of the IEEE CVPR 2008.
+
+[2] P. Felzenszwalb, R. Girshick, D. McAllester, D. Ramanan.  Object
+Detection with Discriminatively Trained Part Based Models.
+IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 32, No. 9, September 2010.
+
+[3] P. Felzenszwalb, D. McAllester
+Object Detection Grammars.
+University of Chicago, Computer Science TR-2010-02, February 2010
+
+[4] R. Girshick, P. Felzenszwalb, D. McAllester.
+Object Detection with Grammar Models.
+Proceedings of Neural Information Processing Systems (NIPS) 2011.
+
+[5] R. Girshick.
+From Rigid Templates to Grammars: Object Detection with Structured Models.
+Ph.D. dissertation, The University of Chicago, April 2012.
+
+[6] Cascade Object Detection with Deformable Part Models
+P. Felzenszwalb, R. Girshick, D. McAllester.
+In Proceedings of the IEEE CVPR 2010.
+
+
+System Requirements
+===================
+ * Linux or OS X
+ * MATLAB
+ * GCC >= 4.2 (or an older version if it has OpenMP support)
+ * At least 4GB of memory (plus an additional 0.75GB for each
+   parallel matlab worker)
+
+The software was tested on several versions of Linux and Mac OS X using
+MATLAB version R2011a. There may be compatibility issues with older
+versions of MATLAB.
+
+
+Basic Usage
+===========
+
+1. Unpack the code.
+2. Start matlab.
+3. Run the 'compile' function to compile the helper functions.
+   (you may need to edit compile.m to use a different convolution 
+    routine depending on your system)
+4. Load a model and an image.
+5. Use 'process' to detect objects.
+
+Example:
+>> load VOC2007/car_final.mat;       % car model trained on the PASCAL 2007 dataset
+>> im = imread('000034.jpg');        % test image
+>> bbox = process(im, model, -0.5);  % detect objects
+>> showboxes(im, bbox);              % display results
+
+The main functions defined in the object detection code are:
+
+boxes = imgdetect(im, model, thresh)              % detect objects in image im
+bbox = bboxpred_get(model.bboxpred, dets, boxes)  % bounding box location regression
+I = nms(bbox, overlap)                            % non-maximal suppression
+bbox = clipboxes(im, bbox)                        % clip boxes to image boundary
+showboxes(im, boxes)                              % visualize detections
+visualizemodel(model)                             % visualize models
+
+Their usage is demonstrated in the 'demo' script.  
+
+The directories 'VOC20??' contain matlab .mat files with models trained
+on several PASCAL datasets (the train+val subsets).  Loading one of
+these files from within matlab will define a variable 'model' with the
+model trained for a particular object category in the current workspace.  
+The value 'model.thresh' defines a threshold that can be used in the 
+'imgdetect' function to obtain a high recall rate.
+
+
+Using the learning code
+=======================
+
+1. Download and install the 2006-2011 PASCAL VOC devkit and dataset.
+   (you should set VOCopts.testset='test' in VOCinit.m)
+2. Modify 'voc_config.m' according to your configuration.
+3. Start matlab.
+4. Run the 'compile' function to compile the helper functions.
+   (you may need to edit compile.m to use a different convolution 
+    routine depending on your system)
+5. Use the 'pascal' script to train and evaluate a model. 
+
+example:
+>> pascal('bicycle', 3);   % train and evaluate a 6 component bicycle model
+
+The learning code saves a number of intermediate models in a model cache
+directory defined in 'voc_config.m'.
+
+
+Context Rescoring
+=================
+
+This release includes code for rescoring detections based on contextual
+information.  Context rescoring is performed by class-specific SVMs.
+To train these SVMs, the following steps are required.
+1) Models for all 20 PASCAL object classes must be trained.
+2) Detections must be computed on the PASCAL trainval and test datasets.
+   (The function trainval.m can be used for computing detections on the
+    trainval dataset.)
+3) Compile the included libsvm matlab interface:
+   >> cd external/libsvm-3.12/matlab/
+   >> libsvm_make
+
+After these steps have been completed, the context rescoring can be
+executed by calling 'context_rescore()'.
+
+Example:
+>> context_rescore();
+
+
+Cascaded Detection
+==================
+
+The star-cascade algorithm [6] is now included with the rest of the object
+detection system.
+
+
+Multicore Support
+=================
+
+In addition to multithreaded convolutions (see notes in compile.m),
+multicore support is also available through the Matlab Parallel
+Computing Toolbox.  Various loops (e.g., negative example data mining,
+positive latent labeling, and testing) are implemented using the 'parfor'
+parallel for-loop construct.  To take advantage of the parfor loops,
+use the 'matlabpool' command.
+
+example:
+>> matlabpool open 8   % start 8 parallel matlab instances
+
+The parfor loops work without any changes when running a single
+Matlab instance.  Note that due to the use of parfor loops you may
+see non-sequential ordering of loop indexes in the terminal output when
+training and testing.  This is expected behavior.  The parallel computing
+toolbox has been tested on Linux using Matlab 2011a.
+
+The learning code, which uses Mark Schmidt's minConf for L-BFGS with
+simple box constraints, now computes function gradients using OMP based
+multithreading. By default a single thread is used unless a matlabpool
+has already been opened. Note that when computing the function gradient
+with different numbers of threads, the resulting gradients will be very
+slightly different. In practice this leads to small variations in the
+resulting AP scores.
diff --git a/VOC2007/README b/VOC2007/README
@@ -0,0 +1 @@
+These models were trained using PASCAL VOC 2007 trainval.
diff --git a/VOC2007/aeroplane_final.mat b/VOC2007/aeroplane_final.mat
diff --git a/VOC2007/bicycle_final.mat b/VOC2007/bicycle_final.mat
diff --git a/VOC2007/bird_final.mat b/VOC2007/bird_final.mat
diff --git a/VOC2007/boat_final.mat b/VOC2007/boat_final.mat
diff --git a/VOC2007/bottle_final.mat b/VOC2007/bottle_final.mat
diff --git a/VOC2007/bus_final.mat b/VOC2007/bus_final.mat
diff --git a/VOC2007/car_final.mat b/VOC2007/car_final.mat
diff --git a/VOC2007/cat_final.mat b/VOC2007/cat_final.mat
diff --git a/VOC2007/chair_final.mat b/VOC2007/chair_final.mat
diff --git a/VOC2007/cow_final.mat b/VOC2007/cow_final.mat
diff --git a/VOC2007/diningtable_final.mat b/VOC2007/diningtable_final.mat
diff --git a/VOC2007/dog_final.mat b/VOC2007/dog_final.mat
diff --git a/VOC2007/horse_final.mat b/VOC2007/horse_final.mat
diff --git a/VOC2007/motorbike_final.mat b/VOC2007/motorbike_final.mat
diff --git a/VOC2007/person_final.mat b/VOC2007/person_final.mat
diff --git a/VOC2007/person_grammar_final.mat b/VOC2007/person_grammar_final.mat
diff --git a/VOC2007/pottedplant_final.mat b/VOC2007/pottedplant_final.mat
diff --git a/VOC2007/sheep_final.mat b/VOC2007/sheep_final.mat
diff --git a/VOC2007/sofa_final.mat b/VOC2007/sofa_final.mat
diff --git a/VOC2007/train_final.mat b/VOC2007/train_final.mat
diff --git a/VOC2007/tvmonitor_final.mat b/VOC2007/tvmonitor_final.mat
diff --git a/VOC2010/README b/VOC2010/README
@@ -0,0 +1 @@
+These models were trained using PASCAL VOC 2010 trainval.
diff --git a/VOC2010/aeroplane_final.mat b/VOC2010/aeroplane_final.mat
diff --git a/VOC2010/bicycle_final.mat b/VOC2010/bicycle_final.mat
diff --git a/VOC2010/bird_final.mat b/VOC2010/bird_final.mat
diff --git a/VOC2010/boat_final.mat b/VOC2010/boat_final.mat
diff --git a/VOC2010/bottle_final.mat b/VOC2010/bottle_final.mat
diff --git a/VOC2010/bus_final.mat b/VOC2010/bus_final.mat
diff --git a/VOC2010/car_final.mat b/VOC2010/car_final.mat
diff --git a/VOC2010/cat_final.mat b/VOC2010/cat_final.mat
diff --git a/VOC2010/chair_final.mat b/VOC2010/chair_final.mat
diff --git a/VOC2010/cow_final.mat b/VOC2010/cow_final.mat
diff --git a/VOC2010/diningtable_final.mat b/VOC2010/diningtable_final.mat
diff --git a/VOC2010/dog_final.mat b/VOC2010/dog_final.mat
diff --git a/VOC2010/horse_final.mat b/VOC2010/horse_final.mat
diff --git a/VOC2010/motorbike_final.mat b/VOC2010/motorbike_final.mat
diff --git a/VOC2010/person_final.mat b/VOC2010/person_final.mat
diff --git a/VOC2010/person_grammar_final.mat b/VOC2010/person_grammar_final.mat
diff --git a/VOC2010/pottedplant_final.mat b/VOC2010/pottedplant_final.mat
diff --git a/VOC2010/sheep_final.mat b/VOC2010/sheep_final.mat
diff --git a/VOC2010/sofa_final.mat b/VOC2010/sofa_final.mat
diff --git a/VOC2010/train_final.mat b/VOC2010/train_final.mat
diff --git a/VOC2010/tvmonitor_final.mat b/VOC2010/tvmonitor_final.mat
diff --git a/bbox_pred/bboxpred_data.m b/bbox_pred/bboxpred_data.m
@@ -0,0 +1,73 @@
+function [ds_all, bs_all, targets] = bboxpred_data(name)
+% Collect training data for bounding box prediction.
+%   [ds_all, bs_all, targets] = bboxpred_data(name)
+%
+% Return values
+%   ds_all    Predicted bounding boxes (clipped to the image)
+%             One cell per component
+%   bs_all    All filter bounding boxes (unclipped)
+%             One cell per component
+%   targets   Ground-truth bounding boxes (clipped)
+%             One cell per component
+%
+% Argument
+%   name      Object class
+
+conf = voc_config();
+
+% reuse the cached <name>_bboxdata file if it loads; otherwise rebuild it
+try
+  load([conf.paths.model_dir name '_bboxdata']);
+catch
+  % load final model for class
+  load([conf.paths.model_dir name '_final']);
+  % get training data
+  pos = pascal_data(model.class, model.year);
+
+  numpos = length(pos);
+  model.interval = conf.training.interval_fg;
+  pixels = model.minsize * model.sbin / 2;
+  minsize = prod(pixels);  % examples with a smaller box area are skipped
+  nrules = length(model.rules{model.start});
+  pard = cell(1,numpos);  % per-example results, preallocated like parb/part
+  parb = cell(1,numpos);
+  part = cell(1,numpos);
+
+  % compute latent filter locations and record target bounding boxes
+  parfor i = 1:numpos
+    pard{i} = cell(1,nrules);
+    parb{i} = cell(1,nrules);
+    part{i} = cell(1,nrules);
+    fprintf('%s %s: bboxdata: %d/%d\n', procid(), name, i, numpos);
+    bbox = pos(i).boxes;
+    % skip small examples (their cells stay empty and add nothing below)
+    if (bbox(3)-bbox(1)+1)*(bbox(4)-bbox(2)+1) < minsize
+      continue;
+    end
+    % get example
+    im = imreadx(pos(i));
+    [im, bbox] = croppos(im, bbox);
+    [pyra, model_dp] = gdetect_pos_prepare(im, model, bbox, 0.7);
+    [ds, bs] = gdetect_pos(pyra, model_dp, 1, 1, 0.7, [], 0.5);
+    if ~isempty(ds)
+      % component index (second-to-last column of the first detection)
+      c = ds(1,end-1);
+      bs = reduceboxes(model, bs);
+      ds = clipboxes(im, ds);
+      pard{i}{c} = [pard{i}{c}; ds(:,1:end-2)];
+      parb{i}{c} = [parb{i}{c}; bs(:,1:end-2)];
+      part{i}{c} = [part{i}{c}; bbox];
+    end
+  end
+  ds_all = cell(1,nrules);  % merged over all examples, one cell per component
+  bs_all = cell(1,nrules);
+  targets = cell(1,nrules);
+  for i = 1:numpos
+    for c = 1:nrules
+      ds_all{c} = [ds_all{c}; pard{i}{c}];
+      bs_all{c} = [bs_all{c}; parb{i}{c}];
+      targets{c} = [targets{c}; part{i}{c}];
+    end
+  end
+  save([conf.paths.model_dir name '_bboxdata'], 'ds_all', 'bs_all', 'targets');
+end
diff --git a/bbox_pred/bboxpred_get.m b/bbox_pred/bboxpred_get.m
@@ -0,0 +1,42 @@
+function [ds_pred, bs_pred] = bboxpred_get(bboxpred, ds, bs)
+% Get predicted bounding boxes.
+%   [ds_pred, bs_pred] = bboxpred_get(bboxpred, ds, bs)
+%
+% Return values
+%   ds_pred   Output detection windows ([] when bs is empty)
+%   bs_pred   Output filter bounding boxes ([] when bs is empty)
+%
+% Arguments
+%   bboxpred  Bounding box prediction coefficients (see bboxpred_train.m)
+%   ds        Source detection windows
+%   bs        Source filter bounding boxes
+
+ds_pred = [];
+bs_pred = [];
+% no detections: bs(:,end-1) below is an invalid index when bs is []
+if isempty(bs), return; end
+% number of components
+maxc = max(bs(:,end-1));
+for c = 1:maxc
+  % limit boxes to just component c
+  cinds = find(bs(:,end-1) == c);
+  b = bs(cinds,:);
+  d = ds(cinds,:);
+  if isempty(b), continue; end
+  % build test data
+  [A, x1, y1, x2, y2, w, h] = bboxpred_input(d, b(:,1:end-2));
+  % predict displacements
+  dx1 = A*bboxpred{c}.x1;
+  dy1 = A*bboxpred{c}.y1;
+  dx2 = A*bboxpred{c}.x2;
+  dy2 = A*bboxpred{c}.y2;
+
+  % compute object location from predicted displacements
+  tmp = [x1 + (w.*dx1) ...
+         y1 + (h.*dy1) ...
+         x2 + (w.*dx2) ...
+         y2 + (h.*dy2) ...
+         b(:, end)];
+  ds_pred = [ds_pred; tmp];
+  bs_pred = [bs_pred; b];
+end
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		These models were trained using PASCAL VOC 2007 trainval.