/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   *  Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   *  Redistributions in binary form must reproduce the above copyright notice,
 *      this list of conditions and the following disclaimer in the documentation
 *      and/or other materials provided with the distribution.
 *
 *   *  Neither the name of the University of Southampton nor the names of its
 *      contributors may be used to endorse or promote products derived from this
 *      software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.examples.image.feature.local;

import java.io.IOException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

import org.openimaj.data.DataSource;
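import org.openimaj.feature.DoubleFVComparison;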
import org.openimaj.feature.SparseIntFV;
import org.openimaj.feature.SparseIntFVComparison;
import org.openimaj.feature.local.data.LocalFeatureListDataSource;
import org.openimaj.feature.local.list.LocalFeatureList;
import org.openimaj.image.FImage;
import org.openimaj.image.ImageUtilities;
import org.openimaj.image.feature.local.aggregate.BagOfVisualWords;
import org.openimaj.image.feature.local.engine.DoGSIFTEngine;
import org.openimaj.image.feature.local.keypoints.Keypoint;
import org.openimaj.ml.clustering.ByteCentroidsResult;
import org.openimaj.ml.clustering.assignment.HardAssigner;
import org.openimaj.ml.clustering.kmeans.ByteKMeans;
/**
 * Example showing the steps to build a Bag of Visual Words representation of
 * some images using local features. In this example we perform the K-means
 * clustering on the features of the same images that we want to build BoVW
 * representations for; in reality, however, you're more likely to do the
 * clustering on a different set of images (perhaps even on a random sample of
 * their features).
 * 
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 * 
 */
public class BoVWExample {
        /**
         * Main method
         * 
         * @param args
         *            ignored
         * @throws IOException
         *             if the image can't be read
         */
        public static void main(String[] args) throws IOException {
                final URL[] imageUrls = {
                                new URL("http://users.ecs.soton.ac.uk/dpd/projects/openimaj/tutorial/hist1.jpg"),
                                new URL("http://users.ecs.soton.ac.uk/dpd/projects/openimaj/tutorial/hist2.jpg"),
                                new URL("http://users.ecs.soton.ac.uk/dpd/projects/openimaj/tutorial/hist3.jpg")
                };

                // Create an engine to extract some local features; in this case, we'll
                // get SIFT features located at the extrema in the
                // difference-of-Gaussian pyramid.
                final DoGSIFTEngine engine = new DoGSIFTEngine();
                // Now we load some images and extract their features. As we're going
                // to reuse these, we'll store them in a map from the image identifier
                // (its URL) to the list of local features.
                final Map<URL, LocalFeatureList<Keypoint>> imageKeypoints = new HashMap<URL, LocalFeatureList<Keypoint>>();
                for (final URL url : imageUrls) {
                        // load the image
                        final FImage image = ImageUtilities.readF(url);

                        // extract the features and store them in the map against the
                        // image URL
                        imageKeypoints.put(url, engine.findFeatures(image));
                }

                // Next we need to cluster the features to build the set of visual
                // terms. We'll set up the clustering algorithm to create 200 visual
                // terms using approximate k-means.
                final ByteKMeans kmeans = ByteKMeans.createKDTreeEnsemble(200);
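                // (ByteKMeans also has an exact implementation -- see
                // ByteKMeans.createExact(int) -- which should give slightly better
                // centroids at the cost of speed; the KD-tree ensemble approximation
                // is usually a good trade-off for SIFT-sized datasets.)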

                // We need to get the data in the correct format for the clustering.
                // This can be done manually by copying the raw feature data into an
                // array, or by creating a DataSource as shown:
                final DataSource<byte[]> datasource = new LocalFeatureListDataSource<Keypoint, byte[]>(imageKeypoints);
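                // (The manual route would look something like the following sketch,
                // which relies on Keypoint exposing its SIFT descriptor as the
                // public byte[] ivec field:
                //
                // final List<byte[]> vectors = new ArrayList<byte[]>();
                // for (final LocalFeatureList<Keypoint> features : imageKeypoints.values())
                //     for (final Keypoint keypoint : features)
                //         vectors.add(keypoint.ivec);
                // )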

                // Then we use the DataSource as input to the clusterer and get the
                // resultant centroids.
                final ByteCentroidsResult result = kmeans.cluster(datasource);

                // In this example we want to create a standard BoVW model which uses
                // hard-assignment; this means that each local feature is mapped to a
                // single visual word. We can just use the default hard assigner to
                // achieve this.
                final HardAssigner<byte[], ?, ?> assigner = result.defaultHardAssigner();
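                // (If you wanted soft-assignment instead -- spreading each feature
                // over several nearby visual words -- OpenIMAJ also provides a
                // SoftBagOfVisualWords aggregator in the same package as
                // BagOfVisualWords.)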

                // We create a new BagOfVisualWords instance using our assigner, and
                // then use this to extract a vector representing the number of
                // occurrences of each visual word in our input images.
                final BagOfVisualWords<byte[]> bovw = new BagOfVisualWords<byte[]>(assigner);

                // We'll store the resultant occurrence vectors in a map
                final Map<URL, SparseIntFV> occurrences = new HashMap<URL, SparseIntFV>();
                for (final Entry<URL, LocalFeatureList<Keypoint>> entry : imageKeypoints.entrySet()) {
                        occurrences.put(entry.getKey(), bovw.aggregate(entry.getValue()));
                }

                // That's basically it; from this point onwards you could use the
                // vectors to train a classifier, or measure the distance between them
                // to assess the similarity of the input images. To finish up, we'll
                // compute and print the distance matrix of our input images:
                for (final Entry<URL, SparseIntFV> entry1 : occurrences.entrySet()) {
                        for (final Entry<URL, SparseIntFV> entry2 : occurrences.entrySet()) {
                                // this computes the Euclidean distance. Note that we're not
                                // normalising the vectors here, but in reality you probably
                                // would want to (see the sketch below).
                                final double distance = SparseIntFVComparison.EUCLIDEAN.compare(entry1.getValue(), entry2.getValue());

                                System.out.format("%2.3f\t", distance);
                        }
                        System.out.println();
                }
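
                // As noted above, you'd normally normalise the occurrence vectors
                // before comparing them, so the distances reflect the relative
                // distribution of visual words rather than the raw feature counts.
                // The following is a minimal sketch of one way to do that, using a
                // small toUnitLength helper (defined at the bottom of the class; it
                // is not part of the original example) that converts each sparse
                // vector to a dense double vector scaled to unit L2 length. It
                // assumes each image produced at least one feature, so the vectors
                // are non-zero. Here's the distance matrix again, normalised:
                for (final Entry<URL, SparseIntFV> entry1 : occurrences.entrySet()) {
                        for (final Entry<URL, SparseIntFV> entry2 : occurrences.entrySet()) {
                                final double distance = DoubleFVComparison.EUCLIDEAN.compare(
                                                toUnitLength(entry1.getValue()), toUnitLength(entry2.getValue()));

                                System.out.format("%2.3f\t", distance);
                        }
                        System.out.println();
                }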
        }
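
        /**
         * Convert a sparse occurrence vector to a dense, unit-length (L2) double
         * vector. This helper is not part of the original example; it assumes the
         * input vector is non-zero.
         * 
         * @param vector
         *            the visual-word occurrence vector
         * @return a dense copy of the vector, scaled to unit length
         */
        private static double[] toUnitLength(SparseIntFV vector) {
                // get a dense double[] view of the sparse vector
                final double[] values = vector.asDoubleVector();

                // compute the L2 norm of the vector...
                double sumSq = 0;
                for (final double v : values)
                        sumSq += v * v;
                final double norm = Math.sqrt(sumSq);

                // ...and divide each element by it
                for (int i = 0; i < values.length; i++)
                        values[i] /= norm;

                return values;
        }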
}