/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   *  Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   *  Redistributions in binary form must reproduce the above copyright notice,
 *      this list of conditions and the following disclaimer in the documentation
 *      and/or other materials provided with the distribution.
 *
 *   *  Neither the name of the University of Southampton nor the names of its
 *      contributors may be used to endorse or promote products derived from this
 *      software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */ 030package org.openimaj.examples.image.feature.local; 031 032import java.io.IOException; 033import java.net.URL; 034import java.util.HashMap; 035import java.util.Map; 036import java.util.Map.Entry; 037 038import org.openimaj.data.DataSource; 039import org.openimaj.feature.SparseIntFV; 040import org.openimaj.feature.SparseIntFVComparison; 041import org.openimaj.feature.local.data.LocalFeatureListDataSource; 042import org.openimaj.feature.local.list.LocalFeatureList; 043import org.openimaj.image.FImage; 044import org.openimaj.image.ImageUtilities; 045import org.openimaj.image.feature.local.aggregate.BagOfVisualWords; 046import org.openimaj.image.feature.local.engine.DoGSIFTEngine; 047import org.openimaj.image.feature.local.keypoints.Keypoint; 048import org.openimaj.ml.clustering.ByteCentroidsResult; 049import org.openimaj.ml.clustering.assignment.HardAssigner; 050import org.openimaj.ml.clustering.kmeans.ByteKMeans; 051 052/** 053 * Example showing the steps to build a Bag of Visual Words using local features 054 * for some images. In the example we perform the K-means clustering on the 055 * features of the same images that we want to build BoVW representations for, 056 * however in reality, you're more likely to do the clustering on a different 057 * set of images (perhaps even on a random sample of their features). 
058 * 059 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 060 * 061 */ 062public class BoVWExample { 063 /** 064 * Main method 065 * 066 * @param args 067 * ignored 068 * @throws IOException 069 * if the image can't be read 070 */ 071 public static void main(String[] args) throws IOException { 072 final URL[] imageUrls = { 073 new URL("http://users.ecs.soton.ac.uk/dpd/projects/openimaj/tutorial/hist1.jpg"), 074 new URL("http://users.ecs.soton.ac.uk/dpd/projects/openimaj/tutorial/hist2.jpg"), 075 new URL("http://users.ecs.soton.ac.uk/dpd/projects/openimaj/tutorial/hist3.jpg") 076 }; 077 078 // Create an engine to extract some local features; in this case, we'll 079 // get SIFT features located at the extrema in the 080 // difference-of-Gaussian pyramid. 081 final DoGSIFTEngine engine = new DoGSIFTEngine(); 082 083 // Now we load some load some images and extract their features. As 084 // we're going to reuse these, we'll store them in a map of the image 085 // identifier to list of local features. 086 final Map<URL, LocalFeatureList<Keypoint>> imageKeypoints = new HashMap<URL, LocalFeatureList<Keypoint>>(); 087 for (final URL url : imageUrls) { 088 // load image 089 final FImage image = ImageUtilities.readF(url); 090 091 // extract the features and store them in the map against the image 092 // url 093 imageKeypoints.put(url, engine.findFeatures(image)); 094 } 095 096 // Next we need to cluster the features to build the set of visual 097 // terms. We'll setup the clustering algorithm to create 200 visual 098 // terms using approximate k-means. 099 final ByteKMeans kmeans = ByteKMeans.createKDTreeEnsemble(200); 100 101 // We need to get the data in the correct format for the clustering. 
102 // This can be done manually by copying the raw feature data into an 103 // array, or by creating a DataSource as shown: 104 final DataSource<byte[]> datasource = new LocalFeatureListDataSource<Keypoint, byte[]>(imageKeypoints); 105 106 // Then we use the DataSource as input to the clusterer and get the 107 // resultant centroids 108 final ByteCentroidsResult result = kmeans.cluster(datasource); 109 110 // In this example we want to create a standard BoVW model which uses 111 // hard-assignment; this means that each local feature is mapped to a 112 // single visual word. We can just use the default hard assigner to 113 // achieve this. 114 final HardAssigner<byte[], ?, ?> assigner = result.defaultHardAssigner(); 115 116 // We create a new BagOfVisualWords instance using our assigner, and 117 // then use this to extract a vector representing the number of 118 // occurrences of each visual word in our input images. 119 final BagOfVisualWords<byte[]> bovw = new BagOfVisualWords<byte[]>(assigner); 120 121 // We'll store the resultant occurrence vectors in a map 122 final Map<URL, SparseIntFV> occurrences = new HashMap<URL, SparseIntFV>(); 123 for (final Entry<URL, LocalFeatureList<Keypoint>> entry : imageKeypoints.entrySet()) { 124 occurrences.put(entry.getKey(), bovw.aggregate(entry.getValue())); 125 } 126 127 // That's basically it; from this point onwards you could use the 128 // vectors to train a classifier, or measure the distance between them 129 // to assess the similarity of the input images. To finish up, we'll 130 // compute and print the distance matrix of our input images: 131 for (final Entry<URL, SparseIntFV> entry1 : occurrences.entrySet()) { 132 for (final Entry<URL, SparseIntFV> entry2 : occurrences.entrySet()) { 133 // this computes the Euclidean distance. Note that we're not 134 // normalising the vectors here, but in reality you probably 135 // would want to. 
136 final double distance = SparseIntFVComparison.EUCLIDEAN.compare(entry1.getValue(), entry2.getValue()); 137 138 System.out.format("%2.3f\t", distance); 139 } 140 System.out.println(); 141 } 142 } 143}