001/*
002        AUTOMATICALLY GENERATED BY jTemp FROM
003        /Users/jsh2/Work/openimaj/target/checkout/machine-learning/clustering/src/main/jtemp/org/openimaj/ml/clustering/assignment/hard/Hierarchical#T#HardAssigner.jtemp
004*/
005/**
006 * Copyright (c) 2011, The University of Southampton and the individual contributors.
007 * All rights reserved.
008 *
009 * Redistribution and use in source and binary forms, with or without modification,
010 * are permitted provided that the following conditions are met:
011 *
012 *   *  Redistributions of source code must retain the above copyright notice,
013 *      this list of conditions and the following disclaimer.
014 *
015 *   *  Redistributions in binary form must reproduce the above copyright notice,
016 *      this list of conditions and the following disclaimer in the documentation
017 *      and/or other materials provided with the distribution.
018 *
019 *   *  Neither the name of the University of Southampton nor the names of its
020 *      contributors may be used to endorse or promote products derived from this
021 *      software without specific prior written permission.
022 *
023 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
024 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
025 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
026 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
027 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
028 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
029 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
030 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
032 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
033 */
034package org.openimaj.ml.clustering.assignment.hard;
035
036import org.openimaj.ml.clustering.assignment.HardAssigner;
037import org.openimaj.ml.clustering.assignment.soft.HierarchicalBytePathAssigner;
038import org.openimaj.ml.clustering.kmeans.HierarchicalByteKMeansResult;
039import org.openimaj.util.pair.IndependentPair;
040import org.openimaj.util.pair.IntFloatPair;
041
042/**
043 * The {@link HierarchicalByteHardAssigner} is a {@link HardAssigner} for
044 * {@link HierarchicalByteKMeansResult} instances. The assigner
045 * produces the index of the assigned leaf node as if the clusters were
046 * actually flat. 
047 * 
048 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
049 */
050public class HierarchicalByteHardAssigner implements HardAssigner<byte[], float[], IntFloatPair> {
051        /**
052         * The {@link ScoringScheme} determines how the distance
053         * to a cluster is estimated from the hierarchy of k-means
054         * generated clusters.
055         * 
056         * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
057         */
058        public enum ScoringScheme {
059                /**
060                 * Sum distances down the tree.
061                 * 
062                 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
063                 */
064                SUM {
065                        @Override
066                        public float computeScore(float[] weights) {
067                                float sum = 0;
068                                for (float w : weights) {
069                                        if (w < 0) break;
070                                        sum += w;
071                                }
072                                
073                                return sum;
074                        }
075                },
076                /**
077                 * Product of distances down the tree.
078                 * 
079                 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
080                 */
081                PRODUCT {
082                        @Override
083                        public float computeScore(float[] weights) {
084                                float prod = 1;
085                                for (float w : weights) {
086                                        if (w < 0) break;
087                                        prod *= w;
088                                }
089                                
090                                return prod;
091                        }
092                },
093                /**
094                 * The distance in the root cluster 
095                 * 
096                 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
097                 */
098                FIRST {
099                        @Override
100                        public float computeScore(float[] weights) {
101                                return weights[0];
102                        }
103                },
104                /**
105                 * The distance in the leaf cluster
106                 * 
107                 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
108                 */
109                LAST {
110                        @Override
111                        public float computeScore(float[] weights) {
112                                float last = -1;
113                                
114                                for (float w : weights) {
115                                        if (w < 0) break;
116                                        last = w;
117                                }
118                                
119                                return last;
120                        }
121                },
122                /**
123                 * The mean distance down the tree
124                 * 
125                 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
126                 */
127                MEAN {
128                        @Override
129                        public float computeScore(float[] weights) {
130                                float sum = 0;
131                                int count = 0;
132                                
133                                for (float w : weights) {
134                                        if (w < 0) break;
135                                        sum += w;
136                                        count++;
137                                }
138                                
139                                return sum / (float)count;
140                        }
141                }
142                ;
143                
144                protected abstract float computeScore(float[] weights); 
145        }
146        
147        protected HierarchicalByteKMeansResult result;
148        protected HierarchicalBytePathAssigner path;
149        protected ScoringScheme scorer;
150        
151        /**
152         * Construct with the given hierarchical KMeans clusterer
153         * and scoring scheme.
154         *
155         * @param result the hierarchical KMeans clusterer
156         * @param scorer the scoring scheme
157         */
158        public HierarchicalByteHardAssigner(HierarchicalByteKMeansResult result, ScoringScheme scorer) {
159                this.result = result;
160                this.scorer = scorer;
161                this.path = new HierarchicalBytePathAssigner(result);
162        }
163
164        /**
165         * Construct with the given Hierarchical KMeans clusterer
166         * and the SUM scoring scheme.
167         *
168         * @param result the hierarchical KMeans clusterer
169         */
170        public HierarchicalByteHardAssigner(HierarchicalByteKMeansResult result) {
171                this(result, ScoringScheme.SUM);
172        }
173        
174        @Override
175        public int[] assign(byte[][] data) {
176                int [] asgn = new int[data.length];
177
178                for (int i=0; i<data.length; i++) {
179                        asgn[i] = result.getIndex(path.assign(data[i]));
180                }
181
182                return asgn;
183        }
184
185        @Override
186        public int assign(byte[] data) {
187                return result.getIndex(path.assign(data));
188        }
189
190        @Override
191        public void assignDistance(byte[][] data, int[] indices, float[] distances) {
192                int depth = result.getDepth();
193                byte [][] d = new byte[1][];
194                int [][] p = new int[1][depth];
195                float [][] w = new float[1][depth];
196                
197                for (int i=0; i<data.length; i++) {
198                        d[0] = data[i];
199                        
200                        path.assignWeighted(d, p, w);
201                        
202                        indices[i] = result.getIndex(p[0]);
203                        distances[i] = scorer.computeScore(w[0]);
204                }
205        }
206
207        @Override
208        public IntFloatPair assignDistance(byte[] data) {
209                IndependentPair<int[], float[]> pw = path.assignWeighted(data);
210                
211                int index = result.getIndex(pw.firstObject());
212                float score = scorer.computeScore(pw.secondObject());
213                
214                return new IntFloatPair(index, score);
215        }
216        
217        @Override
218        public int size() {
219            return result.countLeafs();
220        }
221        
222        @Override
223        public int numDimensions() {
224            return result.numDimensions();
225        }
226}