001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.math.matrix.algorithm.whitening;
031
032import org.openimaj.citation.annotation.Reference;
033import org.openimaj.citation.annotation.ReferenceType;
034import org.openimaj.math.matrix.algorithm.pca.SvdPrincipalComponentAnalysis;
035import org.openimaj.math.statistics.normalisation.Normaliser;
036import org.openimaj.math.statistics.normalisation.TrainableNormaliser;
037
038import Jama.Matrix;
039
040/**
041 * Whitening based on PCA. The PCA basis is used to rotate the data, and then
042 * the data is normalised by the variances such that it has unit variance along
043 * all principal axes.
044 * <p>
045 * Optionally, you can also reduce the dimensionality of the data during the
046 * whitening process (by throwing away components with small eignevalues).
047 *
048 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
049 *
050 */
051@Reference(
052                type = ReferenceType.Book,
053                author = { "Hyvrinen, Aapo", "Hurri, Jarmo", "Hoyer, Patrick O." },
054                title = "Natural Image Statistics: A Probabilistic Approach to Early Computational Vision.",
055                year = "2009",
056                edition = "1st",
057                publisher = "Springer Publishing Company, Incorporated",
058                customData = {
059                                "isbn", "1848824904, 9781848824904"
060                })
061public class PCAWhitening extends WhiteningTransform {
062        protected double eps;
063        protected Normaliser ns;
064        protected Matrix transform;
065        protected int ndims = -1;
066
067        /**
068         * Construct with the given variance regularization parameter and data
069         * normalisation strategy.
070         *
071         * @param eps
072         *            the variance normalisation regularizer (each principle
073         *            dimension is divided by sqrt(lamba + eps), where lamba is the
074         *            corresponding eigenvalue).
075         * @param ns
076         *            the normalisation to apply to each input data vector prior to
077         *            training the transform or applying the actual whitening.
078         */
079        public PCAWhitening(double eps, Normaliser ns) {
080                this.eps = eps;
081                this.ns = ns;
082        }
083
084        /**
085         * Construct with the given variance regularization parameter, data
086         * normalisation strategy and target dimensionality.
087         *
088         * @param eps
089         *            the variance normalisation regularizer (each principle
090         *            dimension is divided by sqrt(lamba + eps), where lamba is the
091         *            corresponding eigenvalue).
092         * @param ns
093         *            the normalisation to apply to each input data vector prior to
094         *            training the transform or applying the actual whitening.
095         * @param ndims
096         *            the number of output dimensions for the whitened data
097         */
098        public PCAWhitening(double eps, Normaliser ns, int ndims) {
099                this.eps = eps;
100                this.ns = ns;
101                this.ndims = ndims;
102        }
103
104        @Override
105        public double[] whiten(double[] vector) {
106                final double[] normVec = ns.normalise(vector);
107
108                final Matrix vec = new Matrix(new double[][] { normVec });
109                return vec.times(transform).getColumnPackedCopy();
110        }
111
112        @Override
113        public void train(double[][] data) {
114                if (ns instanceof TrainableNormaliser)
115                        ((TrainableNormaliser) ns).train(data);
116
117                final double[][] normData = ns.normalise(data);
118
119                final SvdPrincipalComponentAnalysis pca = new SvdPrincipalComponentAnalysis(ndims);
120                pca.learnBasisNorm(new Matrix(normData));
121                transform = pca.getBasis();
122                final double[] weight = pca.getEigenValues();
123                final double[][] td = transform.getArray();
124
125                for (int c = 0; c < weight.length; c++)
126                        weight[c] = 1 / Math.sqrt(weight[c] + eps);
127
128                for (int r = 0; r < td.length; r++)
129                        for (int c = 0; c < td[0].length; c++)
130                                td[r][c] = td[r][c] * weight[c];
131        }
132
133        /**
134         * Get the underlying whitening transform matrix.
135         *
136         * @return the matrix
137         */
138        public Matrix getTransform() {
139                return transform;
140        }
141}