/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   *  Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   *  Redistributions in binary form must reproduce the above copyright notice,
 *      this list of conditions and the following disclaimer in the documentation
 *      and/or other materials provided with the distribution.
 *
 *   *  Neither the name of the University of Southampton nor the names of its
 *      contributors may be used to endorse or promote products derived from this
 *      software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.workinprogress.featlearn;

import no.uib.cipr.matrix.DenseMatrix;
import no.uib.cipr.matrix.DenseVector;
import no.uib.cipr.matrix.Matrices;
import no.uib.cipr.matrix.Matrix.Norm;
import no.uib.cipr.matrix.Vector;

import org.openimaj.workinprogress.optimisation.DifferentiableObjectiveFunction;
import org.openimaj.workinprogress.optimisation.params.VectorParameters;
public class SparseAutoencoder
                implements
                DifferentiableObjectiveFunction<SparseAutoencoder.Model, double[], VectorParameters>
{
        public static class Model {
                DenseMatrix w1; // encoder weights (nhidden x nvisible)
                DenseMatrix w2; // decoder weights (nvisible x nhidden)
                DenseVector b1; // hidden-layer bias (nhidden)
                DenseVector b2; // output-layer bias (nvisible)

                public Model(int nvisible, int nhidden) {
                        w1 = (DenseMatrix) Matrices.random(nhidden, nvisible);
                        w2 = (DenseMatrix) Matrices.random(nvisible, nhidden);
                        b1 = (DenseVector) Matrices.random(nhidden);
                        b2 = (DenseVector) Matrices.random(nvisible);
                }

                /** Compute the reconstruction a2 = sigmoid(w2 * sigmoid(w1 * input + b1) + b2). */
                double[] feedforward(double[] input) {
                        final DenseVector iv = new DenseVector(input, false);
                        // multAdd computes (w * x + b) into the copied bias vector
                        final DenseVector a1 = sigmoid(w1.multAdd(iv, b1.copy()));
                        final DenseVector a2 = sigmoid(w2.multAdd(a1, b2.copy()));

                        return a2.getData();
                }

                /** As {@link #feedforward(double[])}, but returns both layer activations. */
                double[][] getLayerActivations(double[] input) {
                        final DenseVector iv = new DenseVector(input, false);
                        final DenseVector a1 = sigmoid(w1.multAdd(iv, b1.copy()));
                        final DenseVector a2 = sigmoid(w2.multAdd(a1, b2.copy()));
                        return new double[][] { a1.getData(), a2.getData() };
                }

                /** Element-wise logistic sigmoid; the input vector is copied, not modified. */
                private DenseVector sigmoid(Vector vector) {
                        final DenseVector out = (DenseVector) vector.copy();

                        final double[] xd = out.getData();

                        for (int i = 0; i < xd.length; i++)
                                xd[i] = 1 / (1 + Math.exp(-xd[i]));

                        return out;
                }
        }

        private double lambda; // regularization for weight decay

        public SparseAutoencoder(double lambda) {
                this.lambda = lambda;
        }

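        /**
         * Objective value for a single example: 0.5 * (||a2 - x||^2 + lambda *
         * (||w1||_F^2 + ||w2||_F^2)). Note that the KL sparsity penalty usually
         * found in a sparse autoencoder is not implemented here yet.
         */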
        @Override
        public double value(Model model, double[] data) {
                final double[] predict = model.feedforward(data);

                // squared reconstruction error
                double err = 0;
                for (int i = 0; i < predict.length; i++) {
                        final double diff = predict[i] - data[i];
                        err += (diff * diff);
                }

                // Note that this is rather expensive each iter... can it be improved?
                final double n1 = model.w1.norm(Norm.Frobenius);
                final double n2 = model.w2.norm(Norm.Frobenius);
                final double reg = n1 * n1 + n2 * n2;

                return 0.5 * (err + lambda * reg);
        }

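        /**
         * Backpropagation for a single example: the output delta is
         * d2 = -(x - a2) .* a2 .* (1 - a2), and the hidden delta is
         * d1 = (w2^T d2) .* a1 .* (1 - a1). The weight gradients would then be
         * dW2 = d2 * a1^T and dW1 = d1 * x^T (plus the weight-decay terms), but
         * packing them into a {@link VectorParameters} is still unimplemented.
         */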
        @Override
        public VectorParameters derivative(Model model, double[] data) {
                final double[][] as = model.getLayerActivations(data);

                // output-layer delta: -(x - a2) .* sigmoid'(z2)
                final double[] d2 = new double[as[1].length];
                for (int i = 0; i < d2.length; i++) {
                        d2[i] = -(data[i] - as[1][i]) * (as[1][i] * (1 - as[1][i]));
                }

                // hidden-layer delta: backpropagate d2 through the decoder weights w2
                final DenseVector wd = (DenseVector) model.w2.transMult(new DenseVector(d2),
                                new DenseVector(model.w2.numColumns()));
                final double[] d1 = wd.getData();
                for (int i = 0; i < d1.length; i++) {
                        d1[i] *= (as[0][i] * (1 - as[0][i]));
                }

                // TODO: assemble d1/d2 into weight and bias gradients and return them
                return null;
        }

        @Override
        public void updateModel(Model model, VectorParameters weights) {
                // TODO Auto-generated method stub

        }
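
        /*
         * Hypothetical usage sketch (not part of the original API): a single
         * forward pass through a randomly-initialised model. The sizes (4 visible,
         * 2 hidden) and the lambda value are arbitrary.
         */
        public static void main(String[] args) {
                final SparseAutoencoder ae = new SparseAutoencoder(0.01);
                final Model model = new Model(4, 2);

                final double[] input = { 0.1, 0.9, 0.5, 0.3 };
                final double[] reconstruction = model.feedforward(input);

                System.out.println("objective = " + ae.value(model, input));
                System.out.println("reconstruction = " + java.util.Arrays.toString(reconstruction));
        }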
}