/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.workinprogress.featlearn;

import no.uib.cipr.matrix.DenseMatrix;
import no.uib.cipr.matrix.DenseVector;
import no.uib.cipr.matrix.Matrices;
import no.uib.cipr.matrix.Matrix.Norm;
import no.uib.cipr.matrix.Vector;

import org.openimaj.workinprogress.optimisation.DifferentiableObjectiveFunction;
import org.openimaj.workinprogress.optimisation.params.VectorParameters;

public class SparseAutoencoder
		implements
		DifferentiableObjectiveFunction<SparseAutoencoder.Model, double[], VectorParameters>
{
	public static class Model {
		DenseMatrix w1; // input -> hidden weights (nhidden x nvisible)
		DenseMatrix w2; // hidden -> output weights (nvisible x nhidden)
		DenseVector b1; // hidden-layer bias
		DenseVector b2; // output-layer bias

		public Model(int nvisible, int nhidden) {
			// w1 must be nhidden x nvisible so it can map the nvisible-dimensional
			// input to the nhidden-dimensional hidden layer in feedforward()
			w1 = (DenseMatrix) Matrices.random(nhidden, nvisible);
			w2 = (DenseMatrix) Matrices.random(nvisible, nhidden);
			b1 = (DenseVector) Matrices.random(nhidden);
			b2 = (DenseVector) Matrices.random(nvisible);
		}

		double[] feedforward(double[] input) {
			final DenseVector iv = new DenseVector(input, false);
			final DenseVector a1 = sigmoid(w1.multAdd(iv, b1.copy()));
			final DenseVector a2 = sigmoid(w2.multAdd(a1, b2.copy()));

			return a2.getData();
		}

		double[][] getLayerActivations(double[] input) {
			final DenseVector iv = new DenseVector(input, false);
			final DenseVector a1 = sigmoid(w1.multAdd(iv, b1.copy()));
			final DenseVector a2 = sigmoid(w2.multAdd(a1, b2.copy()));
			return new double[][] { a1.getData(), a2.getData() };
		}

		private DenseVector sigmoid(Vector vector) {
			final DenseVector out = (DenseVector) vector.copy();

			final double[] xd = out.getData();

			for (int i = 0; i < xd.length; i++)
				xd[i] = 1 / (1 + Math.exp(-xd[i]));

			return out;
		}
	}

	private double lambda; // regularization weight for weight decay
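
	/*
	 * The objective implemented below is the squared reconstruction error of
	 * the input plus an L2 weight-decay penalty:
	 *
	 *   J = 0.5 * (||a2 - x||^2 + lambda * (||w1||_F^2 + ||w2||_F^2))
	 *
	 * Despite the class name, the sparsity penalty of the usual
	 * sparse-autoencoder objective (a KL-divergence term on the mean hidden
	 * activations) is not implemented here yet.
	 */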
	@Override
	public double value(Model model, double[] data) {
		final double[] predict = model.feedforward(data);

		// squared reconstruction error between the input and its reconstruction
		double err = 0;
		for (int i = 0; i < predict.length; i++) {
			final double diff = predict[i] - data[i];
			err += (diff * diff);
		}

		// Note that this is rather expensive each iter... can it be improved?
		final double n1 = model.w1.norm(Norm.Frobenius);
		final double n2 = model.w2.norm(Norm.Frobenius);
		final double reg = n1 * n1 + n2 * n2;

		return 0.5 * (err + lambda * reg);
	}

	@Override
	public VectorParameters derivative(Model model, double[] data) {
		final double[][] as = model.getLayerActivations(data);

		// output-layer delta: d2 = -(x - a2) .* sigmoid'(z2), using
		// sigmoid'(z) = a * (1 - a)
		final double[] d2 = new double[as[1].length];
		for (int i = 0; i < d2.length; i++) {
			d2[i] = -(data[i] - as[1][i]) * (as[1][i] * (1 - as[1][i]));
		}

		// hidden-layer delta: d1 = (w2^T * d2) .* sigmoid'(z1); d2 has
		// nvisible elements, so it must be back-propagated through w2
		// (nvisible x nhidden), yielding an nhidden-dimensional vector
		final DenseVector wd = (DenseVector) model.w2.transMult(new DenseVector(d2),
				new DenseVector(model.w2.numColumns()));
		final double[] d1 = wd.getData();
		for (int i = 0; i < d1.length; i++) {
			d1[i] *= (as[0][i] * (1 - as[0][i]));
		}

		// TODO: pack the gradients (d2 * a1^T + lambda * w2 for w2,
		// d1 * x^T + lambda * w1 for w1, and d2, d1 for the biases) into a
		// VectorParameters instance and return it
		return null;
	}

	@Override
	public void updateModel(Model model, VectorParameters weights) {
		// TODO Auto-generated method stub

	}
}
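
/*
 * A minimal usage sketch (hypothetical; not part of the original class). It
 * builds a model, runs the forward pass and evaluates the objective on a toy
 * input. Only feedforward() and value() are exercised, since derivative() and
 * updateModel() above are still work-in-progress stubs.
 */
class SparseAutoencoderExample {
	public static void main(String[] args) {
		// 4 visible units, 2 hidden units
		final SparseAutoencoder.Model model = new SparseAutoencoder.Model(4, 2);
		final SparseAutoencoder ae = new SparseAutoencoder();

		final double[] input = { 0.1, 0.9, 0.2, 0.8 };
		final double[] reconstruction = model.feedforward(input);

		System.out.println("objective: " + ae.value(model, input));
		System.out.println("reconstruction: " + java.util.Arrays.toString(reconstruction));
	}
}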