package org.openimaj.workinprogress.featlearn;

import no.uib.cipr.matrix.DenseMatrix;
import no.uib.cipr.matrix.DenseVector;
import no.uib.cipr.matrix.Matrices;
import no.uib.cipr.matrix.Matrix.Norm;
import no.uib.cipr.matrix.Vector;

import org.openimaj.workinprogress.optimisation.DifferentiableObjectiveFunction;
import org.openimaj.workinprogress.optimisation.params.VectorParameters;

public class SparseAutoencoder
        implements
        DifferentiableObjectiveFunction<SparseAutoencoder.Model, double[], VectorParameters>
{
    public static class Model {
        // Encoder weights/bias (w1: nhidden x nvisible, b1: nhidden) and
        // decoder weights/bias (w2: nvisible x nhidden, b2: nvisible).
        DenseMatrix w1;
        DenseMatrix w2;
        DenseVector b1;
        DenseVector b2;

        public Model(int nvisible, int nhidden) {
            w1 = (DenseMatrix) Matrices.random(nhidden, nvisible);
            w2 = (DenseMatrix) Matrices.random(nvisible, nhidden);
            b1 = (DenseVector) Matrices.random(nhidden);
            b2 = (DenseVector) Matrices.random(nvisible);
        }

        double[] feedforward(double[] input) {
            // a1 = sigmoid(w1 * input + b1); a2 = sigmoid(w2 * a1 + b2)
            final DenseVector iv = new DenseVector(input, false);
            final DenseVector a1 = sigmoid(w1.multAdd(iv, b1.copy()));
            final DenseVector a2 = sigmoid(w2.multAdd(a1, b2.copy()));

            return a2.getData();
        }

        double[][] getLayerActivations(double[] input) {
            // Same forward pass as feedforward(), but also exposing the
            // hidden activations, which are needed for backpropagation.
            final DenseVector iv = new DenseVector(input, false);
            final DenseVector a1 = sigmoid(w1.multAdd(iv, b1.copy()));
            final DenseVector a2 = sigmoid(w2.multAdd(a1, b2.copy()));
            return new double[][] { a1.getData(), a2.getData() };
        }

        private DenseVector sigmoid(Vector vector) {
            // Element-wise logistic function: 1 / (1 + e^-x)
            final DenseVector out = (DenseVector) vector.copy();

            final double[] xd = out.getData();

            for (int i = 0; i < xd.length; i++)
                xd[i] = 1 / (1 + Math.exp(-xd[i]));

            return out;
        }
    }

    /**
     * Weight-decay (regularisation) coefficient.
     */
    private double lambda;

    @Override
    public double value(Model model, double[] data) {
        // Squared error between the input and its reconstruction
        final double[] predict = model.feedforward(data);

        double err = 0;
        for (int i = 0; i < predict.length; i++) {
            final double diff = predict[i] - data[i];
            err += (diff * diff);
        }

        // L2 weight decay on both weight matrices. Note that, despite the
        // class name, no sparsity penalty is included in the cost yet.
        final double n1 = model.w1.norm(Norm.Frobenius);
        final double n2 = model.w2.norm(Norm.Frobenius);
        final double reg = n1 * n1 + n2 * n2;

        // J = 0.5 * (||a2 - x||^2 + lambda * (||w1||_F^2 + ||w2||_F^2))
        return 0.5 * (err + lambda * reg);
    }

    @Override
    public VectorParameters derivative(Model model, double[] data) {
        final double[][] as = model.getLayerActivations(data);

        // Output-layer delta: d2 = -(x - a2) .* a2 .* (1 - a2)
        final double[] d2 = new double[as[1].length];
        for (int i = 0; i < d2.length; i++) {
            d2[i] = -(data[i] - as[1][i]) * (as[1][i] * (1 - as[1][i]));
        }

        // Hidden-layer delta: d1 = (w2' * d2) .* a1 .* (1 - a1); the deltas
        // are backpropagated through the decoder weights w2.
        final DenseVector wd = (DenseVector) model.w2.transMult(new DenseVector(d2),
                new DenseVector(model.w2.numColumns()));
        final double[] d1 = wd.getData();
        for (int i = 0; i < d1.length; i++) {
            d1[i] *= (as[0][i] * (1 - as[0][i]));
        }

        // TODO: the deltas are computed, but the weight and bias gradients
        // are not yet assembled into a VectorParameters object.
        return null;
    }

    @Override
    public void updateModel(Model model, VectorParameters weights) {
        // TODO: apply the parameter update to the model; not yet implemented.
    }
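
    // Minimal usage sketch (not part of the original class): builds a model
    // and evaluates the objective on a random input vector. The layer sizes
    // and the lambda value below are illustrative assumptions only.
    public static void main(String[] args) {
        final SparseAutoencoder ae = new SparseAutoencoder();
        ae.lambda = 1e-4; // assumed weight-decay setting

        final Model model = new Model(64, 25); // 64 visible units, 25 hidden

        final double[] input = new double[64];
        for (int i = 0; i < input.length; i++)
            input[i] = Math.random();

        // Cost of reconstructing the input with the untrained model
        System.out.println("initial cost = " + ae.value(model, input));

        final double[] reconstruction = model.feedforward(input);
        System.out.println("reconstruction length = " + reconstruction.length);
    }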
}