001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.ml.linear.data;
031
032import gov.sandia.cognition.math.matrix.Matrix;
033import gov.sandia.cognition.math.matrix.MatrixEntry;
034import gov.sandia.cognition.math.matrix.Vector;
035import gov.sandia.cognition.math.matrix.mtj.SparseMatrix;
036import gov.sandia.cognition.math.matrix.mtj.SparseMatrixFactoryMTJ;
037
038import java.util.Random;
039
040import org.openimaj.util.pair.Pair;
041
042/**
043 * Data generated from a biconvex system of the form:
044 * 
045 * Y_n,t = U_:,t^T . X_n . W_t + rand()
046 * 
047 * Note that each n'th instance of Y can have values for T tasks.
048 * 
049 * The parameter matricies U and W can be independant of tasks or note.
050 * 
051 * The amount of random noise added can be controlled
052 * 
053 * The sparcity of U, W and X can be controlled, the sparcity of U and W is
054 * consistent per generator. The sparcity of X changes per generation.
055 * 
056 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
057 * 
058 */
059public class BiconvexDataGenerator implements MatrixDataGenerator<Matrix> {
060
061        private int seed;
062        private int nusers;
063        private int nfeatures;
064        private int ntasks;
065        private boolean indw;
066        private boolean indu;
067        private Random rng;
068        private SparseMatrixFactoryMTJ smf;
069        private Matrix u;
070        private SparseMatrix w;
071        private double noise;
072        private double xsparcity;
073
074        /**
075         * Generates a biconvex data generator.
076         */
077        public BiconvexDataGenerator() {
078                this(5, 10, 1, 0.3, 0, true, false, -1, 0.0001);
079        }
080
081        /**
082         * 
083         * @param nusers
084         *            The number of users (U is users x tasks, X is features x
085         *            users)
086         * @param nfeatures
087         *            The number of features (W is features x tasks)
088         * @param ntasks
089         *            The number of tasks (Y is 1 x tasks)
090         * @param sparcity
091         *            The chance that a row of U or W is zeros
092         * @param xsparcity
093         *            The chance that a column of U or W is zeros
094         * @param indw
095         *            If true, there is a column of W per task
096         * @param indu
097         *            If true, there is a column of U per task
098         * @param seed
099         *            If greater than or equal to zero, the rng backing this
100         *            generator is seeded
101         * @param noise
102         *            The random noise added to Y has random values for each Y
103         *            ranging from -noise to noise
104         */
105        public BiconvexDataGenerator(
106                        int nusers, int nfeatures, int ntasks,
107                        double sparcity, double xsparcity,
108                        boolean indw, boolean indu,
109                        int seed, double noise
110
111        )
112        {
113                this.seed = seed;
114                this.nusers = nusers;
115                this.nfeatures = nfeatures;
116                this.ntasks = ntasks;
117                this.indw = indw;
118                this.indu = indu;
119                this.noise = Math.abs(noise);
120                this.xsparcity = xsparcity;
121                this.smf = new SparseMatrixFactoryMTJ();
122
123                if (this.seed >= 0)
124                        this.rng = new Random(this.seed);
125                else
126                        this.rng = new Random();
127                if (indu) {
128                        this.u = smf.createUniformRandom(nusers, ntasks, 0, 1, this.rng);
129                }
130                else {
131                        this.u = smf.createUniformRandom(nusers, 1, 0, 1, this.rng);
132                }
133
134                if (indw) {
135                        this.w = smf.createUniformRandom(nfeatures, ntasks, 0, 1, this.rng);
136                }
137                else {
138                        this.w = smf.createUniformRandom(nfeatures, 1, 0, 1, this.rng);
139                }
140
141                final Vector zeroUserWord = smf.createMatrix(1, ntasks).getRow(0);
142                for (int i = 0; i < nusers; i++) {
143                        if (this.rng.nextDouble() < sparcity) {
144                                this.u.setRow(i, zeroUserWord);
145                        }
146                }
147                for (int i = 0; i < nfeatures; i++) {
148                        if (this.rng.nextDouble() < sparcity) {
149                                this.w.setRow(i, zeroUserWord);
150                        }
151                }
152        }
153
154        private Matrix calcY(Matrix u, Matrix x, Matrix w) {
155                final Matrix ut = u.transpose();
156                final Matrix xt = x.transpose();
157                final Matrix utdotxt = ut.times(xt);
158                return utdotxt.times(w);
159        }
160
161        @Override
162        public Pair<Matrix> generate() {
163                Matrix x = smf.createUniformRandom(nfeatures, nusers, 0, 1, rng);
164                final Matrix xSparse = smf.createMatrix(nfeatures, nusers);
165                for (final MatrixEntry matrixEntry : x) {
166                        if (this.rng.nextDouble() >= this.xsparcity) {
167                                xSparse.setElement(matrixEntry.getRowIndex(), matrixEntry.getColumnIndex(), matrixEntry.getValue());
168                        }
169                }
170                x = xSparse;
171                Matrix y = null;
172                if (indw && indu) {
173                        y = smf.createMatrix(1, ntasks);
174                        for (int i = 0; i < this.ntasks; i++) {
175                                final Matrix subu = this.u.getSubMatrix(0, nusers - 1, i, i);
176                                final Matrix subw = this.w.getSubMatrix(0, nfeatures - 1, i, i);
177                                final Matrix yval = calcY(subu, x, subw);
178                                y.setSubMatrix(0, i, yval);
179                        }
180                } else {
181                        y = calcY(u, x, w);
182                }
183                if (y.getNumColumns() < y.getNumRows()) {
184                        y = y.transpose();
185                }
186                if (this.noise != 0) {
187                        final SparseMatrix nm = smf.createUniformRandom(1, this.ntasks, -this.noise, this.noise, this.rng);
188                        y.plusEquals(nm);
189                }
190                return new Pair<Matrix>(x, y);
191        }
192
193        public Matrix getU() {
194                return this.u;
195        }
196
197        public Matrix getW() {
198                return this.w;
199        }
200}