/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * 	Redistributions of source code must retain the above copyright notice,
 * 	this list of conditions and the following disclaimer.
 *
 *   *	Redistributions in binary form must reproduce the above copyright notice,
 * 	this list of conditions and the following disclaimer in the documentation
 * 	and/or other materials provided with the distribution.
 *
 *   *	Neither the name of the University of Southampton nor the names of its
 * 	contributors may be used to endorse or promote products derived from this
 * 	software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.ml.linear.data;

import gov.sandia.cognition.math.matrix.Matrix;
import gov.sandia.cognition.math.matrix.MatrixEntry;
import gov.sandia.cognition.math.matrix.Vector;
import gov.sandia.cognition.math.matrix.mtj.SparseMatrix;
import gov.sandia.cognition.math.matrix.mtj.SparseMatrixFactoryMTJ;

import java.util.Random;

import org.openimaj.util.pair.Pair;

/**
 * Data generated from a biconvex system of the form:
 *
 * Y_n,t = U_:,t^T . X_n . W_t + rand()
 *
 * Note that each n'th instance of Y can have values for T tasks.
 *
 * The parameter matrices U and W can be independent of tasks or not. When a
 * parameter matrix is not independent per task it has a single column that is
 * shared by every task.
 *
 * The amount of random noise added can be controlled.
 *
 * The sparsity of U, W and X can be controlled. The sparsity of U and W is
 * fixed when the generator is constructed; the sparsity pattern of X is drawn
 * afresh on every call to {@link #generate()}.
 *
 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
 *
 */
public class BiconvexDataGenerator implements MatrixDataGenerator<Matrix> {

	private final int seed;
	private final int nusers;
	private final int nfeatures;
	private final int ntasks;
	private final boolean indw;
	private final boolean indu;
	private final Random rng;
	private final SparseMatrixFactoryMTJ smf;
	private final Matrix u;
	private final SparseMatrix w;
	private final double noise;
	private final double xsparcity;

	/**
	 * Generates a biconvex data generator with 5 users, 10 features, 1 task,
	 * a U/W row sparsity of 0.3, a fully dense X, a per-task W, a shared U, an
	 * unseeded random number generator and a noise magnitude of 0.0001.
	 */
	public BiconvexDataGenerator() {
		this(5, 10, 1, 0.3, 0, true, false, -1, 0.0001);
	}

	/**
	 *
	 * @param nusers
	 *            The number of users (U is users x tasks, X is features x
	 *            users)
	 * @param nfeatures
	 *            The number of features (W is features x tasks)
	 * @param ntasks
	 *            The number of tasks (Y is 1 x tasks)
	 * @param sparcity
	 *            The chance that a row of U or W is zeros
	 * @param xsparcity
	 *            The chance that any individual element of X is zero; applied
	 *            independently per element each time data is generated
	 * @param indw
	 *            If true, there is a column of W per task; otherwise W has a
	 *            single column
	 * @param indu
	 *            If true, there is a column of U per task; otherwise U has a
	 *            single column
	 * @param seed
	 *            If greater than or equal to zero, the rng backing this
	 *            generator is seeded
	 * @param noise
	 *            The random noise added to Y has random values for each Y
	 *            ranging from -noise to noise (the absolute value of this
	 *            argument is used)
	 */
	public BiconvexDataGenerator(
			int nusers, int nfeatures, int ntasks,
			double sparcity, double xsparcity,
			boolean indw, boolean indu,
			int seed, double noise

			)
	{
		this.seed = seed;
		this.nusers = nusers;
		this.nfeatures = nfeatures;
		this.ntasks = ntasks;
		this.indw = indw;
		this.indu = indu;
		this.noise = Math.abs(noise);
		this.xsparcity = xsparcity;
		this.smf = new SparseMatrixFactoryMTJ();

		if (this.seed >= 0) {
			this.rng = new Random(this.seed);
		}
		else {
			this.rng = new Random();
		}

		// A task-independent parameter matrix gets one column per task, a
		// shared one gets a single column.
		final int uColumns = indu ? ntasks : 1;
		final int wColumns = indw ? ntasks : 1;
		this.u = smf.createUniformRandom(nusers, uColumns, 0, 1, this.rng);
		this.w = smf.createUniformRandom(nfeatures, wColumns, 0, 1, this.rng);

		// U is sparsified before W so the sequence of draws from the rng is
		// stable for a given seed.
		zeroRandomRows(this.u, sparcity);
		zeroRandomRows(this.w, sparcity);
	}

	/**
	 * Replaces each row of the given matrix with zeros with the given
	 * probability, drawing one value from the rng per row.
	 *
	 * @param m
	 *            the matrix to sparsify in place
	 * @param chance
	 *            the chance that any given row is zeroed
	 */
	private void zeroRandomRows(Matrix m, double chance) {
		// The zero row is sized from the matrix itself: a shared (single
		// column) parameter matrix is narrower than ntasks, and a row of the
		// wrong length cannot be assigned to it.
		final Vector zeroRow = smf.createMatrix(1, m.getNumColumns()).getRow(0);
		final int nrows = m.getNumRows();
		for (int i = 0; i < nrows; i++) {
			if (this.rng.nextDouble() < chance) {
				m.setRow(i, zeroRow);
			}
		}
	}

	/**
	 * Computes u^T . x^T . w.
	 *
	 * @param u
	 *            the user parameters (users x columns)
	 * @param x
	 *            the data (features x users)
	 * @param w
	 *            the feature parameters (features x columns)
	 * @return the product u^T . x^T . w
	 */
	private Matrix calcY(Matrix u, Matrix x, Matrix w) {
		final Matrix ut = u.transpose();
		final Matrix xt = x.transpose();
		final Matrix utdotxt = ut.times(xt);
		return utdotxt.times(w);
	}

	/**
	 * Generates a new random X (features x users), sparsified element-wise
	 * according to the configured X sparsity, together with the corresponding
	 * Y computed from the fixed U and W plus uniform noise.
	 *
	 * @return the pair (X, Y)
	 */
	@Override
	public Pair<Matrix> generate() {
		final Matrix dense = smf.createUniformRandom(nfeatures, nusers, 0, 1, rng);
		final Matrix x = smf.createMatrix(nfeatures, nusers);
		// Copy across only the entries that survive the sparsity draw; the
		// rest remain zero in the freshly created matrix.
		for (final MatrixEntry matrixEntry : dense) {
			if (this.rng.nextDouble() >= this.xsparcity) {
				x.setElement(matrixEntry.getRowIndex(), matrixEntry.getColumnIndex(), matrixEntry.getValue());
			}
		}

		Matrix y;
		if (indw && indu) {
			// Both parameter matrices are per-task: pair up column i of U
			// with column i of W and compute each task's value separately.
			y = smf.createMatrix(1, ntasks);
			for (int i = 0; i < this.ntasks; i++) {
				final Matrix subu = this.u.getSubMatrix(0, nusers - 1, i, i);
				final Matrix subw = this.w.getSubMatrix(0, nfeatures - 1, i, i);
				final Matrix yval = calcY(subu, x, subw);
				y.setSubMatrix(0, i, yval);
			}
		} else {
			y = calcY(u, x, w);
		}

		// A per-task U with a shared W yields a column; present Y as a row.
		if (y.getNumColumns() < y.getNumRows()) {
			y = y.transpose();
		}

		if (this.noise != 0) {
			// The noise matches Y's actual shape: Y is 1 x 1 rather than
			// 1 x ntasks when both U and W are shared across tasks.
			final SparseMatrix nm = smf.createUniformRandom(
					y.getNumRows(), y.getNumColumns(), -this.noise, this.noise, this.rng);
			y.plusEquals(nm);
		}
		return new Pair<Matrix>(x, y);
	}

	/**
	 * @return the U matrix held by this generator (not a copy)
	 */
	public Matrix getU() {
		return this.u;
	}

	/**
	 * @return the W matrix held by this generator (not a copy)
	 */
	public Matrix getW() {
		return this.w;
	}
}