/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.ml.linear.data;

import gov.sandia.cognition.math.matrix.Matrix;
import gov.sandia.cognition.math.matrix.MatrixEntry;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.openimaj.util.pair.IndependentPair;
import org.openimaj.util.pair.Pair;

/**
 * Wraps a {@link BiconvexDataGenerator} and hands out each generated batch one
 * user at a time, as nested string-keyed maps rather than matrices.
 * <p>
 * Each call to {@link #generate()} returns the non-zero X entries of a single
 * user together with the Y of the batch that user came from.
 */
public class BiconvexIncrementalDataGenerator {

    BiconvexDataGenerator dgen;
    /** Y of the batch currently being iterated, keyed by column index. */
    private Map<String, Double> rety;
    /** Remaining per-user X maps of the batch currently being iterated. */
    private Iterator<Map<String, Map<String, Double>>> retxIter;

    /**
     * Creates an incremental generator backed by a
     * {@link BiconvexDataGenerator} constructed with the given parameters.
     *
     * @param nusers
     *            The number of users (U is users x tasks, X is features x
     *            users)
     * @param nfeatures
     *            The number of features (W is features x tasks)
     * @param ntasks
     *            The number of tasks (Y is 1 x tasks)
     * @param sparcity
     *            The chance that a row of U or W is zeros
     * @param xsparcity
     *            The chance that a column of U or W is zeros
     * @param indw
     *            If true, there is a column of W per task
     * @param indu
     *            If true, there is a column of U per task
     * @param seed
     *            If greater than or equal to zero, the rng backing this
     *            generator is seeded
     * @param noise
     *            The random noise added to Y has random values for each Y
     *            ranging from -noise to noise
     */
    public BiconvexIncrementalDataGenerator(
            int nusers, int nfeatures, int ntasks,
            double sparcity, double xsparcity,
            boolean indw, boolean indu,
            int seed, double noise)
    {
        dgen = new BiconvexDataGenerator(nusers, nfeatures, ntasks, sparcity, xsparcity, indw, indu, seed, noise);
    }

    /**
     * Produces the next single-user sample.
     * <p>
     * A batch is generated and then emitted incrementally, one user per call.
     * Y is the same for every user of a batch, while X contains only that
     * user's (non-zero) entries. When the current batch is exhausted, or no
     * batch exists yet, a new one is drawn; batches containing no non-zero X
     * entry are skipped.
     *
     * @return a pair of (user name -&gt; (word name -&gt; value)) holding exactly
     *         one user, and (column index -&gt; value) for the batch's Y
     */
    public IndependentPair<Map<String, Map<String, Double>>, Map<String, Double>> generate() {
        // NOTE(review): this keeps drawing batches until one contains a
        // non-zero X entry; it will not terminate if the underlying generator
        // never produces one.
        while (this.retxIter == null || !this.retxIter.hasNext()) {
            prepareXY();
        }
        final Map<String, Map<String, Double>> x = retxIter.next();
        return IndependentPair.pair(x, rety);
    }

    /**
     * Draws a fresh batch from the underlying generator and resets the
     * per-user iterator and the shared Y map from it.
     */
    private void prepareXY() {
        final Pair<Matrix> xy = dgen.generate();
        this.retxIter = prepareX(xy.getFirstObject()).iterator();
        this.rety = prepareY(xy.getSecondObject());
    }

    /**
     * Converts Y into a map from column index (as a decimal string) to value.
     * Entries sharing a column index overwrite one another.
     *
     * @param secondObject
     *            the Y matrix of a batch
     * @return the column-index keyed values
     */
    private Map<String, Double> prepareY(Matrix secondObject) {
        final Map<String, Double> ret = new HashMap<String, Double>();
        for (final MatrixEntry me : secondObject) {
            ret.put(String.valueOf(me.getColumnIndex()), me.getValue());
        }
        return ret;
    }

    /**
     * Splits X into one single-entry map per user. Columns are treated as
     * users and rows as words; zero-valued entries are dropped, so a user with
     * no non-zero entry does not appear at all.
     *
     * @param firstObject
     *            the X matrix of a batch
     * @return one map per user, in the iteration order of the intermediate
     *         {@link HashMap} keyed by user name
     */
    private List<Map<String, Map<String, Double>>> prepareX(Matrix firstObject) {
        final Map<String, Map<String, Double>> ret = new HashMap<String, Map<String, Double>>();
        for (final MatrixEntry matrixEntry : firstObject) {
            final int user = matrixEntry.getColumnIndex();
            final int word = matrixEntry.getRowIndex();
            final double v = matrixEntry.getValue();
            if (v != 0) {
                final String userName = userName(user);
                Map<String, Double> userMap = ret.get(userName);
                if (userMap == null) {
                    userMap = new HashMap<String, Double>();
                    ret.put(userName, userMap);
                }
                userMap.put(wordName(word), v);
            }
        }

        final List<Map<String, Map<String, Double>>> retList =
                new ArrayList<Map<String, Map<String, Double>>>(ret.size());
        for (final Entry<String, Map<String, Double>> map : ret.entrySet()) {
            final Map<String, Map<String, Double>> userMap = new HashMap<String, Map<String, Double>>();
            userMap.put(map.getKey(), map.getValue());
            retList.add(userMap);
        }
        return retList;
    }

    /**
     * Builds the map key for a word (row) index.
     * <p>
     * Plain concatenation is used instead of {@code String.format("%d")} so
     * the key always uses ASCII digits regardless of the default locale.
     */
    private String wordName(int word) {
        return "word_" + word;
    }

    /**
     * Builds the map key for a user (column) index.
     * <p>
     * Plain concatenation is used instead of {@code String.format("%d")} so
     * the key always uses ASCII digits regardless of the default locale.
     */
    private String userName(int user) {
        return "user_" + user;
    }

}