001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.ml.linear.data;
031
032import gov.sandia.cognition.math.matrix.Matrix;
033import gov.sandia.cognition.math.matrix.MatrixEntry;
034
035import java.util.ArrayList;
036import java.util.HashMap;
037import java.util.Iterator;
038import java.util.List;
039import java.util.Map;
040import java.util.Map.Entry;
041
042import org.openimaj.util.pair.IndependentPair;
043import org.openimaj.util.pair.Pair;
044
045public class BiconvexIncrementalDataGenerator {
046
047        BiconvexDataGenerator dgen;
048        private Map<String, Double> rety;
049        private Iterator<Map<String, Map<String, Double>>> retxIter;
050
051        /**
052         * 
053         * @param nusers
054         *            The number of users (U is users x tasks, X is features x
055         *            users)
056         * @param nfeatures
057         *            The number of features (W is features x tasks)
058         * @param ntasks
059         *            The number of tasks (Y is 1 x tasks)
060         * @param sparcity
061         *            The chance that a row of U or W is zeros
062         * @param xsparcity
063         *            The chance that a column of U or W is zeros
064         * @param indw
065         *            If true, there is a column of W per task
066         * @param indu
067         *            If true, there is a column of U per task
068         * @param seed
069         *            If greater than or equal to zero, the rng backing this
070         *            generator is seeded
071         * @param noise
072         *            The random noise added to Y has random values for each Y
073         *            ranging from -noise to noise
074         */
075        public BiconvexIncrementalDataGenerator(
076                        int nusers, int nfeatures, int ntasks,
077                        double sparcity, double xsparcity,
078                        boolean indw, boolean indu,
079                        int seed, double noise
080
081        )
082        {
083                dgen = new BiconvexDataGenerator(nusers, nfeatures, ntasks, sparcity, xsparcity, indw, indu, seed, noise);
084        }
085
086        public IndependentPair<Map<String, Map<String, Double>>, Map<String, Double>> generate() {
087                // We generate a batch, and output incrementally per user
088                // For each user we assume the state Y is the same for each, while X is
089                // a sparse matrix where only the user's
090                // message is set (which itself might be sparse)
091                while (this.retxIter == null || !this.retxIter.hasNext()) {
092                        prepareXY();
093                }
094                final Map<String, Map<String, Double>> x = retxIter.next();
095                return IndependentPair.pair(x, rety);
096        }
097
098        private void prepareXY() {
099                final Pair<Matrix> xy = dgen.generate();
100                this.retxIter = prepareX(xy.getFirstObject()).iterator();
101                this.rety = prepareY(xy.getSecondObject());
102        }
103
104        private Map<String, Double> prepareY(Matrix secondObject) {
105                final Map<String, Double> ret = new HashMap<String, Double>();
106                for (final MatrixEntry me : secondObject) {
107                        ret.put(me.getColumnIndex() + "", me.getValue());
108                }
109                return ret;
110        }
111
112        private List<Map<String, Map<String, Double>>> prepareX(Matrix firstObject) {
113                final HashMap<String, Map<String, Double>> ret = new HashMap<String, Map<String, Double>>();
114                for (final MatrixEntry matrixEntry : firstObject) {
115                        final int user = matrixEntry.getColumnIndex();
116                        final int word = matrixEntry.getRowIndex();
117                        final double v = matrixEntry.getValue();
118                        if (v != 0) {
119                                final String userName = userName(user);
120                                Map<String, Double> userMap = ret.get(userName);
121                                if (userMap == null) {
122                                        ret.put(userName, userMap = new HashMap<String, Double>());
123                                }
124                                final String wordName = wordName(word);
125                                userMap.put(wordName, v);
126                        }
127                }
128                final List<Map<String, Map<String, Double>>> retList = new ArrayList<Map<String, Map<String, Double>>>();
129                for (final Entry<String, Map<String, Double>> map : ret.entrySet()) {
130                        final Map<String, Map<String, Double>> userMap = new HashMap<String, Map<String, Double>>();
131                        userMap.put(map.getKey(), map.getValue());
132                        retList.add(userMap);
133                }
134                return retList;
135        }
136
137        private String wordName(int word) {
138                return String.format("word_%d", word);
139        }
140
141        private String userName(int user) {
142                return String.format("user_%d", user);
143        }
144
145}