001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.math.statistics.normalisation; 031 032/** 033 * z-score normalisation (standardisation). Upon training, the mean and variance 034 * of each dimension is computed; normalisation works by subtracting the mean 035 * and dividing by the standard deviation. 036 * <p> 037 * This implementation includes an optional regularisation parameter that is 038 * added to the variance before the division. 039 * 040 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 041 * 042 */ 043public class ZScore implements TrainableNormaliser, Denormaliser { 044 double[] mean; 045 double[] sigma; 046 double eps = 0; 047 048 /** 049 * Construct without regularisation. 050 */ 051 public ZScore() { 052 } 053 054 /** 055 * Construct with regularisation. 056 * 057 * @param eps 058 * the variance normalisation regulariser (each dimension is 059 * divided by sqrt(var + eps). 060 */ 061 public ZScore(double eps) { 062 this.eps = eps; 063 } 064 065 @Override 066 public void train(double[][] data) { 067 mean = new double[data[0].length]; 068 sigma = new double[data[0].length]; 069 070 for (int r = 0; r < data.length; r++) 071 for (int c = 0; c < data[0].length; c++) 072 mean[c] += data[r][c]; 073 074 for (int c = 0; c < data[0].length; c++) 075 mean[c] /= data.length; 076 077 for (int r = 0; r < data.length; r++) { 078 for (int c = 0; c < data[0].length; c++) { 079 final double delta = (data[r][c] - mean[c]); 080 sigma[c] += delta * delta; 081 } 082 } 083 084 for (int c = 0; c < data[0].length; c++) 085 sigma[c] = Math.sqrt(eps + (sigma[c] / (data.length - 1))); 086 } 087 088 @Override 089 public double[] normalise(double[] vector) { 090 final double[] out = new double[vector.length]; 091 for (int c = 0; c < out.length; c++) 092 out[c] = (vector[c] - mean[c]) / sigma[c]; 093 return out; 094 } 095 096 @Override 097 public double[][] normalise(double[][] data) { 098 final double[][] out = new double[data.length][]; 099 for (int c = 0; c < out.length; c++) 100 out[c] = normalise(data[c]); 101 return out; 102 } 103 104 @Override 105 public double[] denormalise(double[] vector) { 106 final double[] out = new double[vector.length]; 107 for (int c = 0; c < out.length; c++) 108 out[c] = sigma[c] * vector[c] + mean[c]; 109 return out; 110 } 111 112 @Override 113 public double[][] denormalise(double[][] data) { 114 final double[][] out = new double[data.length][]; 115 for (int c = 0; c < out.length; c++) 116 out[c] = denormalise(data[c]); 117 return out; 118 } 119}