001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.math.util;
031
032import org.openimaj.util.array.ArrayUtils;
033
034/**
035 *
036 * Some basic statistical operations on double arrays
037 *
038 * @author Sina Samangooei (ss@ecs.soton.ac.uk)
039 *
040 */
041public class DoubleArrayStatsUtils {
042        /**
043         * Find the mean of a single dimensional double array. returns 0 if the
044         * array is empty.
045         *
046         * @param arr
047         * @return the mean
048         */
049        public static double mean(double[] arr) {
050                if (arr.length == 0) {
051                        return 0;
052                }
053                int count = 1;
054                double mean = arr[0];
055                for (int i = 1; i < arr.length; i++) {
056                        count++;
057                        mean = mean + (arr[i] - mean) / count;
058                }
059                return mean;
060        }
061
062        /**
063         * Calculate the mean of a two dimensional double array. returns 0 if the
064         * array is empty.
065         *
066         * @param arr
067         * @return the mean
068         */
069        public static double mean(double[][] arr) {
070                if (arr.length == 0) {
071                        return 0;
072                }
073                int firstRowIndex = 0;
074                while (arr[firstRowIndex].length == 0)
075                        firstRowIndex++;
076                int firstColIndex = 1;
077
078                int count = 1;
079                double mean = arr[firstRowIndex][0];
080
081                for (int i = firstRowIndex; i < arr.length; i++) {
082                        for (int j = firstColIndex; j < arr[i].length; j++) {
083                                count++;
084                                mean = mean + (arr[i][j] - mean) / count;
085                        }
086                        firstColIndex = 0;
087                }
088
089                return mean;
090        }
091
092        /**
093         * Calculate the sample variance of a one dimensional double array. If the
094         * length of the array is less than 2, variance is 0.
095         *
096         * @param arr
097         * @return the variance
098         */
099        public static double var(double[] arr) {
100                if (arr.length < 2) {
101                        return 0;
102                }
103
104                int count = 1;
105                double oldMean = arr[0];
106                double newMean = arr[0];
107                double var = 0;
108
109                for (int i = 1; i < arr.length; i++) {
110                        count++;
111                        final double x = arr[i];
112                        newMean = oldMean + (x - oldMean) / count;
113                        var = var + (x - oldMean) * (x - newMean);
114                        oldMean = newMean;
115                }
116
117                return var / (count - 1);
118        }
119
120        /**
121         * Calculate the sample variance of a one dimensional double array. If the
122         * length of the array is less than 2, variance is 0.
123         *
124         * @param arr
125         * @return the variance
126         */
127        public static double var(double[][] arr) {
128                if (arr.length == 0) {
129                        return 0;
130                }
131
132                int firstRowIndex = 0;
133                while (arr[firstRowIndex].length == 0)
134                        firstRowIndex++;
135                int firstColIndex = 1;
136
137                int count = 1;
138                double oldMean = arr[firstRowIndex][0];
139                double newMean = arr[firstRowIndex][0];
140                double var = 0;
141
142                for (int i = firstRowIndex; i < arr.length; i++) {
143                        for (int j = firstColIndex; j < arr[i].length; j++) {
144                                count++;
145                                final double x = arr[i][j];
146                                newMean = oldMean + (x - oldMean) / count;
147                                var = var + (x - oldMean) * (x - newMean);
148                                oldMean = newMean;
149                        }
150                        firstColIndex = 0;
151                }
152
153                return count > 1 ? var / (count - 1) : 0;
154        }
155
156        /**
157         * Calculate the sample standard deviation of a 2D array. Calls
158         * {@link DoubleArrayStatsUtils#var(double[][])} and does a Math.sqrt.
159         *
160         * @param arr
161         * @return the standard deviation
162         */
163        public static double std(double[][] arr) {
164                return Math.sqrt(var(arr));
165        }
166
167        /**
168         * Calculate the sample standard deviation of a 1D array. Calls
169         * {@link DoubleArrayStatsUtils#var(double[])} and does a Math.sqrt.
170         *
171         * @param arr
172         * @return the standard deviation
173         */
174        public static double std(double[] arr) {
175                return Math.sqrt(var(arr));
176        }
177
178        /**
179         * Calculate the sum of a 2D array.
180         *
181         * @param arr
182         * @return the sum
183         */
184        public static double sum(double[][] arr) {
185                double sum = 0;
186                for (int i = 0; i < arr.length; i++)
187                        sum += sum(arr[i]);
188                return sum;
189        }
190
191        /**
192         * Calculate the sum of a 1D array.
193         *
194         * @param arr
195         * @return the sum
196         */
197        public static double sum(double[] arr) {
198                double sum = 0;
199                for (int i = 0; i < arr.length; i++)
200                        sum += arr[i];
201                return sum;
202        }
203
204        /**
205         * Calculate the sum of the squared values of a 2D array.
206         *
207         * @param arr
208         * @return sum of squares
209         */
210        public static double sumSq(double[][] arr) {
211                double sum = 0;
212                for (int i = 0; i < arr.length; i++)
213                        sum += sumSq(arr[i]);
214                return sum;
215        }
216
217        /**
218         * Calculate the sum the squared values of a 1D array.
219         *
220         * @param arr
221         * @return sum of squares
222         */
223        public static double sumSq(double[] arr) {
224                double sum = 0;
225                for (int i = 0; i < arr.length; i++)
226                        sum += arr[i] * arr[i];
227                return sum;
228        }
229
230        /**
231         * Calculate the sum of the absolute values of a 2D array.
232         *
233         * @param arr
234         * @return the sum absolute values
235         */
236        public static double sumAbs(double[][] arr) {
237                double sum = 0;
238                for (int i = 0; i < arr.length; i++)
239                        sum += sumAbs(arr[i]);
240                return sum;
241        }
242
243        /**
244         * Calculate the sum the absolute values of a 1D array.
245         *
246         * @param arr
247         * @return the sum absolute values
248         */
249        public static double sumAbs(double[] arr) {
250                double sum = 0;
251                for (int i = 0; i < arr.length; i++)
252                        sum += Math.abs(arr[i]);
253                return sum;
254        }
255
256        /**
257         * Calculate the median of the given array. Uses the quick select algorithm
258         * ({@link ArrayUtils#quickSelect(double[], int)}).
259         *
260         * @param arr
261         *            the array
262         * @return the median value
263         */
264        public static double median(double[] arr) {
265                final int median = arr.length / 2;
266
267                if (arr.length % 2 == 0) {
268                        final double a = ArrayUtils.quickSelect(arr, median);
269                        final double b = ArrayUtils.quickSelect(arr, median - 1);
270
271                        return (a + b) / 2f;
272                }
273                return ArrayUtils.quickSelect(arr, median);
274        }
275
276        /**
277         * Calculate the median of the given sub-array. Uses the quick select
278         * algorithm ({@link ArrayUtils#quickSelect(double[], int, int, int)}).
279         *
280         * @param arr
281         *            the array
282         * @param start
283         *            starting point in the array (inclusive)
284         * @param stop
285         *            stopping point in the array (exclusive)
286         * @return the median value
287         */
288        public static double median(double[] arr, int start, int stop) {
289                final int median = arr.length / 2;
290
291                if (arr.length % 2 == 0) {
292                        final double a = ArrayUtils.quickSelect(arr, median, start, stop);
293                        final double b = ArrayUtils.quickSelect(arr, median - 1, start, stop);
294
295                        return (a + b) / 2f;
296                }
297                return ArrayUtils.quickSelect(arr, median, start, stop);
298        }
299}