001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.math3.stat.ranking;
019
020 import java.util.ArrayList;
021 import java.util.Arrays;
022 import java.util.Iterator;
023 import java.util.List;
024
025 import org.apache.commons.math3.exception.MathInternalError;
026 import org.apache.commons.math3.exception.NotANumberException;
027 import org.apache.commons.math3.random.RandomData;
028 import org.apache.commons.math3.random.RandomDataImpl;
029 import org.apache.commons.math3.random.RandomGenerator;
030 import org.apache.commons.math3.util.FastMath;
031
032
033 /**
034 * <p> Ranking based on the natural ordering on doubles.</p>
035 * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
036 * are handled using the selected {@link TiesStrategy}.
037 * Configuration settings are supplied in optional constructor arguments.
038 * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE},
039 * respectively. When using {@link TiesStrategy#RANDOM}, a
040 * {@link RandomGenerator} may be supplied as a constructor argument.</p>
041 * <p>Examples:
042 * <table border="1" cellpadding="3">
043 * <tr><th colspan="3">
044 * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
045 * </th></tr>
046 * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
047 * <th><code>rank(data)</code></th>
048 * <tr>
049 * <td>default (NaNs maximal)</td>
050 * <td>default (ties averaged)</td>
051 * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
052 * <tr>
053 * <td>default (NaNs maximal)</td>
054 * <td>MINIMUM</td>
055 * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
056 * <tr>
057 * <td>MINIMAL</td>
058 * <td>default (ties averaged)</td>
059 * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
060 * <tr>
061 * <td>REMOVED</td>
062 * <td>SEQUENTIAL</td>
063 * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
064 * <tr>
065 * <td>MINIMAL</td>
066 * <td>MAXIMUM</td>
067 * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p>
068 *
069 * @since 2.0
070 * @version $Id: NaturalRanking.java 1416643 2012-12-03 19:37:14Z tn $
071 */
072 public class NaturalRanking implements RankingAlgorithm {
073
074 /** default NaN strategy */
075 public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED;
076
077 /** default ties strategy */
078 public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
079
080 /** NaN strategy - defaults to NaNs maximal */
081 private final NaNStrategy nanStrategy;
082
083 /** Ties strategy - defaults to ties averaged */
084 private final TiesStrategy tiesStrategy;
085
086 /** Source of random data - used only when ties strategy is RANDOM */
087 private final RandomData randomData;
088
089 /**
090 * Create a NaturalRanking with default strategies for handling ties and NaNs.
091 */
092 public NaturalRanking() {
093 super();
094 tiesStrategy = DEFAULT_TIES_STRATEGY;
095 nanStrategy = DEFAULT_NAN_STRATEGY;
096 randomData = null;
097 }
098
099 /**
100 * Create a NaturalRanking with the given TiesStrategy.
101 *
102 * @param tiesStrategy the TiesStrategy to use
103 */
104 public NaturalRanking(TiesStrategy tiesStrategy) {
105 super();
106 this.tiesStrategy = tiesStrategy;
107 nanStrategy = DEFAULT_NAN_STRATEGY;
108 randomData = new RandomDataImpl();
109 }
110
111 /**
112 * Create a NaturalRanking with the given NaNStrategy.
113 *
114 * @param nanStrategy the NaNStrategy to use
115 */
116 public NaturalRanking(NaNStrategy nanStrategy) {
117 super();
118 this.nanStrategy = nanStrategy;
119 tiesStrategy = DEFAULT_TIES_STRATEGY;
120 randomData = null;
121 }
122
123 /**
124 * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
125 *
126 * @param nanStrategy NaNStrategy to use
127 * @param tiesStrategy TiesStrategy to use
128 */
129 public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) {
130 super();
131 this.nanStrategy = nanStrategy;
132 this.tiesStrategy = tiesStrategy;
133 randomData = new RandomDataImpl();
134 }
135
136 /**
137 * Create a NaturalRanking with TiesStrategy.RANDOM and the given
138 * RandomGenerator as the source of random data.
139 *
140 * @param randomGenerator source of random data
141 */
142 public NaturalRanking(RandomGenerator randomGenerator) {
143 super();
144 this.tiesStrategy = TiesStrategy.RANDOM;
145 nanStrategy = DEFAULT_NAN_STRATEGY;
146 randomData = new RandomDataImpl(randomGenerator);
147 }
148
149
150 /**
151 * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
152 * and the given source of random data.
153 *
154 * @param nanStrategy NaNStrategy to use
155 * @param randomGenerator source of random data
156 */
157 public NaturalRanking(NaNStrategy nanStrategy,
158 RandomGenerator randomGenerator) {
159 super();
160 this.nanStrategy = nanStrategy;
161 this.tiesStrategy = TiesStrategy.RANDOM;
162 randomData = new RandomDataImpl(randomGenerator);
163 }
164
165 /**
166 * Return the NaNStrategy
167 *
168 * @return returns the NaNStrategy
169 */
170 public NaNStrategy getNanStrategy() {
171 return nanStrategy;
172 }
173
174 /**
175 * Return the TiesStrategy
176 *
177 * @return the TiesStrategy
178 */
179 public TiesStrategy getTiesStrategy() {
180 return tiesStrategy;
181 }
182
183 /**
184 * Rank <code>data</code> using the natural ordering on Doubles, with
185 * NaN values handled according to <code>nanStrategy</code> and ties
186 * resolved using <code>tiesStrategy.</code>
187 *
188 * @param data array to be ranked
189 * @return array of ranks
190 * @throws NotANumberException if the selected {@link NaNStrategy} is {@code FAILED}
191 * and a {@link Double#NaN} is encountered in the input data
192 */
193 public double[] rank(double[] data) {
194
195 // Array recording initial positions of data to be ranked
196 IntDoublePair[] ranks = new IntDoublePair[data.length];
197 for (int i = 0; i < data.length; i++) {
198 ranks[i] = new IntDoublePair(data[i], i);
199 }
200
201 // Recode, remove or record positions of NaNs
202 List<Integer> nanPositions = null;
203 switch (nanStrategy) {
204 case MAXIMAL: // Replace NaNs with +INFs
205 recodeNaNs(ranks, Double.POSITIVE_INFINITY);
206 break;
207 case MINIMAL: // Replace NaNs with -INFs
208 recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
209 break;
210 case REMOVED: // Drop NaNs from data
211 ranks = removeNaNs(ranks);
212 break;
213 case FIXED: // Record positions of NaNs
214 nanPositions = getNanPositions(ranks);
215 break;
216 case FAILED:
217 nanPositions = getNanPositions(ranks);
218 if (nanPositions.size() > 0) {
219 throw new NotANumberException();
220 }
221 break;
222 default: // this should not happen unless NaNStrategy enum is changed
223 throw new MathInternalError();
224 }
225
226 // Sort the IntDoublePairs
227 Arrays.sort(ranks);
228
229 // Walk the sorted array, filling output array using sorted positions,
230 // resolving ties as we go
231 double[] out = new double[ranks.length];
232 int pos = 1; // position in sorted array
233 out[ranks[0].getPosition()] = pos;
234 List<Integer> tiesTrace = new ArrayList<Integer>();
235 tiesTrace.add(ranks[0].getPosition());
236 for (int i = 1; i < ranks.length; i++) {
237 if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
238 // tie sequence has ended (or had length 1)
239 pos = i + 1;
240 if (tiesTrace.size() > 1) { // if seq is nontrivial, resolve
241 resolveTie(out, tiesTrace);
242 }
243 tiesTrace = new ArrayList<Integer>();
244 tiesTrace.add(ranks[i].getPosition());
245 } else {
246 // tie sequence continues
247 tiesTrace.add(ranks[i].getPosition());
248 }
249 out[ranks[i].getPosition()] = pos;
250 }
251 if (tiesTrace.size() > 1) { // handle tie sequence at end
252 resolveTie(out, tiesTrace);
253 }
254 if (nanStrategy == NaNStrategy.FIXED) {
255 restoreNaNs(out, nanPositions);
256 }
257 return out;
258 }
259
260 /**
261 * Returns an array that is a copy of the input array with IntDoublePairs
262 * having NaN values removed.
263 *
264 * @param ranks input array
265 * @return array with NaN-valued entries removed
266 */
267 private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
268 if (!containsNaNs(ranks)) {
269 return ranks;
270 }
271 IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
272 int j = 0;
273 for (int i = 0; i < ranks.length; i++) {
274 if (Double.isNaN(ranks[i].getValue())) {
275 // drop, but adjust original ranks of later elements
276 for (int k = i + 1; k < ranks.length; k++) {
277 ranks[k] = new IntDoublePair(
278 ranks[k].getValue(), ranks[k].getPosition() - 1);
279 }
280 } else {
281 outRanks[j] = new IntDoublePair(
282 ranks[i].getValue(), ranks[i].getPosition());
283 j++;
284 }
285 }
286 IntDoublePair[] returnRanks = new IntDoublePair[j];
287 System.arraycopy(outRanks, 0, returnRanks, 0, j);
288 return returnRanks;
289 }
290
291 /**
292 * Recodes NaN values to the given value.
293 *
294 * @param ranks array to recode
295 * @param value the value to replace NaNs with
296 */
297 private void recodeNaNs(IntDoublePair[] ranks, double value) {
298 for (int i = 0; i < ranks.length; i++) {
299 if (Double.isNaN(ranks[i].getValue())) {
300 ranks[i] = new IntDoublePair(
301 value, ranks[i].getPosition());
302 }
303 }
304 }
305
306 /**
307 * Checks for presence of NaNs in <code>ranks.</code>
308 *
309 * @param ranks array to be searched for NaNs
310 * @return true iff ranks contains one or more NaNs
311 */
312 private boolean containsNaNs(IntDoublePair[] ranks) {
313 for (int i = 0; i < ranks.length; i++) {
314 if (Double.isNaN(ranks[i].getValue())) {
315 return true;
316 }
317 }
318 return false;
319 }
320
321 /**
322 * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
323 * The input <code>ranks</code> array is expected to take the same value
324 * for all indices in <code>tiesTrace</code>. The common value is recoded
325 * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>,
326 * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
327 * The same array and trace with tiesStrategy AVERAGE will come out
328 * <5,8,3,6,3,7,1,3>.
329 *
330 * @param ranks array of ranks
331 * @param tiesTrace list of indices where <code>ranks</code> is constant
332 * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
333 * </code>
334 */
335 private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
336
337 // constant value of ranks over tiesTrace
338 final double c = ranks[tiesTrace.get(0)];
339
340 // length of sequence of tied ranks
341 final int length = tiesTrace.size();
342
343 switch (tiesStrategy) {
344 case AVERAGE: // Replace ranks with average
345 fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
346 break;
347 case MAXIMUM: // Replace ranks with maximum values
348 fill(ranks, tiesTrace, c + length - 1);
349 break;
350 case MINIMUM: // Replace ties with minimum
351 fill(ranks, tiesTrace, c);
352 break;
353 case RANDOM: // Fill with random integral values in [c, c + length - 1]
354 Iterator<Integer> iterator = tiesTrace.iterator();
355 long f = FastMath.round(c);
356 while (iterator.hasNext()) {
357 // No advertised exception because args are guaranteed valid
358 ranks[iterator.next()] =
359 randomData.nextLong(f, f + length - 1);
360 }
361 break;
362 case SEQUENTIAL: // Fill sequentially from c to c + length - 1
363 // walk and fill
364 iterator = tiesTrace.iterator();
365 f = FastMath.round(c);
366 int i = 0;
367 while (iterator.hasNext()) {
368 ranks[iterator.next()] = f + i++;
369 }
370 break;
371 default: // this should not happen unless TiesStrategy enum is changed
372 throw new MathInternalError();
373 }
374 }
375
376 /**
377 * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
378 *
379 * @param data array to modify
380 * @param tiesTrace list of index values to set
381 * @param value value to set
382 */
383 private void fill(double[] data, List<Integer> tiesTrace, double value) {
384 Iterator<Integer> iterator = tiesTrace.iterator();
385 while (iterator.hasNext()) {
386 data[iterator.next()] = value;
387 }
388 }
389
390 /**
391 * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
392 *
393 * @param ranks array to modify
394 * @param nanPositions list of index values to set to <code>Double.NaN</code>
395 */
396 private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
397 if (nanPositions.size() == 0) {
398 return;
399 }
400 Iterator<Integer> iterator = nanPositions.iterator();
401 while (iterator.hasNext()) {
402 ranks[iterator.next().intValue()] = Double.NaN;
403 }
404
405 }
406
407 /**
408 * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
409 *
410 * @param ranks array to search for <code>NaNs</code>
411 * @return list of indexes i such that <code>ranks[i] = NaN</code>
412 */
413 private List<Integer> getNanPositions(IntDoublePair[] ranks) {
414 ArrayList<Integer> out = new ArrayList<Integer>();
415 for (int i = 0; i < ranks.length; i++) {
416 if (Double.isNaN(ranks[i].getValue())) {
417 out.add(Integer.valueOf(i));
418 }
419 }
420 return out;
421 }
422
423 /**
424 * Represents the position of a double value in an ordering.
425 * Comparable interface is implemented so Arrays.sort can be used
426 * to sort an array of IntDoublePairs by value. Note that the
427 * implicitly defined natural ordering is NOT consistent with equals.
428 */
429 private static class IntDoublePair implements Comparable<IntDoublePair> {
430
431 /** Value of the pair */
432 private final double value;
433
434 /** Original position of the pair */
435 private final int position;
436
437 /**
438 * Construct an IntDoublePair with the given value and position.
439 * @param value the value of the pair
440 * @param position the original position
441 */
442 public IntDoublePair(double value, int position) {
443 this.value = value;
444 this.position = position;
445 }
446
447 /**
448 * Compare this IntDoublePair to another pair.
449 * Only the <strong>values</strong> are compared.
450 *
451 * @param other the other pair to compare this to
452 * @return result of <code>Double.compare(value, other.value)</code>
453 */
454 public int compareTo(IntDoublePair other) {
455 return Double.compare(value, other.value);
456 }
457
458 // N.B. equals() and hashCode() are not implemented; see MATH-610 for discussion.
459
460 /**
461 * Returns the value of the pair.
462 * @return value
463 */
464 public double getValue() {
465 return value;
466 }
467
468 /**
469 * Returns the original position of the pair.
470 * @return position
471 */
472 public int getPosition() {
473 return position;
474 }
475 }
476 }