001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math3.stat;
018
019 import java.io.Serializable;
020 import java.text.NumberFormat;
021 import java.util.Collection;
022 import java.util.Iterator;
023 import java.util.Comparator;
024 import java.util.Map;
025 import java.util.TreeMap;
026
027 import org.apache.commons.math3.exception.MathIllegalArgumentException;
028 import org.apache.commons.math3.exception.util.LocalizedFormats;
029
030 /**
031 * Maintains a frequency distribution.
032 * <p>
033 * Accepts int, long, char or Comparable values. New values added must be
034 * comparable to those that have been added, otherwise the add method will
035 * throw an IllegalArgumentException.</p>
036 * <p>
037 * Integer values (int, long, Integer, Long) are not distinguished by type --
038 * i.e. <code>addValue(Long.valueOf(2)), addValue(2), addValue(2l)</code> all have
039 * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p>
040 * <p>
041 * char values are converted by <code>addValue</code> to Character instances.
042 * As such, these values are not comparable to integral values, so attempts
043 * to combine integral types with chars in a frequency distribution will fail.
044 * </p>
045 * <p>
046 * The values are ordered using the default (natural order), unless a
047 * <code>Comparator</code> is supplied in the constructor.</p>
048 *
049 * @version $Id: Frequency.java 1422313 2012-12-15 18:53:41Z psteitz $
050 */
051 public class Frequency implements Serializable {
052
053 /** Serializable version identifier */
054 private static final long serialVersionUID = -3845586908418844111L;
055
056 /** underlying collection */
057 private final TreeMap<Comparable<?>, Long> freqTable;
058
059 /**
060 * Default constructor.
061 */
062 public Frequency() {
063 freqTable = new TreeMap<Comparable<?>, Long>();
064 }
065
066 /**
067 * Constructor allowing values Comparator to be specified.
068 *
069 * @param comparator Comparator used to order values
070 */
071 @SuppressWarnings("unchecked") // TODO is the cast OK?
072 public Frequency(Comparator<?> comparator) {
073 freqTable = new TreeMap<Comparable<?>, Long>((Comparator<? super Comparable<?>>) comparator);
074 }
075
076 /**
077 * Return a string representation of this frequency
078 * distribution.
079 *
080 * @return a string representation.
081 */
082 @Override
083 public String toString() {
084 NumberFormat nf = NumberFormat.getPercentInstance();
085 StringBuilder outBuffer = new StringBuilder();
086 outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
087 Iterator<Comparable<?>> iter = freqTable.keySet().iterator();
088 while (iter.hasNext()) {
089 Comparable<?> value = iter.next();
090 outBuffer.append(value);
091 outBuffer.append('\t');
092 outBuffer.append(getCount(value));
093 outBuffer.append('\t');
094 outBuffer.append(nf.format(getPct(value)));
095 outBuffer.append('\t');
096 outBuffer.append(nf.format(getCumPct(value)));
097 outBuffer.append('\n');
098 }
099 return outBuffer.toString();
100 }
101
102 /**
103 * Adds 1 to the frequency count for v.
104 * <p>
105 * If other objects have already been added to this Frequency, v must
106 * be comparable to those that have already been added.
107 * </p>
108 *
109 * @param v the value to add.
110 * @throws MathIllegalArgumentException if <code>v</code> is not comparable with previous entries
111 */
112 public void addValue(Comparable<?> v) throws MathIllegalArgumentException {
113 incrementValue(v, 1);
114 }
115
116 /**
117 * Increments the frequency count for v.
118 * <p>
119 * If other objects have already been added to this Frequency, v must
120 * be comparable to those that have already been added.
121 * </p>
122 *
123 * @param v the value to add.
124 * @param increment the amount by which the value should be incremented
125 * @throws IllegalArgumentException if <code>v</code> is not comparable with previous entries
126 * @since 3.1
127 */
128 public void incrementValue(Comparable<?> v, long increment){
129 Comparable<?> obj = v;
130 if (v instanceof Integer) {
131 obj = Long.valueOf(((Integer) v).longValue());
132 }
133 try {
134 Long count = freqTable.get(obj);
135 if (count == null) {
136 freqTable.put(obj, Long.valueOf(increment));
137 } else {
138 freqTable.put(obj, Long.valueOf(count.longValue() + increment));
139 }
140 } catch (ClassCastException ex) {
141 //TreeMap will throw ClassCastException if v is not comparable
142 throw new MathIllegalArgumentException(
143 LocalizedFormats.INSTANCES_NOT_COMPARABLE_TO_EXISTING_VALUES,
144 v.getClass().getName());
145 }
146 }
147
148 /**
149 * Adds 1 to the frequency count for v.
150 *
151 * @param v the value to add.
152 * @throws MathIllegalArgumentException if the table contains entries not
153 * comparable to Integer
154 */
155 public void addValue(int v) throws MathIllegalArgumentException {
156 addValue(Long.valueOf(v));
157 }
158
159 /**
160 * Adds 1 to the frequency count for v.
161 *
162 * @param v the value to add.
163 * @throws MathIllegalArgumentException if the table contains entries not
164 * comparable to Long
165 */
166 public void addValue(long v) throws MathIllegalArgumentException {
167 addValue(Long.valueOf(v));
168 }
169
170 /**
171 * Adds 1 to the frequency count for v.
172 *
173 * @param v the value to add.
174 * @throws MathIllegalArgumentException if the table contains entries not
175 * comparable to Char
176 */
177 public void addValue(char v) throws MathIllegalArgumentException {
178 addValue(Character.valueOf(v));
179 }
180
181 /** Clears the frequency table */
182 public void clear() {
183 freqTable.clear();
184 }
185
186 /**
187 * Returns an Iterator over the set of values that have been added.
188 * <p>
189 * If added values are integral (i.e., integers, longs, Integers, or Longs),
190 * they are converted to Longs when they are added, so the objects returned
191 * by the Iterator will in this case be Longs.</p>
192 *
193 * @return values Iterator
194 */
195 public Iterator<Comparable<?>> valuesIterator() {
196 return freqTable.keySet().iterator();
197 }
198
199 /**
200 * Return an Iterator over the set of keys and values that have been added.
201 * Using the entry set to iterate is more efficient in the case where you
202 * need to access respective counts as well as values, since it doesn't
203 * require a "get" for every key...the value is provided in the Map.Entry.
204 * <p>
205 * If added values are integral (i.e., integers, longs, Integers, or Longs),
206 * they are converted to Longs when they are added, so the values of the
207 * map entries returned by the Iterator will in this case be Longs.</p>
208 *
209 * @return entry set Iterator
210 * @since 3.1
211 */
212 public Iterator<Map.Entry<Comparable<?>, Long>> entrySetIterator() {
213 return freqTable.entrySet().iterator();
214 }
215
216 //-------------------------------------------------------------------------
217
218 /**
219 * Returns the sum of all frequencies.
220 *
221 * @return the total frequency count.
222 */
223 public long getSumFreq() {
224 long result = 0;
225 Iterator<Long> iterator = freqTable.values().iterator();
226 while (iterator.hasNext()) {
227 result += iterator.next().longValue();
228 }
229 return result;
230 }
231
232 /**
233 * Returns the number of values = v.
234 * Returns 0 if the value is not comparable.
235 *
236 * @param v the value to lookup.
237 * @return the frequency of v.
238 */
239 public long getCount(Comparable<?> v) {
240 if (v instanceof Integer) {
241 return getCount(((Integer) v).longValue());
242 }
243 long result = 0;
244 try {
245 Long count = freqTable.get(v);
246 if (count != null) {
247 result = count.longValue();
248 }
249 } catch (ClassCastException ex) { // NOPMD
250 // ignore and return 0 -- ClassCastException will be thrown if value is not comparable
251 }
252 return result;
253 }
254
255 /**
256 * Returns the number of values = v.
257 *
258 * @param v the value to lookup.
259 * @return the frequency of v.
260 */
261 public long getCount(int v) {
262 return getCount(Long.valueOf(v));
263 }
264
265 /**
266 * Returns the number of values = v.
267 *
268 * @param v the value to lookup.
269 * @return the frequency of v.
270 */
271 public long getCount(long v) {
272 return getCount(Long.valueOf(v));
273 }
274
275 /**
276 * Returns the number of values = v.
277 *
278 * @param v the value to lookup.
279 * @return the frequency of v.
280 */
281 public long getCount(char v) {
282 return getCount(Character.valueOf(v));
283 }
284
285 /**
286 * Returns the number of values in the frequency table.
287 *
288 * @return the number of unique values that have been added to the frequency table.
289 * @see #valuesIterator()
290 */
291 public int getUniqueCount(){
292 return freqTable.keySet().size();
293 }
294
295 /**
296 * Returns the percentage of values that are equal to v
297 * (as a proportion between 0 and 1).
298 * <p>
299 * Returns <code>Double.NaN</code> if no values have been added.</p>
300 *
301 * @param v the value to lookup
302 * @return the proportion of values equal to v
303 */
304 public double getPct(Comparable<?> v) {
305 final long sumFreq = getSumFreq();
306 if (sumFreq == 0) {
307 return Double.NaN;
308 }
309 return (double) getCount(v) / (double) sumFreq;
310 }
311
312 /**
313 * Returns the percentage of values that are equal to v
314 * (as a proportion between 0 and 1).
315 *
316 * @param v the value to lookup
317 * @return the proportion of values equal to v
318 */
319 public double getPct(int v) {
320 return getPct(Long.valueOf(v));
321 }
322
323 /**
324 * Returns the percentage of values that are equal to v
325 * (as a proportion between 0 and 1).
326 *
327 * @param v the value to lookup
328 * @return the proportion of values equal to v
329 */
330 public double getPct(long v) {
331 return getPct(Long.valueOf(v));
332 }
333
334 /**
335 * Returns the percentage of values that are equal to v
336 * (as a proportion between 0 and 1).
337 *
338 * @param v the value to lookup
339 * @return the proportion of values equal to v
340 */
341 public double getPct(char v) {
342 return getPct(Character.valueOf(v));
343 }
344
345 //-----------------------------------------------------------------------------------------
346
347 /**
348 * Returns the cumulative frequency of values less than or equal to v.
349 * <p>
350 * Returns 0 if v is not comparable to the values set.</p>
351 *
352 * @param v the value to lookup.
353 * @return the proportion of values equal to v
354 */
355 @SuppressWarnings({ "rawtypes", "unchecked" })
356 public long getCumFreq(Comparable<?> v) {
357 if (getSumFreq() == 0) {
358 return 0;
359 }
360 if (v instanceof Integer) {
361 return getCumFreq(((Integer) v).longValue());
362 }
363 Comparator<Comparable<?>> c = (Comparator<Comparable<?>>) freqTable.comparator();
364 if (c == null) {
365 c = new NaturalComparator();
366 }
367 long result = 0;
368
369 try {
370 Long value = freqTable.get(v);
371 if (value != null) {
372 result = value.longValue();
373 }
374 } catch (ClassCastException ex) {
375 return result; // v is not comparable
376 }
377
378 if (c.compare(v, freqTable.firstKey()) < 0) {
379 return 0; // v is comparable, but less than first value
380 }
381
382 if (c.compare(v, freqTable.lastKey()) >= 0) {
383 return getSumFreq(); // v is comparable, but greater than the last value
384 }
385
386 Iterator<Comparable<?>> values = valuesIterator();
387 while (values.hasNext()) {
388 Comparable<?> nextValue = values.next();
389 if (c.compare(v, nextValue) > 0) {
390 result += getCount(nextValue);
391 } else {
392 return result;
393 }
394 }
395 return result;
396 }
397
398 /**
399 * Returns the cumulative frequency of values less than or equal to v.
400 * <p>
401 * Returns 0 if v is not comparable to the values set.</p>
402 *
403 * @param v the value to lookup
404 * @return the proportion of values equal to v
405 */
406 public long getCumFreq(int v) {
407 return getCumFreq(Long.valueOf(v));
408 }
409
410 /**
411 * Returns the cumulative frequency of values less than or equal to v.
412 * <p>
413 * Returns 0 if v is not comparable to the values set.</p>
414 *
415 * @param v the value to lookup
416 * @return the proportion of values equal to v
417 */
418 public long getCumFreq(long v) {
419 return getCumFreq(Long.valueOf(v));
420 }
421
422 /**
423 * Returns the cumulative frequency of values less than or equal to v.
424 * <p>
425 * Returns 0 if v is not comparable to the values set.</p>
426 *
427 * @param v the value to lookup
428 * @return the proportion of values equal to v
429 */
430 public long getCumFreq(char v) {
431 return getCumFreq(Character.valueOf(v));
432 }
433
434 //----------------------------------------------------------------------------------------------
435
436 /**
437 * Returns the cumulative percentage of values less than or equal to v
438 * (as a proportion between 0 and 1).
439 * <p>
440 * Returns <code>Double.NaN</code> if no values have been added.
441 * Returns 0 if at least one value has been added, but v is not comparable
442 * to the values set.</p>
443 *
444 * @param v the value to lookup
445 * @return the proportion of values less than or equal to v
446 */
447 public double getCumPct(Comparable<?> v) {
448 final long sumFreq = getSumFreq();
449 if (sumFreq == 0) {
450 return Double.NaN;
451 }
452 return (double) getCumFreq(v) / (double) sumFreq;
453 }
454
455 /**
456 * Returns the cumulative percentage of values less than or equal to v
457 * (as a proportion between 0 and 1).
458 * <p>
459 * Returns 0 if v is not comparable to the values set.</p>
460 *
461 * @param v the value to lookup
462 * @return the proportion of values less than or equal to v
463 */
464 public double getCumPct(int v) {
465 return getCumPct(Long.valueOf(v));
466 }
467
468 /**
469 * Returns the cumulative percentage of values less than or equal to v
470 * (as a proportion between 0 and 1).
471 * <p>
472 * Returns 0 if v is not comparable to the values set.</p>
473 *
474 * @param v the value to lookup
475 * @return the proportion of values less than or equal to v
476 */
477 public double getCumPct(long v) {
478 return getCumPct(Long.valueOf(v));
479 }
480
481 /**
482 * Returns the cumulative percentage of values less than or equal to v
483 * (as a proportion between 0 and 1).
484 * <p>
485 * Returns 0 if v is not comparable to the values set.</p>
486 *
487 * @param v the value to lookup
488 * @return the proportion of values less than or equal to v
489 */
490 public double getCumPct(char v) {
491 return getCumPct(Character.valueOf(v));
492 }
493
494 //----------------------------------------------------------------------------------------------
495
496 /**
497 * Merge another Frequency object's counts into this instance.
498 * This Frequency's counts will be incremented (or set when not already set)
499 * by the counts represented by other.
500 *
501 * @param other the other {@link Frequency} object to be merged
502 * @since 3.1
503 */
504 public void merge(Frequency other) {
505 for (Iterator<Map.Entry<Comparable<?>, Long>> iter = other.entrySetIterator(); iter.hasNext();) {
506 Map.Entry<Comparable<?>, Long> entry = iter.next();
507 incrementValue(entry.getKey(), entry.getValue());
508 }
509 }
510
511 /**
512 * Merge a {@link Collection} of {@link Frequency} objects into this instance.
513 * This Frequency's counts will be incremented (or set when not already set)
514 * by the counts represented by each of the others.
515 *
516 * @param others the other {@link Frequency} objects to be merged
517 * @since 3.1
518 */
519 public void merge(Collection<Frequency> others) {
520 for (Iterator<Frequency> iter = others.iterator(); iter.hasNext();) {
521 merge(iter.next());
522 }
523 }
524
525 //----------------------------------------------------------------------------------------------
526
527 /**
528 * A Comparator that compares comparable objects using the
529 * natural order. Copied from Commons Collections ComparableComparator.
530 */
531 private static class NaturalComparator<T extends Comparable<T>> implements Comparator<Comparable<T>>, Serializable {
532
533 /** Serializable version identifier */
534 private static final long serialVersionUID = -3852193713161395148L;
535
536 /**
537 * Compare the two {@link Comparable Comparable} arguments.
538 * This method is equivalent to:
539 * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
540 *
541 * @param o1 the first object
542 * @param o2 the second object
543 * @return result of comparison
544 * @throws NullPointerException when <i>o1</i> is <code>null</code>,
545 * or when <code>((Comparable)o1).compareTo(o2)</code> does
546 * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable},
547 * or when <code>((Comparable)o1).compareTo(o2)</code> does
548 */
549 @SuppressWarnings("unchecked") // cast to (T) may throw ClassCastException, see Javadoc
550 public int compare(Comparable<T> o1, Comparable<T> o2) {
551 return o1.compareTo((T) o2);
552 }
553 }
554
555 /** {@inheritDoc} */
556 @Override
557 public int hashCode() {
558 final int prime = 31;
559 int result = 1;
560 result = prime * result +
561 ((freqTable == null) ? 0 : freqTable.hashCode());
562 return result;
563 }
564
565 /** {@inheritDoc} */
566 @Override
567 public boolean equals(Object obj) {
568 if (this == obj) {
569 return true;
570 }
571 if (!(obj instanceof Frequency)) {
572 return false;
573 }
574 Frequency other = (Frequency) obj;
575 if (freqTable == null) {
576 if (other.freqTable != null) {
577 return false;
578 }
579 } else if (!freqTable.equals(other.freqTable)) {
580 return false;
581 }
582 return true;
583 }
584
585 }