View Javadoc
1   /*
2    * Created on Jul 15, 2007
3    *
4    * Copyright (c) 2007, The JUNG Authors 
5    *
6    * All rights reserved.
7    *
8    * This software is open-source under the BSD license; see either
9    * "license.txt" or
10   * https://github.com/jrtom/jung/blob/master/LICENSE for a description.
11   */
12  package edu.uci.ics.jung.algorithms.scoring;
13  
14  import com.google.common.base.Function;
15  
16  import edu.uci.ics.jung.algorithms.scoring.util.ScoringUtils;
17  import edu.uci.ics.jung.graph.Graph;
18  
19  /**
20   * Assigns hub and authority scores to each vertex depending on the topology of
21   * the network.  The essential idea is that a vertex is a hub to the extent 
22   * that it links to authoritative vertices, and is an authority to the extent
23   * that it is linked to by 'hub' vertices.
24   * 
25   * <p>The classic HITS algorithm essentially proceeds as follows:
26   * <pre>
27   * assign equal initial hub and authority values to each vertex
28   * repeat
29   *   for each vertex w:
30   *     w.hub = sum over successors x of x.authority
31   *     w.authority = sum over predecessors v of v.hub
32   *   normalize hub and authority scores so that the sum of the squares of each = 1
33   * until scores converge
34   * </pre>
35   * 
36   * HITS is somewhat different from random walk/eigenvector-based algorithms 
37   * such as PageRank in that: 
38   * <ul>
39   * <li>there are two mutually recursive scores being calculated, rather than 
40   * a single value
41   * <li>the edge weights are effectively all 1, i.e., they can't be interpreted
42   * as transition probabilities.  This means that the more inlinks and outlinks
43   * that a vertex has, the better, since adding an inlink (or outlink) does
44   * not dilute the influence of the other inlinks (or outlinks) as in 
45   * random walk-based algorithms.
46   * <li>the scores cannot be interpreted as posterior probabilities (due to the different
47   * normalization)
48   * </ul>
49   * 
50   * This implementation has the classic behavior by default.  However, it has
51   * been generalized somewhat so that it can act in a more "PageRank-like" fashion:
52   * <ul>
53   * <li>this implementation has an optional 'random jump probability' parameter analogous
54   * to the 'alpha' parameter used by PageRank.  Varying this value between 0 and 1
55   * allows the user to vary between the classic HITS behavior and one in which the
56   * scores are smoothed to a uniform distribution.
57   * The default value for this parameter is 0 (no random jumps possible).
58   * <li>the edge weights can be set to anything the user likes, and in 
59   * particular they can be set up (e.g. using <code>UniformDegreeWeight</code>)
60   * so that the weights of the relevant edges incident to a vertex sum to 1.
61   * <li>The vertex score normalization has been factored into its own method
62   * so that it can be overridden by a subclass.  Thus, for example, 
63   * since the vertices' values are set to sum to 1 initially, if the weights of the
64   * relevant edges incident to a vertex sum to 1, then the vertices' values
65   * will continue to sum to 1 if the "sum-of-squares" normalization code
66   * is overridden to a no-op.  (Other normalization methods may also be employed.)
67   * </ul>
68   * 
69   * @param <V> the vertex type
70   * @param <E> the edge type
71   * 
72   * @see "'Authoritative sources in a hyperlinked environment' by Jon Kleinberg, 1997"
73   */
74  public class HITS<V,E> extends HITSWithPriors<V,E>
75  {
76  
77      /**
78       * Creates an instance for the specified graph, edge weights, and alpha
79       * (random jump probability) parameter.
80       * @param g the input graph
81       * @param edge_weights the weights to use for each edge
82       * @param alpha the probability of a hub giving some authority to all vertices,
83       * and of an authority increasing the score of all hubs (not just those connected
84       * via links)
85       */
86      public HITS(Graph<V,E> g, Function<E, Double> edge_weights, double alpha)
87      {
88          super(g, edge_weights, ScoringUtils.getHITSUniformRootPrior(g.getVertices()), alpha);
89      }
90  
91      /**
92       * Creates an instance for the specified graph and alpha (random jump probability)
93       * parameter.  The edge weights are all set to 1.
94       * @param g the input graph
95       * @param alpha the probability of a hub giving some authority to all vertices,
96       * and of an authority increasing the score of all hubs (not just those connected
97       * via links)
98       */
99      public HITS(Graph<V,E> g, double alpha)
100     {
101         super(g, ScoringUtils.getHITSUniformRootPrior(g.getVertices()), alpha);
102     }
103 
104     /**
105      * Creates an instance for the specified graph.  The edge weights are all set to 1
106      * and alpha is set to 0.
107      * @param g the input graph
108      */
109     public HITS(Graph<V,E> g)
110     {
111         this(g, 0.0);
112     }
113     
114 
115     /**
116      * Maintains hub and authority score information for a vertex.
117      */
118     public static class Scores
119     {
120     	/**
121     	 * The hub score for a vertex.
122     	 */
123     	public double hub;
124     	
125     	/**
126     	 * The authority score for a vertex.
127     	 */
128     	public double authority;
129     	
130     	/**
131     	 * Creates an instance with the specified hub and authority score.
132     	 * @param hub the hub score
133     	 * @param authority the authority score
134     	 */
135     	public Scores(double hub, double authority)
136     	{
137     		this.hub = hub;
138     		this.authority = authority;
139     	}
140     	
141     	@Override
142         public String toString()
143     	{
144     		return String.format("[h:%.4f,a:%.4f]", this.hub, this.authority);
145     	}
146     }
147 }