1 /*
2 * Created on Jul 15, 2007
3 *
4 * Copyright (c) 2007, The JUNG Authors
5 *
6 * All rights reserved.
7 *
8 * This software is open-source under the BSD license; see either
9 * "license.txt" or
10 * https://github.com/jrtom/jung/blob/master/LICENSE for a description.
11 */
12 package edu.uci.ics.jung.algorithms.scoring;
13
14 import com.google.common.base.Function;
15
16 import edu.uci.ics.jung.algorithms.scoring.util.ScoringUtils;
17 import edu.uci.ics.jung.graph.Graph;
18
19 /**
20 * Assigns hub and authority scores to each vertex depending on the topology of
21 * the network. The essential idea is that a vertex is a hub to the extent
22 * that it links to authoritative vertices, and is an authority to the extent
23 * that it is linked to by 'hub' vertices.
24 *
25 * <p>The classic HITS algorithm essentially proceeds as follows:
26 * <pre>
27 * assign equal initial hub and authority values to each vertex
28 * repeat
29 * for each vertex w:
30 * w.hub = sum over successors x of x.authority
31 * w.authority = sum over predecessors v of v.hub
32 * normalize hub and authority scores so that the sum of the squares of each = 1
33 * until scores converge
34 * </pre>
35 *
36 * HITS is somewhat different from random walk/eigenvector-based algorithms
37 * such as PageRank in that:
38 * <ul>
39 * <li>there are two mutually recursive scores being calculated, rather than
40 * a single value
41 * <li>the edge weights are effectively all 1, i.e., they can't be interpreted
42 * as transition probabilities. This means that the more inlinks and outlinks
43 * that a vertex has, the better, since adding an inlink (or outlink) does
44 * not dilute the influence of the other inlinks (or outlinks) as in
45 * random walk-based algorithms.
46 * <li>the scores cannot be interpreted as posterior probabilities (due to the different
47 * normalization)
48 * </ul>
49 *
50 * This implementation has the classic behavior by default. However, it has
51 * been generalized somewhat so that it can act in a more "PageRank-like" fashion:
52 * <ul>
53 * <li>this implementation has an optional 'random jump probability' parameter analogous
54 * to the 'alpha' parameter used by PageRank. Varying this value between 0 and 1
55 * allows the user to vary between the classic HITS behavior and one in which the
56 * scores are smoothed to a uniform distribution.
57 * The default value for this parameter is 0 (no random jumps possible).
58 * <li>the edge weights can be set to anything the user likes, and in
59 * particular they can be set up (e.g. using <code>UniformDegreeWeight</code>)
60 * so that the weights of the relevant edges incident to a vertex sum to 1.
61 * <li>The vertex score normalization has been factored into its own method
62 * so that it can be overridden by a subclass. Thus, for example,
63 * since the vertices' values are set to sum to 1 initially, if the weights of the
64 * relevant edges incident to a vertex sum to 1, then the vertices' values
65 * will continue to sum to 1 if the "sum-of-squares" normalization code
66 * is overridden to a no-op. (Other normalization methods may also be employed.)
67 * </ul>
68 *
69 * @param <V> the vertex type
70 * @param <E> the edge type
71 *
72 * @see "'Authoritative sources in a hyperlinked environment' by Jon Kleinberg, 1997"
73 */
74 public class HITS<V,E> extends HITSWithPriors<V,E>
75 {
76
77 /**
78 * Creates an instance for the specified graph, edge weights, and alpha
79 * (random jump probability) parameter.
80 * @param g the input graph
81 * @param edge_weights the weights to use for each edge
82 * @param alpha the probability of a hub giving some authority to all vertices,
83 * and of an authority increasing the score of all hubs (not just those connected
84 * via links)
85 */
86 public HITS(Graph<V,E> g, Function<E, Double> edge_weights, double alpha)
87 {
88 super(g, edge_weights, ScoringUtils.getHITSUniformRootPrior(g.getVertices()), alpha);
89 }
90
91 /**
92 * Creates an instance for the specified graph and alpha (random jump probability)
93 * parameter. The edge weights are all set to 1.
94 * @param g the input graph
95 * @param alpha the probability of a hub giving some authority to all vertices,
96 * and of an authority increasing the score of all hubs (not just those connected
97 * via links)
98 */
99 public HITS(Graph<V,E> g, double alpha)
100 {
101 super(g, ScoringUtils.getHITSUniformRootPrior(g.getVertices()), alpha);
102 }
103
104 /**
105 * Creates an instance for the specified graph. The edge weights are all set to 1
106 * and alpha is set to 0.
107 * @param g the input graph
108 */
109 public HITS(Graph<V,E> g)
110 {
111 this(g, 0.0);
112 }
113
114
115 /**
116 * Maintains hub and authority score information for a vertex.
117 */
118 public static class Scores
119 {
120 /**
121 * The hub score for a vertex.
122 */
123 public double hub;
124
125 /**
126 * The authority score for a vertex.
127 */
128 public double authority;
129
130 /**
131 * Creates an instance with the specified hub and authority score.
132 * @param hub the hub score
133 * @param authority the authority score
134 */
135 public Scores(double hub, double authority)
136 {
137 this.hub = hub;
138 this.authority = authority;
139 }
140
141 @Override
142 public String toString()
143 {
144 return String.format("[h:%.4f,a:%.4f]", this.hub, this.authority);
145 }
146 }
147 }