View Javadoc
1   /*
2    * Copyright (c) 2003, The JUNG Authors 
3    *
4    * All rights reserved.
5    *
6    * This software is open-source under the BSD license; see either
7    * "license.txt" or
8    * https://github.com/jrtom/jung/blob/master/LICENSE for a description.
9    */
10  package edu.uci.ics.jung.algorithms.metrics;
11  
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import edu.uci.ics.jung.graph.DirectedGraph;
import edu.uci.ics.jung.graph.Graph;
19  
20  
21  /**
22   * TriadicCensus is a standard social network tool that counts, for each of the 
23   * different possible configurations of three vertices, the number of times
24   * that that configuration occurs in the given graph.
25   * This may then be compared to the set of expected counts for this particular
26   * graph or to an expected sample. This is often used in p* modeling.
27   * <p>
28   * To use this class, 
29   * <pre>
30   * long[] triad_counts = TriadicCensus(dg);
31   * </pre>
32   * where <code>dg</code> is a <code>DirectedGraph</code>.
33   * ith element of the array (for i in [1,16]) is the number of 
34   * occurrences of the corresponding triad type.
35   * (The 0th element is not meaningful; this array is effectively 1-based.)
36   * To get the name of the ith triad (e.g. "003"), 
37   * look at the global constant array c.TRIAD_NAMES[i]
38   * <p>
39   * Triads are named as 
40   * (number of pairs that are mutually tied)
41   * (number of pairs that are one-way tied)
42   * (number of non-tied pairs)
43   * in the triple. Since there are be only three pairs, there is a finite
44   * set of these possible triads.
45   * <p>
46   * In fact, there are exactly 16, conventionally sorted by the number of 
47   * realized edges in the triad:
48   * <table>
49   * <caption>Descriptions of the different types of triads</caption>
50   * <tr><th>Number</th> <th>Configuration</th> <th>Notes</th></tr>
51   * <tr><td>1</td><td>003</td><td>The empty triad</td></tr>
52   * <tr><td>2</td><td>012</td><td></td></tr>
53   * <tr><td>3</td><td>102</td><td></td></tr>
54   * <tr><td>4</td><td>021D</td><td>"Down": the directed edges point away</td></tr>
55   * <tr><td>5</td><td>021U</td><td>"Up": the directed edges meet</td></tr>
56   * <tr><td>6</td><td>021C</td><td>"Circle": one in, one out</td></tr>
57   * <tr><td>7</td><td>111D</td><td>"Down": 021D but one edge is mutual</td></tr>
58   * <tr><td>8</td><td>111U</td><td>"Up": 021U but one edge is mutual</td></tr>
59   * <tr><td>9</td><td>030T</td><td>"Transitive": two point to the same vertex</td></tr>
60   * <tr><td>10</td><td>030C</td><td>"Circle": A&#8594;B&#8594;C&#8594;A</td></tr>
61   * <tr><td>11</td><td>201</td><td></td></tr>
62   * <tr><td>12</td><td>120D</td><td>"Down": 021D but the third edge is mutual</td></tr>
63   * <tr><td>13</td><td>120U</td><td>"Up": 021U but the third edge is mutual</td></tr>
64   * <tr><td>14</td><td>120C</td><td>"Circle": 021C but the third edge is mutual</td></tr>
65   * <tr><td>15</td><td>210</td><td></td></tr>
66   * <tr><td>16</td><td>300</td><td>The complete</td></tr>
67   * </table>
68   * <p>
69   * This implementation takes O( m ), m is the number of edges in the graph. 
70   * <br>
71   * It is based on 
72   * <a href="http://vlado.fmf.uni-lj.si/pub/networks/doc/triads/triads.pdf">
73   * A subquadratic triad census algorithm for large sparse networks 
74   * with small maximum degree</a>
75   * Vladimir Batagelj and Andrej Mrvar, University of Ljubljana
76   * Published in Social Networks.
77   * @author Danyel Fisher
78   * @author Tom Nelson - converted to jung2
79   *
80   */
81  public class TriadicCensus {
82  
83  	// NOTE THAT THIS RETURNS STANDARD 1-16 COUNT!
84  
85  	// and their types
86  	public static final String[] TRIAD_NAMES = { "N/A", "003", "012", "102", "021D",
87  			"021U", "021C", "111D", "111U", "030T", "030C", "201", "120D",
88  			"120U", "120C", "210", "300" };
89  
90  	public static final int MAX_TRIADS = TRIAD_NAMES.length;
91  
92  	/**
93       * Returns an array whose ith element (for i in [1,16]) is the number of 
94       * occurrences of the corresponding triad type in <code>g</code>.
95       * (The 0th element is not meaningful; this array is effectively 1-based.)
96  	 * 
97  	 * @param g the graph whose properties are being measured
98  	 * @param <V> the vertex type
99  	 * @param <E> the edge type
100 	 * @return an array encoding the number of occurrences of each triad type
101 	 */
102     public static <V,E> long[] getCounts(DirectedGraph<V,E> g) {
103         long[] count = new long[MAX_TRIADS];
104 
105         List<V> id = new ArrayList<V>(g.getVertices());
106 
107 		// apply algorithm to each edge, one at at time
108 		for (int i_v = 0; i_v < g.getVertexCount(); i_v++) {
109 			V v = id.get(i_v);
110 			for(V u : g.getNeighbors(v)) {
111 				int triType = -1;
112 				if (id.indexOf(u) <= i_v)
113 					continue;
114 				Set<V> neighbors = new HashSet<V>(g.getNeighbors(u));
115 				neighbors.addAll(g.getNeighbors(v));
116 				neighbors.remove(u);
117 				neighbors.remove(v);
118 				if (g.isSuccessor(v,u) && g.isSuccessor(u,v)) {
119 					triType = 3;
120 				} else {
121 					triType = 2;
122 				}
123 				count[triType] += g.getVertexCount() - neighbors.size() - 2;
124 				for (V w : neighbors) {
125 					if (shouldCount(g, id, u, v, w)) {
126 						count [ triType ( triCode(g, u, v, w) ) ] ++;
127 					}
128 				}
129 			}
130 		}
131 		int sum = 0;
132 		for (int i = 2; i <= 16; i++) {
133 			sum += count[i];
134 		}
135 		int n = g.getVertexCount();
136 		count[1] = n * (n-1) * (n-2) / 6 - sum;
137 		return count;		
138 	}
139 
140     /**
141 	 * This is the core of the technique in the paper. Returns an int from 0 to
142 	 * 63 which encodes the presence of all possible links between u, v, and w 
143 	 * as bit flags: WU = 32, UW = 16, WV = 8, VW = 4, UV = 2, VU = 1
144      * 
145      * @param g the graph for which the calculation is being made
146      * @param u a vertex in g
147      * @param v a vertex in g
148      * @param w a vertex in g
149      * @param <V> the vertex type
150      * @param <E> the edge type
151      * @return an int encoding the presence of all links between u, v, and w
152      */
153 	public static <V,E> int triCode(Graph<V,E> g, V u, V v, V w) {
154 		int i = 0;
155 		i += link(g, v, u ) ? 1 : 0;
156 		i += link(g, u, v ) ? 2 : 0;
157 		i += link(g, v, w ) ? 4 : 0;
158 		i += link(g, w, v ) ? 8 : 0;
159 		i += link(g, u, w ) ? 16 : 0;
160 		i += link(g, w, u ) ? 32 : 0;
161 		return i;
162 	}
163 
164 	protected static <V,E> boolean link(Graph<V,E> g, V a, V b) {
165 		return g.isPredecessor(b, a);
166 	}
167 	
168 	
169 	/**
170 	 * @param triCode the code returned by {@code triCode()}
171 	 * @return the string code associated with the numeric type
172 	 */
173 	public static int triType( int triCode ) {
174 		return codeToType[ triCode ];
175 	}
176 
177 	/**
178 	 * For debugging purposes, this is copied straight out of the paper which
179 	 * means that they refer to triad types 1-16.
180 	 */
181 	protected static final int[] codeToType = { 1, 2, 2, 3, 2, 4, 6, 8, 2, 6, 5, 7, 3, 8,
182 			7, 11, 2, 6, 4, 8, 5, 9, 9, 13, 6, 10, 9, 14, 7, 14, 12, 15, 2, 5,
183 			6, 7, 6, 9, 10, 14, 4, 9, 9, 12, 8, 13, 14, 15, 3, 7, 8, 11, 7, 12,
184 			14, 15, 8, 14, 13, 15, 11, 15, 15, 16 };
185 
186 	/**
187 	 * Return true iff this ordering is canonical and therefore we should build statistics for it.
188 	 * 
189 	 * @param g the graph whose properties are being examined
190 	 * @param id a list of the vertices in g; used to assign an index to each
191 	 * @param u a vertex in g
192 	 * @param v a vertex in g
193 	 * @param w a vertex in g
194      * @param <V> the vertex type
195      * @param <E> the edge type
196 	 * @return true if index(u) &lt; index(w), or if index(v) &lt; index(w) &lt; index(u)
197 	 *     and v doesn't link to w; false otherwise
198 	 */
199 	protected static <V,E> boolean shouldCount(Graph<V,E> g, List<V> id, V u, V v, V w) {
200 		int i_u = id.indexOf(u);
201 		int i_w = id.indexOf(w);
202 		if (i_u < i_w)
203 			return true;
204 		int i_v = id.indexOf(v);
205 		if ((i_v < i_w) && (i_w < i_u) && (!g.isNeighbor(w,v)))
206 			return true;
207 		return false;
208 	}
209 }