1 /*
2 * Copyright (c) 2003, The JUNG Authors
3 *
4 * All rights reserved.
5 *
6 * This software is open-source under the BSD license; see either
7 * "license.txt" or
8 * https://github.com/jrtom/jung/blob/master/LICENSE for a description.
9 */
10 package edu.uci.ics.jung.algorithms.metrics;
11
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import edu.uci.ics.jung.graph.DirectedGraph;
import edu.uci.ics.jung.graph.Graph;
19
20
21 /**
22 * TriadicCensus is a standard social network tool that counts, for each of the
23 * different possible configurations of three vertices, the number of times
24 * that that configuration occurs in the given graph.
25 * This may then be compared to the set of expected counts for this particular
26 * graph or to an expected sample. This is often used in p* modeling.
27 * <p>
28 * To use this class,
29 * <pre>
30 * long[] triad_counts = TriadicCensus(dg);
31 * </pre>
32 * where <code>dg</code> is a <code>DirectedGraph</code>.
33 * ith element of the array (for i in [1,16]) is the number of
34 * occurrences of the corresponding triad type.
35 * (The 0th element is not meaningful; this array is effectively 1-based.)
36 * To get the name of the ith triad (e.g. "003"),
37 * look at the global constant array c.TRIAD_NAMES[i]
38 * <p>
39 * Triads are named as
40 * (number of pairs that are mutually tied)
41 * (number of pairs that are one-way tied)
42 * (number of non-tied pairs)
43 * in the triple. Since there are be only three pairs, there is a finite
44 * set of these possible triads.
45 * <p>
46 * In fact, there are exactly 16, conventionally sorted by the number of
47 * realized edges in the triad:
48 * <table>
49 * <caption>Descriptions of the different types of triads</caption>
50 * <tr><th>Number</th> <th>Configuration</th> <th>Notes</th></tr>
51 * <tr><td>1</td><td>003</td><td>The empty triad</td></tr>
52 * <tr><td>2</td><td>012</td><td></td></tr>
53 * <tr><td>3</td><td>102</td><td></td></tr>
54 * <tr><td>4</td><td>021D</td><td>"Down": the directed edges point away</td></tr>
55 * <tr><td>5</td><td>021U</td><td>"Up": the directed edges meet</td></tr>
56 * <tr><td>6</td><td>021C</td><td>"Circle": one in, one out</td></tr>
57 * <tr><td>7</td><td>111D</td><td>"Down": 021D but one edge is mutual</td></tr>
58 * <tr><td>8</td><td>111U</td><td>"Up": 021U but one edge is mutual</td></tr>
59 * <tr><td>9</td><td>030T</td><td>"Transitive": two point to the same vertex</td></tr>
60 * <tr><td>10</td><td>030C</td><td>"Circle": A→B→C→A</td></tr>
61 * <tr><td>11</td><td>201</td><td></td></tr>
62 * <tr><td>12</td><td>120D</td><td>"Down": 021D but the third edge is mutual</td></tr>
63 * <tr><td>13</td><td>120U</td><td>"Up": 021U but the third edge is mutual</td></tr>
64 * <tr><td>14</td><td>120C</td><td>"Circle": 021C but the third edge is mutual</td></tr>
65 * <tr><td>15</td><td>210</td><td></td></tr>
66 * <tr><td>16</td><td>300</td><td>The complete</td></tr>
67 * </table>
68 * <p>
69 * This implementation takes O( m ), m is the number of edges in the graph.
70 * <br>
71 * It is based on
72 * <a href="http://vlado.fmf.uni-lj.si/pub/networks/doc/triads/triads.pdf">
73 * A subquadratic triad census algorithm for large sparse networks
74 * with small maximum degree</a>
75 * Vladimir Batagelj and Andrej Mrvar, University of Ljubljana
76 * Published in Social Networks.
77 * @author Danyel Fisher
78 * @author Tom Nelson - converted to jung2
79 *
80 */
81 public class TriadicCensus {
82
83 // NOTE THAT THIS RETURNS STANDARD 1-16 COUNT!
84
85 // and their types
86 public static final String[] TRIAD_NAMES = { "N/A", "003", "012", "102", "021D",
87 "021U", "021C", "111D", "111U", "030T", "030C", "201", "120D",
88 "120U", "120C", "210", "300" };
89
90 public static final int MAX_TRIADS = TRIAD_NAMES.length;
91
92 /**
93 * Returns an array whose ith element (for i in [1,16]) is the number of
94 * occurrences of the corresponding triad type in <code>g</code>.
95 * (The 0th element is not meaningful; this array is effectively 1-based.)
96 *
97 * @param g the graph whose properties are being measured
98 * @param <V> the vertex type
99 * @param <E> the edge type
100 * @return an array encoding the number of occurrences of each triad type
101 */
102 public static <V,E> long[] getCounts(DirectedGraph<V,E> g) {
103 long[] count = new long[MAX_TRIADS];
104
105 List<V> id = new ArrayList<V>(g.getVertices());
106
107 // apply algorithm to each edge, one at at time
108 for (int i_v = 0; i_v < g.getVertexCount(); i_v++) {
109 V v = id.get(i_v);
110 for(V u : g.getNeighbors(v)) {
111 int triType = -1;
112 if (id.indexOf(u) <= i_v)
113 continue;
114 Set<V> neighbors = new HashSet<V>(g.getNeighbors(u));
115 neighbors.addAll(g.getNeighbors(v));
116 neighbors.remove(u);
117 neighbors.remove(v);
118 if (g.isSuccessor(v,u) && g.isSuccessor(u,v)) {
119 triType = 3;
120 } else {
121 triType = 2;
122 }
123 count[triType] += g.getVertexCount() - neighbors.size() - 2;
124 for (V w : neighbors) {
125 if (shouldCount(g, id, u, v, w)) {
126 count [ triType ( triCode(g, u, v, w) ) ] ++;
127 }
128 }
129 }
130 }
131 int sum = 0;
132 for (int i = 2; i <= 16; i++) {
133 sum += count[i];
134 }
135 int n = g.getVertexCount();
136 count[1] = n * (n-1) * (n-2) / 6 - sum;
137 return count;
138 }
139
140 /**
141 * This is the core of the technique in the paper. Returns an int from 0 to
142 * 63 which encodes the presence of all possible links between u, v, and w
143 * as bit flags: WU = 32, UW = 16, WV = 8, VW = 4, UV = 2, VU = 1
144 *
145 * @param g the graph for which the calculation is being made
146 * @param u a vertex in g
147 * @param v a vertex in g
148 * @param w a vertex in g
149 * @param <V> the vertex type
150 * @param <E> the edge type
151 * @return an int encoding the presence of all links between u, v, and w
152 */
153 public static <V,E> int triCode(Graph<V,E> g, V u, V v, V w) {
154 int i = 0;
155 i += link(g, v, u ) ? 1 : 0;
156 i += link(g, u, v ) ? 2 : 0;
157 i += link(g, v, w ) ? 4 : 0;
158 i += link(g, w, v ) ? 8 : 0;
159 i += link(g, u, w ) ? 16 : 0;
160 i += link(g, w, u ) ? 32 : 0;
161 return i;
162 }
163
164 protected static <V,E> boolean link(Graph<V,E> g, V a, V b) {
165 return g.isPredecessor(b, a);
166 }
167
168
169 /**
170 * @param triCode the code returned by {@code triCode()}
171 * @return the string code associated with the numeric type
172 */
173 public static int triType( int triCode ) {
174 return codeToType[ triCode ];
175 }
176
177 /**
178 * For debugging purposes, this is copied straight out of the paper which
179 * means that they refer to triad types 1-16.
180 */
181 protected static final int[] codeToType = { 1, 2, 2, 3, 2, 4, 6, 8, 2, 6, 5, 7, 3, 8,
182 7, 11, 2, 6, 4, 8, 5, 9, 9, 13, 6, 10, 9, 14, 7, 14, 12, 15, 2, 5,
183 6, 7, 6, 9, 10, 14, 4, 9, 9, 12, 8, 13, 14, 15, 3, 7, 8, 11, 7, 12,
184 14, 15, 8, 14, 13, 15, 11, 15, 15, 16 };
185
186 /**
187 * Return true iff this ordering is canonical and therefore we should build statistics for it.
188 *
189 * @param g the graph whose properties are being examined
190 * @param id a list of the vertices in g; used to assign an index to each
191 * @param u a vertex in g
192 * @param v a vertex in g
193 * @param w a vertex in g
194 * @param <V> the vertex type
195 * @param <E> the edge type
196 * @return true if index(u) < index(w), or if index(v) < index(w) < index(u)
197 * and v doesn't link to w; false otherwise
198 */
199 protected static <V,E> boolean shouldCount(Graph<V,E> g, List<V> id, V u, V v, V w) {
200 int i_u = id.indexOf(u);
201 int i_w = id.indexOf(w);
202 if (i_u < i_w)
203 return true;
204 int i_v = id.indexOf(v);
205 if ((i_v < i_w) && (i_w < i_u) && (!g.isNeighbor(w,v)))
206 return true;
207 return false;
208 }
209 }