View Javadoc
1   /*
2   * Copyright (c) 2003, The JUNG Authors 
3   *
4   * All rights reserved.
5   *
6   * This software is open-source under the BSD license; see either
7   * "license.txt" or
8   * https://github.com/jrtom/jung/blob/master/LICENSE for a description.
9   */
10  package edu.uci.ics.jung.algorithms.cluster;
11  
12  import java.util.ArrayList;
13  import java.util.LinkedHashMap;
14  import java.util.List;
15  import java.util.Map;
16  import java.util.Set;
17  
18  import com.google.common.base.Function;
19  
20  import edu.uci.ics.jung.algorithms.scoring.BetweennessCentrality;
21  import edu.uci.ics.jung.graph.Graph;
22  import edu.uci.ics.jung.graph.util.Pair;
23  
24  
25  /**
26   * An algorithm for computing clusters (community structure) in graphs based on edge betweenness.
27   * The betweenness of an edge is defined as the extent to which that edge lies along 
28   * shortest paths between all pairs of nodes.
29   *
30   * This algorithm works by iteratively following the 2 step process:
31   * <ul>
32   * <li> Compute edge betweenness for all edges in current graph
33   * <li> Remove edge with highest betweenness
34   * </ul>
35   * <p>
36   * Running time is: O(kmn) where k is the number of edges to remove, m is the total number of edges, and
37   * n is the total number of vertices. For very sparse graphs the running time is closer to O(kn^2) and for
38   * graphs with strong community structure, the complexity is even lower.
39   * <p>
40   * This algorithm is a slight modification of the algorithm discussed below in that the number of edges
41   * to be removed is parameterized.
42   * @author Scott White
43   * @author Tom Nelson (converted to jung2)
44   * @see "Community structure in social and biological networks by Michelle Girvan and Mark Newman"
45   */
46  public class EdgeBetweennessClusterer<V,E> implements Function<Graph<V,E>,Set<Set<V>>> {
47      private int mNumEdgesToRemove;
48      private Map<E, Pair<V>> edges_removed;
49  
50     /**
51      * Constructs a new clusterer for the specified graph.
52      * @param numEdgesToRemove the number of edges to be progressively removed from the graph
53      */
54      public EdgeBetweennessClusterer(int numEdgesToRemove) {
55          mNumEdgesToRemove = numEdgesToRemove;
56          edges_removed = new LinkedHashMap<E, Pair<V>>();
57      }
58  
59      /**
60      * Finds the set of clusters which have the strongest "community structure".
61      * The more edges removed the smaller and more cohesive the clusters.
62      * @param graph the graph
63      */
64      public Set<Set<V>> apply(Graph<V,E> graph) {
65                  
66          if (mNumEdgesToRemove < 0 || mNumEdgesToRemove > graph.getEdgeCount()) {
67              throw new IllegalArgumentException("Invalid number of edges passed in.");
68          }
69          
70          edges_removed.clear();
71  
72          for (int k=0;k<mNumEdgesToRemove;k++) {
73              BetweennessCentrality<V,E> bc = new BetweennessCentrality<V,E>(graph);
74              E to_remove = null;
75              double score = 0;
76              for (E e : graph.getEdges())
77                  if (bc.getEdgeScore(e) > score)
78                  {
79                      to_remove = e;
80                      score = bc.getEdgeScore(e);
81                  }
82              edges_removed.put(to_remove, graph.getEndpoints(to_remove));
83              graph.removeEdge(to_remove);
84          }
85  
86          WeakComponentClusterer<V,E> wcSearch = new WeakComponentClusterer<V,E>();
87          Set<Set<V>> clusterSet = wcSearch.apply(graph);
88  
89          for (Map.Entry<E, Pair<V>> entry : edges_removed.entrySet())
90          {
91              Pair<V> endpoints = entry.getValue();
92              graph.addEdge(entry.getKey(), endpoints.getFirst(), endpoints.getSecond());
93          }
94          return clusterSet;
95      }
96  
97      /**
98       * Retrieves the list of all edges that were removed 
99       * (assuming extract(...) was previously called).  
100      * The edges returned
101      * are stored in order in which they were removed.
102      * 
103      * @return the edges in the original graph
104      */
105     public List<E> getEdgesRemoved() 
106     {
107         return new ArrayList<E>(edges_removed.keySet());
108     }
109 }