Java PageRank 实现
PageRank 是 Google 早期用于对网页进行排名的算法,下面我将提供一个完整的 Java 实现,包括基本算法和优化版本。

(图片来源网络,侵删)
基本 PageRank 实现
import java.util.*;
public class BasicPageRank {
private Map<String, Double> pageRanks;
private Map<String, List<String>> graph;
private int totalNodes;
private double dampingFactor = 0.85;
private int maxIterations = 100;
private double tolerance = 0.0001;
public BasicPageRank(Map<String, List<String>> graph) {
this.graph = graph;
this.totalNodes = graph.size();
this.pageRanks = new HashMap<>();
initializePageRanks();
}
private void initializePageRanks() {
double initialRank = 1.0 / totalNodes;
for (String node : graph.keySet()) {
pageRanks.put(node, initialRank);
}
}
public void compute() {
for (int i = 0; i < maxIterations; i++) {
Map<String, Double> newRanks = new HashMap<>();
double maxDiff = 0.0;
for (String node : graph.keySet()) {
double rank = (1 - dampingFactor) / totalNodes;
for (Map.Entry<String, List<String>> entry : graph.entrySet()) {
String source = entry.getKey();
List<String> outgoingLinks = entry.getValue();
if (outgoingLinks.contains(node)) {
rank += dampingFactor * (pageRanks.get(source) / outgoingLinks.size());
}
}
newRanks.put(node, rank);
maxDiff = Math.max(maxDiff, Math.abs(pageRanks.get(node) - rank));
}
pageRanks = newRanks;
if (maxDiff < tolerance) {
System.out.println("Converged after " + (i + 1) + " iterations");
break;
}
}
}
public Map<String, Double> getPageRanks() {
return pageRanks;
}
public static void main(String[] args) {
// 示例图结构
Map<String, List<String>> graph = new HashMap<>();
graph.put("A", Arrays.asList("B", "C"));
graph.put("B", Arrays.asList("C"));
graph.put("C", Arrays.asList("A"));
graph.put("D", Arrays.asList("C"));
BasicPageRank pageRank = new BasicPageRank(graph);
pageRank.compute();
System.out.println("PageRank Results:");
pageRank.getPageRanks().entrySet().stream()
.sorted(Map.Entry.<String, Double>comparingByValue().reversed())
.forEach(entry -> System.out.println(entry.getKey() + ": " + entry.getValue()));
}
}
优化版 PageRank 实现(使用矩阵运算)
import java.util.*;
public class OptimizedPageRank {
private Map<String, Integer> nodeToIndex;
private List<String> indexToNode;
private double[][] transitionMatrix;
private double[] pageRanks;
private int totalNodes;
private double dampingFactor = 0.85;
private int maxIterations = 100;
private double tolerance = 0.0001;
public OptimizedPageRank(Map<String, List<String>> graph) {
this.totalNodes = graph.size();
initializeNodeMappings(graph);
buildTransitionMatrix(graph);
initializePageRanks();
}
private void initializeNodeMappings(Map<String, List<String>> graph) {
nodeToIndex = new HashMap<>();
indexToNode = new ArrayList<>();
int index = 0;
for (String node : graph.keySet()) {
nodeToIndex.put(node, index);
indexToNode.add(node);
index++;
}
}
private void buildTransitionMatrix(Map<String, List<String>> graph) {
transitionMatrix = new double[totalNodes][totalNodes];
for (Map.Entry<String, List<String>> entry : graph.entrySet()) {
String source = entry.getKey();
List<String> outgoingLinks = entry.getValue();
int sourceIndex = nodeToIndex.get(source);
if (outgoingLinks.isEmpty()) {
// 处理悬挂节点(没有出边的节点)
for (int j = 0; j < totalNodes; j++) {
transitionMatrix[sourceIndex][j] = 1.0 / totalNodes;
}
} else {
for (String target : outgoingLinks) {
int targetIndex = nodeToIndex.get(target);
transitionMatrix[sourceIndex][targetIndex] = 1.0 / outgoingLinks.size();
}
}
}
}
private void initializePageRanks() {
pageRanks = new double[totalNodes];
Arrays.fill(pageRanks, 1.0 / totalNodes);
}
public void compute() {
for (int i = 0; i < maxIterations; i++) {
double[] newRanks = new double[totalNodes];
double maxDiff = 0.0;
for (int j = 0; j < totalNodes; j++) {
double rank = (1 - dampingFactor) / totalNodes;
for (int k = 0; k < totalNodes; k++) {
rank += dampingFactor * transitionMatrix[k][j] * pageRanks[k];
}
newRanks[j] = rank;
maxDiff = Math.max(maxDiff, Math.abs(pageRanks[j] - rank));
}
pageRanks = newRanks;
if (maxDiff < tolerance) {
System.out.println("Converged after " + (i + 1) + " iterations");
break;
}
}
}
public Map<String, Double> getPageRanks() {
Map<String, Double> result = new HashMap<>();
for (int i = 0; i < totalNodes; i++) {
result.put(indexToNode.get(i), pageRanks[i]);
}
return result;
}
public static void main(String[] args) {
// 示例图结构
Map<String, List<String>> graph = new HashMap<>();
graph.put("A", Arrays.asList("B", "C"));
graph.put("B", Arrays.asList("C"));
graph.put("C", Arrays.asList("A"));
graph.put("D", Arrays.asList("C"));
OptimizedPageRank pageRank = new OptimizedPageRank(graph);
pageRank.compute();
System.out.println("PageRank Results:");
pageRank.getPageRanks().entrySet().stream()
.sorted(Map.Entry.<String, Double>comparingByValue().reversed())
.forEach(entry -> System.out.println(entry.getKey() + ": " + entry.getValue()));
}
}
使用说明
-
基本实现:
- 适用于小型图结构
- 直接使用邻接表表示图
- 每次迭代计算所有节点的PageRank值
-
优化实现:
- 使用矩阵运算,适合大型图
- 首先将节点映射到索引,构建转移矩阵
- 处理了悬挂节点(没有出边的节点)
-
参数说明:
dampingFactor: 阻尼因子,通常设为0.85maxIterations: 最大迭代次数tolerance: 收敛阈值
-
运行结果:
(图片来源网络,侵删)程序会输出每个节点的PageRank值,并按排名从高到低排序
扩展功能
如果需要进一步扩展,可以考虑:
- 分布式实现: 使用Hadoop或Spark处理大规模图
- 增量更新: 当图结构发生变化时,只更新受影响节点的PageRank
- 个性化PageRank: 为不同用户计算个性化的PageRank值
- 可视化: 添加图形化展示功能
这两个实现都提供了PageRank算法的核心功能,可以根据具体需求选择使用基本版或优化版。

(图片来源网络,侵删)
