pagerank/main.py
2025-05-16 22:08:07 +02:00

127 lines
4.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/python3
import argparse
import subprocess
import re
import numpy as np
import matplotlib.pyplot as plt
def run_pagerank(matrix_path, epsilon, alpha):
"""
Execute pagerank binary with given parameters
Returns a dictionary with parsed output or None if execution fails.
"""
cmd = [
"./out/pagerank",
"--matrix", matrix_path,
"--epsilon", str(epsilon),
"--alpha", str(alpha)
]
try:
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
output = result.stdout
data = {}
match = re.search(r"Read and convert matrix: (\d+\.\d+) seconds", output)
data["read_time"] = float(match.group(1)) if match else None
match = re.search(r"Pagerank \(power\): (\d+) Iterations", output)
data["pagerank_iterations"] = int(match.group(1)) if match else None
match = re.search(r"Pagerank \(power\): (\d+\.\d+) seconds", output)
data["pagerank_time"] = float(match.group(1)) if match else None
match = re.search(r"Gauß-Seidel: (\d+) Iterations", output)
data["gauss_seidel_iterations"] = int(match.group(1)) if match else None
match = re.search(r"Gauß-Seidel: (\d+\.\d+) seconds", output)
data["gauss_seidel_time"] = float(match.group(1)) if match else None
match = re.search(r"Difference norm Pagerank power and Gauß-Seidel: (\d+\.\d+)", output)
data["diff_norm"] = float(match.group(1)) if match else None
return data
except subprocess.CalledProcessError as e:
print(f"Error running command for alpha={alpha}: {e}")
return None
except (AttributeError, ValueError) as e:
print(f"Error parsing output for alpha={alpha}: {e}")
return None
def generate_data(matrix_path, epsilon, alpha_values):
pagerank_iterations = []
gauss_seidel_iterations = []
pagerank_times = []
gauss_seidel_times = []
diff_norms = []
for alpha in alpha_values:
print(f"Running for alpha={alpha:.6f}")
data = run_pagerank(matrix_path, epsilon, alpha)
if data:
pagerank_iterations.append(data["pagerank_iterations"])
gauss_seidel_iterations.append(data["gauss_seidel_iterations"])
pagerank_times.append(data["pagerank_time"])
gauss_seidel_times.append(data["gauss_seidel_time"])
diff_norms.append(data["diff_norm"])
else:
print(f"Skipping alpha={alpha:.6f} due to error")
pagerank_iterations.append(None)
gauss_seidel_iterations.append(None)
pagerank_times.append(None)
gauss_seidel_times.append(None)
diff_norms.append(None)
return (pagerank_iterations, gauss_seidel_iterations, pagerank_times, gauss_seidel_times, diff_norms)
def plot_curves(alpha_values, pagerank_iterations, gauss_seidel_iterations, pagerank_times, gauss_seidel_times, diff_norms):
# plots
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 12), sharex=False)
# 1: Number of iterations
ax1.plot(alpha_values, pagerank_iterations, label="PageRank Iterations", color="blue")
ax1.plot(alpha_values, gauss_seidel_iterations, label="Gauß-Seidel Iterations", color="red")
ax1.set_ylabel("Number of Iterations")
ax1.set_title("Iterations vs. α")
ax1.legend()
ax1.grid(True)
# 2: Execution time
ax2.plot(alpha_values, pagerank_times, label="PageRank Time", color="blue")
ax2.plot(alpha_values, gauss_seidel_times, label="Gauß-Seidel Time", color="red")
ax2.set_ylabel("Time (seconds)")
ax2.set_title("Execution Time vs. α")
ax2.legend()
ax2.grid(True)
# 3: Difference norm
ax3.plot(alpha_values, diff_norms, label="Difference Norm", color="green")
ax3.set_xlabel("α")
ax3.set_ylabel("Norm Difference")
ax3.set_title("Norm Difference between PageRank and Gauß-Seidel vs. α")
ax3.legend()
ax3.grid(True)
plt.show()
def main():
parser = argparse.ArgumentParser(description='Pagerank vs Gauß Seidel execution time depending on α')
parser.add_argument('--alpha', type=int, default=10, help='number of alpha to test in the range (0,1)')
parser.add_argument('--max_alpha', type=float, default=1, help='stay strictly below this alpha')
parser.add_argument('--epsilon', type=float, default=1e-7, help='epsilon value for the convergence')
parser.add_argument('--matrix', type=str, default="out/input.mtx", help='path to the matrix file')
args = parser.parse_args()
alpha_values = np.arange(0, args.max_alpha, (1/args.alpha))
(pagerank_iterations, gauss_seidel_iterations, pagerank_times, gauss_seidel_times, diff_norms) = generate_data(args.matrix, args.epsilon, alpha_values)
plot_curves(alpha_values, pagerank_iterations, gauss_seidel_iterations, pagerank_times, gauss_seidel_times, diff_norms)
if __name__ == "__main__":
main()