pagerank implementation

This commit is contained in:
2025-04-18 09:46:23 +02:00
parent 67e846c758
commit 151ffe8dd8
19 changed files with 575 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
*.pdf
out/
data/

51
Makefile Normal file
View File

@ -0,0 +1,51 @@
# --------------------------------------------------
# Configuration
# --------------------------------------------------
CC := gcc
CFLAGS := -Wall -fopenmp -O3
# Have the compiler write a .d file next to every object so that editing a
# header recompiles the sources that include it.
DEPFLAGS := -MMD -MP
# src/vector.c calls fabs() from <math.h>.
LDLIBS := -lm
SRCDIR := src
OBJDIR := out
DATAPATH := data/web-Google/web-Google.mtx
# Copy of the input matrix at the path src/main.c opens (./out/input.rb).
INPUT := $(OBJDIR)/input.rb
#
SRCS := $(wildcard $(SRCDIR)/*.c)
OBJS := $(patsubst $(SRCDIR)/%.c,$(OBJDIR)/%.o,$(SRCS))
DEPS := $(OBJS:.o=.d)

# Remove a half-written target when its recipe fails.
.DELETE_ON_ERROR:

# --------------------------------------------------
# Phony targets
# --------------------------------------------------
.PHONY: all sparse clean

all: sparse

# Build the binary, stage the input data, then run the program.
sparse: $(OBJDIR)/sparse $(INPUT)
	@echo "→ Running sparse"
	./$(OBJDIR)/sparse

# --------------------------------------------------
# Input data (its own rule, so new data never forces a relink)
# --------------------------------------------------
$(INPUT): $(DATAPATH) | $(OBJDIR)
	@echo "→ Copying input data"
	cp $< $@

# --------------------------------------------------
# Link
# --------------------------------------------------
$(OBJDIR)/sparse: $(OBJS) | $(OBJDIR)
	@echo "→ Linking $@"
	$(CC) $(CFLAGS) -o $@ $^ $(LDLIBS)

# --------------------------------------------------
# Compile
# --------------------------------------------------
$(OBJDIR)/%.o: $(SRCDIR)/%.c | $(OBJDIR)
	@echo "→ Compiling $<"
	$(CC) $(CFLAGS) $(DEPFLAGS) -c $< -o $@

# --------------------------------------------------
# Check if output directory exists
# --------------------------------------------------
$(OBJDIR):
	mkdir -p $@

# --------------------------------------------------
# Clean
# --------------------------------------------------
# Refuse to run if OBJDIR is empty: "rm -rf /*" must never happen.
clean:
	$(if $(strip $(OBJDIR)),,$(error OBJDIR is empty))
	rm -rf "$(OBJDIR)"/*

# Header dependencies generated by DEPFLAGS; absent on the first build.
-include $(DEPS)

34
src/main.c Normal file
View File

@ -0,0 +1,34 @@
#include <stdio.h>
#include <stdlib.h>
#include "matrix_operation.h"
#include "sparse_matrix.h"
#include "read_from_rb.h"
#include "power_algorithm.h"
#include "time_helper.h"
#include <sys/time.h>
/*
 * Load the matrix stored at `path`, normalise it with convert_to_stochastic(),
 * run pagerank() on it and print two timings measured from the start of the
 * function: one after loading/normalising, one after pagerank() returns.
 *
 * pagerank() allocates the vector it returns, so no buffer is allocated here
 * beforehand (doing so leaked that buffer). Both the result vector and the
 * matrix are released before returning.
 */
void test_pagerank(const char *path) {
    struct timeval tvstart, tv;
    gettimeofday(&tvstart, NULL);

    SparseMatrix *matrix = read_sparse_matrix_from_mtx(path);
    convert_to_stochastic(matrix);
    // Time 2
    gettimeofday(&tv, NULL);
    print_time_diff("read matrix", &tvstart, &tv);

    double *result = pagerank(matrix, 1e-10, 0.35);
    if (result == NULL) {
        fprintf(stderr, "Memory allocation failed\n");
        free_sparse_matrix(matrix);
        exit(EXIT_FAILURE);
    }
    // Time 3
    gettimeofday(&tv, NULL);
    print_time_diff("finish", &tvstart, &tv);

    free(result);
    free_sparse_matrix(matrix);
}
/* Entry point: runs the PageRank benchmark on the staged input file. */
int main() {
    static const char input_path[] = "./out/input.rb";

    test_pagerank(input_path);
    return 0;
}

68
src/matrix_operation.c Normal file
View File

@ -0,0 +1,68 @@
#include "matrix_operation.h"
#include "vector.h"
#include <stdlib.h>
#include <stdio.h>
#include "sparse_matrix.h"
void multiply_vector_matrix(const double *vector, const SparseMatrix *matrix, double *result) {
init_vector(result, matrix->num_nodes, 0.0);
for (int i = 0; i < matrix->num_arcs; ++i) {
int origin = matrix->arcs[i].origin;
int dest = matrix->arcs[i].dest;
result[dest] += vector[origin] * matrix->arcs[i].value;
}
}
void multiply_vector_matrix_parallel(const double *vector, const SparseMatrix *matrix, double *result) {
int num_nodes = matrix->num_nodes;
int num_arcs = matrix->num_arcs;
init_vector(result, num_nodes, 0.0);
#pragma omp parallel
{
double *local_result = (double *)calloc(num_nodes, sizeof(double));
// parallelize for loop
#pragma omp for
for (int i = 0; i < num_arcs; ++i) {
int origin = matrix->arcs[i].origin;
int dest = matrix->arcs[i].dest;
local_result[dest] += vector[origin] * matrix->arcs[i].value;
}
// merge results
for (int j = 0; j < num_nodes; ++j) {
double val = local_result[j];
if (val != 0.0) {
#pragma omp atomic
result[j] += val;
}
}
free(local_result);
}
}
/*
 * Normalise the arc values in place: every arc value is divided by the sum of
 * the values of all arcs that share its `dest`.
 *
 * Arcs whose destination sum is exactly 0 are left untouched, because the
 * division would otherwise store NaN/inf in the matrix.
 *
 * If the temporary sum buffer cannot be allocated, an error is printed and the
 * matrix is returned unmodified.
 */
void convert_to_stochastic(SparseMatrix *matrix) {
    double *dest_sum = (double *)calloc(matrix->num_nodes, sizeof(double));
    if (!dest_sum) {
        fprintf(stderr, "Memory allocation failed\n");
        return;
    }

    // Pass 1: total value arriving at each destination.
    for (int i = 0; i < matrix->num_arcs; i++) {
        dest_sum[matrix->arcs[i].dest] += matrix->arcs[i].value;
    }

    // Pass 2: scale each arc by its destination total.
    for (int i = 0; i < matrix->num_arcs; i++) {
        double total = dest_sum[matrix->arcs[i].dest];
        if (total != 0.0) {
            matrix->arcs[i].value /= total;
        }
    }

    free(dest_sum);
}

9
src/matrix_operation.h Normal file
View File

@ -0,0 +1,9 @@
/* Vector/sparse-matrix operations used by the power iteration. */
#ifndef MATRIX_OPERATION_H
#define MATRIX_OPERATION_H
#include "sparse_matrix.h"
/* Zeroes `result`, then adds vector[origin] * value to result[dest] for every arc. */
void multiply_vector_matrix(const double *vector, const SparseMatrix *matrix, double *result);
/* Same product as multiply_vector_matrix(), computed inside an OpenMP parallel region. */
void multiply_vector_matrix_parallel(const double *vector, const SparseMatrix *matrix, double *result);
/* Divides every arc value by the sum of the values of the arcs sharing its `dest` (in place). */
void convert_to_stochastic(SparseMatrix *matrix);
#endif

21
src/matrix_print.c Normal file
View File

@ -0,0 +1,21 @@
#include <stdio.h>
#include "sparse_matrix.h"
/* Print the first `size` entries of `vector` on one line, space separated. */
void print_vector(double *vector, int size) {
    int idx = 0;
    while (idx < size) {
        printf("%lf ", vector[idx]);
        idx++;
    }
    printf("\n");
}
void print_sparse_matrix(const SparseMatrix *matrix) {
if (matrix) {
printf("%d %d %d\n", matrix->num_nodes, matrix->num_nodes, matrix->num_arcs);
for (int i = 0; i < matrix->num_arcs; ++i) {
printf("%d ", matrix->arcs[i].origin + 1);
printf("%d ", matrix->arcs[i].dest + 1);
printf("%.10f ", matrix->arcs[i].value);
printf("\n");
}
}
}

8
src/matrix_print.h Normal file
View File

@ -0,0 +1,8 @@
/* Debug printing helpers for vectors and sparse matrices. */
#ifndef MATRIX_PRINT_H
#define MATRIX_PRINT_H
#include "sparse_matrix.h"
/* Prints the first `size` entries of `vector` on one line, followed by a newline. */
void print_vector(double *vector, int size);
/* Prints a "nodes nodes arcs" header, then one "origin+1 dest+1 value" line per arc. */
void print_sparse_matrix(const SparseMatrix *matrix);
#endif

78
src/power_algorithm.c Normal file
View File

@ -0,0 +1,78 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "sparse_matrix.h"
#include "matrix_operation.h"
#include "vector.h"
/*
 * One power-iteration step: returns a freshly allocated vector of length
 * matrix->num_nodes holding the product of `pi` and `matrix`
 * (see multiply_vector_matrix_parallel). The caller owns the returned buffer
 * and must free() it.
 *
 * Exits the process with an error message if the buffer cannot be allocated.
 */
double* power_algorithm_step(const SparseMatrix *matrix, const double *pi) {
    double *result = malloc((size_t)matrix->num_nodes * sizeof(double));
    // malloc(0) may legitimately return NULL, so only a failed non-empty
    // request counts as an error.
    if (result == NULL && matrix->num_nodes > 0) {
        fprintf(stderr, "Memory allocation failed\n");
        exit(EXIT_FAILURE);
    }
    multiply_vector_matrix_parallel(pi, matrix, result);
    return result;
}
/*
 * Plain power iteration. Starts from the uniform vector 1/N and repeatedly
 * applies power_algorithm_step() until the L1 distance between two successive
 * iterates is at most `epsilon`. Prints "step" once per extra iteration.
 *
 * Returns the last iterate; the caller owns it and must free() it.
 *
 * Each step returns a new buffer, so the previous iterate is released as soon
 * as it is no longer needed (the original leaked one vector per iteration plus
 * an unused initial allocation).
 */
double* power_algorithm(const SparseMatrix *matrix, double epsilon) {
    const int N = matrix->num_nodes;

    double *pi = malloc((size_t)N * sizeof(double));
    if (pi == NULL && N > 0) {
        fprintf(stderr, "Memory allocation failed\n");
        exit(EXIT_FAILURE);
    }
    init_vector(pi, N, 1.0/(double)N);

    double *pi2 = power_algorithm_step(matrix, pi);
    while (diff_norm_vector(pi, pi2, N) > epsilon) {
        printf("step\n");
        // The newest iterate becomes the input of the next step.
        free(pi);
        pi = pi2;
        pi2 = power_algorithm_step(matrix, pi);
    }

    free(pi);
    return pi2;
}
/*
 * PageRank-style power iteration.
 *
 * Starting from the uniform vector 1/N, each iteration computes
 *     pi_new[i] = alpha * (pi * M)[i] + (1 - alpha) / N + (alpha / N) * (pi . f)
 * where f is produced by generate_f(), then rescales pi_new so its entries sum
 * to 1. Iteration stops once the L1 distance between successive iterates is at
 * most `epsilon`. One progress line is printed per iteration.
 *
 * Returns the final vector (length matrix->num_nodes); the caller owns it and
 * must free() it. Exits the process if a working buffer cannot be allocated.
 *
 * All working buffers are allocated once and swapped between iterations rather
 * than being reallocated on every pass.
 */
double* pagerank(const SparseMatrix *matrix, double epsilon, double alpha) {
    const int N = matrix->num_nodes;
    const size_t vec_size = (size_t)N * sizeof(double);

    double *pi = malloc(vec_size);
    double *pi_new = malloc(vec_size);
    double *f = malloc(vec_size);
    double *product = malloc(vec_size);
    if (N > 0 && (!pi || !pi_new || !f || !product)) {
        fprintf(stderr, "Memory allocation failed\n");
        exit(EXIT_FAILURE);
    }

    const double right_const = (1.0 - alpha) / N;
    init_vector(pi, N, 1.0 / N);
    generate_f(matrix, f);

    double diff;
    int iter = 0;
    do {
        // 1. pi * M
        multiply_vector_matrix_parallel(pi, matrix, product);
        // 2. alpha/N * (pi * f)
        double right_var = (alpha/(double)N) * vec_product(pi, f, N);
        // 3. alpha*(pi*M) + (right_const+alpha/N * (pi * f))*e
        for (int i = 0; i < N; i++) {
            pi_new[i] = alpha * product[i] + right_const + right_var;
        }
        // 4. Normalize
        normalize_vector(pi_new, N);
        // 5. Calculate convergence
        diff = diff_norm_vector(pi, pi_new, N);
        // 6. Swap buffers: the new iterate becomes the current one and the
        //    old one is reused as scratch space for the next pass.
        double *previous = pi;
        pi = pi_new;
        pi_new = previous;

        ++iter;
        printf("Iteration %d: diff = %.16f\n", iter, diff);
    } while (diff > epsilon);

    free(pi_new);
    free(product);
    free(f);
    return pi;
}

8
src/power_algorithm.h Normal file
View File

@ -0,0 +1,8 @@
/* Power-iteration solvers operating on a SparseMatrix. */
#ifndef POWER_ALGORITHM_H
#define POWER_ALGORITHM_H
#include "sparse_matrix.h"
/* Iterates pi <- pi * matrix from the uniform vector until the L1 change is <= epsilon; returns a malloc'd vector. */
double* power_algorithm(const SparseMatrix *matrix, double epsilon);
/* Same iteration with the alpha-weighted correction terms and per-step normalisation; returns a malloc'd vector. */
double* pagerank(const SparseMatrix *matrix, double epsilon, double alpha);
#endif

71
src/read_from_mtx.c Normal file
View File

@ -0,0 +1,71 @@
#include <stdio.h>
#include <stdlib.h>
#include "sparse_matrix.h"
/*
 * Skip comment lines (starting with '%') and empty lines, then parse the size
 * line of a Matrix Market coordinate file.
 *
 * The size line has the form "rows cols entries". The number of rows is stored
 * in matrix->num_nodes and the number of entries in matrix->num_arcs. (The
 * original read only two integers and therefore used the column count as the
 * arc count.) A two-integer line "nodes arcs" is still accepted for
 * compatibility with that earlier behaviour.
 *
 * Returns 0 on success, 1 if no valid size line was found.
 */
int read_dims_ignore_comment(SparseMatrix *matrix, FILE *file) {
    char buffer[1024];

    while (fgets(buffer, sizeof(buffer), file) != NULL) {
        if (buffer[0] == '%' || buffer[0] == '\n' || buffer[0] == '\0') {
            continue;
        }

        int rows = 0, cols = 0, entries = 0;
        int fields = sscanf(buffer, "%d %d %d", &rows, &cols, &entries);
        if (fields == 3) {
            matrix->num_nodes = rows;
            matrix->num_arcs = entries;
        } else if (fields == 2) {
            // Legacy two-field header: "nodes arcs".
            matrix->num_nodes = rows;
            matrix->num_arcs = cols;
        } else {
            return 1;
        }

        // Negative sizes would later be used as allocation sizes/loop bounds.
        if (matrix->num_nodes < 0 || matrix->num_arcs < 0) {
            return 1;
        }
        return 0;
    }

    return 1;
}
/*
 * Read matrix->num_arcs entry lines "origin dest [value]" from `file` into
 * matrix->arcs. A missing value defaults to 1.0.
 *
 * Matrix Market indices are 1-based, while the rest of the code uses
 * origin/dest directly as array indices (and print_sparse_matrix() adds 1 when
 * printing), so indices are converted to 0-based here and checked against
 * matrix->num_nodes.
 *
 * Exits with status 5 on a short file, an unparsable line or an out-of-range
 * index.
 */
void parse_arcs(SparseMatrix *matrix, FILE *file) {
    for (int i = 0; i < matrix->num_arcs; ++i) {
        char line[256];
        int origin, dest;
        double value = 1.0;

        if (fgets(line, sizeof(line), file) == NULL ||
            sscanf(line, "%d %d %lf", &origin, &dest, &value) < 2) {
            fprintf(stderr, "Failed to read arc %d\n", i);
            exit(5);
        }

        if (origin < 1 || origin > matrix->num_nodes ||
            dest < 1 || dest > matrix->num_nodes) {
            fprintf(stderr, "Arc %d references a node outside 1..%d\n", i, matrix->num_nodes);
            exit(5);
        }

        matrix->arcs[i].origin = origin - 1;
        matrix->arcs[i].dest = dest - 1;
        matrix->arcs[i].value = value;
    }
}
SparseMatrix* read_sparse_matrix_from_mtx(const char *filename) {
FILE *file = fopen(filename, "r");
if (!file) {
fprintf(stderr, "Failed to read file\n");
exit(1);
}
SparseMatrix *matrix = (SparseMatrix*)malloc(sizeof(SparseMatrix));
if (!matrix) {
fprintf(stderr, "Failed to allocate memory for matrix\n");
exit(3);
}
if (read_dims_ignore_comment(matrix, file)) {
fprintf(stderr, "Failed to read matrix dimensions\n");
}
matrix->arcs = (Arc*)malloc(matrix->num_arcs * sizeof(Arc));
if (!matrix->arcs) {
fprintf(stderr, "Failed to allocate memory for arcs\n");
exit(4);
}
parse_arcs(matrix, file);
fclose(file);
return matrix;
}

7
src/read_from_mtx.h Normal file
View File

@ -0,0 +1,7 @@
/* Reader for matrices stored as text files with a size line followed by one arc per line. */
#ifndef MATRIX_READ_MTX_H
#define MATRIX_READ_MTX_H
#include "sparse_matrix.h"
/* Opens `filename`, reads the dimensions and arcs, and returns a malloc'd SparseMatrix; exits the process on allocation or arc-parsing errors. */
SparseMatrix* read_sparse_matrix_from_mtx(const char *filename);
#endif

105
src/read_from_rb.c Normal file
View File

@ -0,0 +1,105 @@
#include <stdio.h>
#include <stdlib.h>
#include "read_from_rb.h"
#include "sparse_matrix.h"
/*
 * Read exactly `count` whitespace-separated integers, possibly spread over
 * several lines, into `dest`. On premature end of file prints `eof_message`
 * and exits with `exit_code`. Never writes past dest[count - 1].
 */
static void rb_read_ints(FILE *file, int *dest, int count, const char *eof_message, int exit_code) {
    char buffer[256];
    int filled = 0;

    while (filled < count) {
        if (fgets(buffer, sizeof(buffer), file) == NULL) {
            fputs(eof_message, stderr);
            exit(exit_code);
        }
        const char *cursor = buffer;
        int value, consumed;
        while (filled < count && sscanf(cursor, "%d%n", &value, &consumed) == 1) {
            dest[filled++] = value;
            cursor += consumed;
        }
    }
}

/* Same as rb_read_ints(), for floating-point values. */
static void rb_read_doubles(FILE *file, double *dest, int count, const char *eof_message, int exit_code) {
    char buffer[256];
    int filled = 0;

    while (filled < count) {
        if (fgets(buffer, sizeof(buffer), file) == NULL) {
            fputs(eof_message, stderr);
            exit(exit_code);
        }
        const char *cursor = buffer;
        double value;
        int consumed;
        while (filled < count && sscanf(cursor, "%lf%n", &value, &consumed) == 1) {
            dest[filled++] = value;
            cursor += consumed;
        }
    }
}

/*
 * Load a sparse matrix from the Rutherford-Boeing file `filename`.
 *
 * The file is read as: three header lines (the third holds
 * "type rows cols nonzeros"), one format line, cols + 1 column pointers,
 * `nonzeros` row indices and `nonzeros` values. File indices are 1-based and
 * are converted to 0-based. Each stored entry becomes an arc whose origin is
 * the column index and whose dest is the row index.
 *
 * Pattern matrices (type starting with 'p') carry no values section; their
 * arcs all get value 1.0.
 *
 * Returns a heap-allocated SparseMatrix; release it with free_sparse_matrix().
 * Any failure terminates the process:
 *   1 = open, 2 = matrix allocation, 3 = header, 4 = array allocation,
 *   5/6/7 = short column-pointer / row-index / value section,
 *   8 = inconsistent column pointers or row indices.
 */
SparseMatrix* read_sparse_matrix_from_rb(const char *filename) {
    FILE *file = fopen(filename, "r");
    if (!file) {
        fprintf(stderr, "Failed to open file %s\n", filename);
        exit(1);
    }

    SparseMatrix *matrix = (SparseMatrix*)malloc(sizeof(SparseMatrix));
    if (!matrix) {
        fprintf(stderr, "Failed to allocate memory for matrix\n");
        exit(2);
    }

    // Header lines 1-3; only the third one is parsed, so it is the one left
    // in `buffer` when the loop ends.
    char buffer[256];
    for (int line = 0; line < 3; ++line) {
        if (fgets(buffer, sizeof(buffer), file) == NULL) {
            fprintf(stderr, "Failed to read matrix metadata\n");
            exit(3);
        }
    }

    char type[4];
    int rows, cols, nonzero;
    if (sscanf(buffer, "%3s %d %d %d", type, &rows, &cols, &nonzero) != 4 ||
        rows < 0 || cols < 0 || nonzero < 0) {
        fprintf(stderr, "Failed to read matrix metadata\n");
        exit(3);
    }

    // Header line 4 (format descriptors) is not needed.
    if (fgets(buffer, sizeof(buffer), file) == NULL) {
        fprintf(stderr, "Failed to read matrix metadata\n");
        exit(3);
    }

    matrix->num_nodes = rows;
    matrix->num_arcs = nonzero;
    matrix->arcs = (Arc*)malloc((size_t)nonzero * sizeof(Arc));
    int *col_ptr = (int*)malloc(((size_t)cols + 1) * sizeof(int));
    int *row_ind = (int*)malloc((size_t)nonzero * sizeof(int));
    double *values = (double*)malloc((size_t)nonzero * sizeof(double));
    // malloc(0) may return NULL, so the nonzero-sized arrays are only
    // required when there is at least one entry.
    if (!col_ptr || (nonzero > 0 && (!matrix->arcs || !row_ind || !values))) {
        fprintf(stderr, "Failed to allocate memory for arcs\n");
        exit(4);
    }

    rb_read_ints(file, col_ptr, cols + 1,
                 "Unexpected end of file while reading column pointers\n", 5);
    rb_read_ints(file, row_ind, nonzero,
                 "Unexpected end of file while reading row indices\n", 6);

    if (type[0] == 'p' || type[0] == 'P') {
        for (int k = 0; k < nonzero; ++k) {
            values[k] = 1.0;
        }
    } else {
        rb_read_doubles(file, values, nonzero,
                        "Unexpected end of file while reading values\n", 7);
    }

    // Expand the compressed-column layout into one arc per stored entry.
    int arc_idx = 0;
    for (int j = 0; j < cols; j++) {
        int start = col_ptr[j] - 1;
        int end = col_ptr[j + 1] - 1;
        // Ranges must be non-decreasing and stay inside the entry arrays;
        // together these checks also guarantee arc_idx never exceeds nonzero.
        if (start < 0 || end < start || end > nonzero) {
            fprintf(stderr, "Invalid column pointers in %s\n", filename);
            exit(8);
        }
        for (int k = start; k < end; k++) {
            int row = row_ind[k] - 1;
            if (row < 0 || row >= rows) {
                fprintf(stderr, "Invalid row index in %s\n", filename);
                exit(8);
            }
            matrix->arcs[arc_idx].origin = j;
            matrix->arcs[arc_idx].dest = row;
            matrix->arcs[arc_idx].value = values[k];
            arc_idx++;
        }
    }
    // Only the arcs actually produced are valid.
    matrix->num_arcs = arc_idx;

    free(col_ptr);
    free(row_ind);
    free(values);
    fclose(file);
    return matrix;
}

8
src/read_from_rb.h Normal file
View File

@ -0,0 +1,8 @@
/* Reader for matrices stored in compressed-column text form (header, column pointers, row indices, values). */
#ifndef RB_READER_H
#define RB_READER_H
#include "sparse_matrix.h"
/* Opens `filename`, expands its column-pointer/row-index/value sections into arcs and returns a malloc'd SparseMatrix; exits the process on error. */
SparseMatrix* read_sparse_matrix_from_rb(const char *filename);
#endif

11
src/sparse_matrix.c Normal file
View File

@ -0,0 +1,11 @@
#include "sparse_matrix.h"
#include <stdlib.h>
/* Release a SparseMatrix and its arc array. Passing NULL is a no-op. */
void free_sparse_matrix(SparseMatrix *matrix) {
    if (matrix == NULL) {
        return;
    }

    Arc *arc_storage = matrix->arcs;
    free(arc_storage);
    free(matrix);
}

21
src/sparse_matrix.h Normal file
View File

@ -0,0 +1,21 @@
/* Coordinate-list sparse matrix: a flat array of weighted arcs. */
#ifndef SPARSE_MATRIX_H
#define SPARSE_MATRIX_H
/* One stored entry. origin and dest are used directly as array indices by the matrix operations. */
typedef struct {
int origin;
int dest;
double value;
} Arc;
/* The matrix itself: num_nodes is the length of the vectors it is multiplied with, arcs holds num_arcs entries. */
typedef struct {
int num_nodes;
int num_arcs;
Arc *arcs;
} SparseMatrix;
/* Frees the arc array and the matrix; NULL is accepted. */
void free_sparse_matrix(SparseMatrix *matrix);
/* Prints the matrix to stdout (defined in matrix_print.c). */
void print_sparse_matrix(const SparseMatrix *matrix);
/* Loads a matrix from a text file (defined in read_from_mtx.c). */
SparseMatrix* read_sparse_matrix_from_mtx(const char *filename);
/* Zeroes `result`, then accumulates vector[origin] * value into result[dest] (defined in matrix_operation.c). */
void multiply_vector_matrix(const double *vector, const SparseMatrix *matrix, double *result);
#endif

12
src/time_helper.c Normal file
View File

@ -0,0 +1,12 @@
#include <sys/time.h>
#include <stdio.h>
/*
 * Print "<label>: <seconds>.<microseconds> seconds" for the interval from
 * `start` to `end`.
 *
 * The microsecond part is zero-padded to six digits so that it reads as a
 * decimal fraction: 1 s + 5000 us prints as "1.005000", not "1.5000".
 */
void print_time_diff(const char* label, struct timeval* start, struct timeval* end) {
    long seconds = end->tv_sec - start->tv_sec;
    long microseconds = end->tv_usec - start->tv_usec;

    // Borrow one second when the microsecond difference is negative.
    if (microseconds < 0) {
        seconds -= 1;
        microseconds += 1000000;
    }

    printf("%s: %ld.%06ld seconds\n", label, seconds, microseconds);
}

6
src/time_helper.h Normal file
View File

@ -0,0 +1,6 @@
/* Timing helper for printing elapsed wall-clock intervals. */
#ifndef TIME_HELPER_H
#define TIME_HELPER_H
/* struct timeval must be declared before the prototype below; without this
 * include the header only compiles if the includer happens to pull in
 * <sys/time.h> first. */
#include <sys/time.h>
/* Prints "<label>: <seconds>.<microseconds> seconds" for the interval start..end. */
void print_time_diff(const char* label, struct timeval* start, struct timeval* end);
#endif

43
src/vector.c Normal file
View File

@ -0,0 +1,43 @@
#include <math.h>
#include "sparse_matrix.h"
/* Set the first `size` entries of `vector` to `value`. */
void init_vector(double *vector, int size, double value) {
    int idx = 0;
    while (idx < size) {
        vector[idx] = value;
        idx += 1;
    }
}
/* Return the L1 distance between two vectors: the sum of |vector1[i] - vector2[i]| over the first `size` entries. */
double diff_norm_vector(double *vector1, double *vector2, int size) {
    double total = 0.0;
    int idx = 0;
    while (idx < size) {
        double delta = vector1[idx] - vector2[idx];
        total += fabs(delta);
        idx++;
    }
    return total;
}
/*
 * Fill `res` (length matrix->num_nodes) with an indicator vector: 1 for every
 * index that appears as the `dest` of at least one arc, 0 elsewhere.
 * NOTE(review): pagerank() uses this vector in its correction term; confirm
 * that marking destinations is the intended indicator.
 */
void generate_f(const SparseMatrix *matrix, double *res) {
    const int node_count = matrix->num_nodes;
    init_vector(res, node_count, 0);

    const int arc_count = matrix->num_arcs;
    int k = 0;
    while (k < arc_count) {
        const int target = matrix->arcs[k].dest;
        res[target] = 1;
        k++;
    }
}
/* Return the dot product of the first N entries of v1 and v2. */
double vec_product(const double* v1, const double* v2, int N) {
    double acc = 0.0;
    int idx = 0;
    while (idx < N) {
        double term = v1[idx] * v2[idx];
        acc += term;
        ++idx;
    }
    return acc;
}
/*
 * Rescale the first N entries of `v` in place so that they sum to 1.
 *
 * If the entries sum to exactly 0 the vector is left unchanged, because
 * dividing by the sum would fill it with NaN/inf.
 */
void normalize_vector(double* v, int N) {
    double sum = 0.0;
    for (int i = 0; i < N; i++) {
        sum += v[i];
    }

    if (sum == 0.0) {
        return;
    }

    for (int i = 0; i < N; i++) {
        v[i] /= sum;
    }
}

11
src/vector.h Normal file
View File

@ -0,0 +1,11 @@
/* Dense vector helpers used by the power iteration. */
#ifndef VECTOR_H
#define VECTOR_H
#include "sparse_matrix.h"
/* Sets the first `size` entries of `vector` to `value`. */
void init_vector(double *vector, int size, double value);
/* Returns the sum of |vector1[i] - vector2[i]| over the first `size` entries. */
double diff_norm_vector(double *vector1, double *vector2, int size);
/* Zeroes `res`, then sets res[dest] = 1 for every arc of `matrix`. */
void generate_f(const SparseMatrix *matrix, double *res);
/* Divides the first N entries of `v` by their sum (in place). */
void normalize_vector(double* v, int N);
/* Returns the dot product of the first N entries of v1 and v2. */
double vec_product(const double* v1, const double* v2, int N);
#endif