Source code for littleballoffur.node_sampling.pagerankbasedsampler

import random
import numpy as np
import networkx as nx
import networkit as nk
from typing import Union, List
from littleballoffur.sampler import Sampler

NKGraph = type(nk.graph.Graph())
NXGraph = nx.classes.graph.Graph


[docs]class PageRankBasedSampler(Sampler): r"""An implementation of PageRank based sampling. Nodes are sampled proportional to the PageRank score of nodes. `"For details about the algorithm see this paper." <https://cs.stanford.edu/people/jure/pubs/sampling-kdd06.pdf>`_ Args: number_of_nodes (int): Number of nodes. Default is 100. seed (int): Random seed. Default is 42. """ def __init__(self, number_of_nodes: int = 100, seed: int = 42, alpha: float = 0.85): self.number_of_nodes = number_of_nodes self.seed = seed self.alpha = alpha self._set_seed() def _create_initial_node_set(self, graph) -> List[int]: """ Choosing initial nodes. """ nodes = [node for node in range(self.backend.get_number_of_nodes(graph))] page_rank = self.backend.get_pagerank(graph, self.alpha) page_rank_sum = np.sum(page_rank) probabilities = page_rank / page_rank_sum sampled_nodes = np.random.choice( nodes, size=self.number_of_nodes, replace=False, p=probabilities ) return sampled_nodes
[docs] def sample(self, graph: Union[NXGraph, NKGraph]) -> Union[NXGraph, NKGraph]: """ Sampling nodes randomly proportional to the normalized pagerank score. Arg types: * **graph** *(NetworkX or NetworKit graph)* - The graph to be sampled from. Return types: * **new_graph** *(NetworkX or NetworKit graph)* - The graph of sampled nodes. """ self._deploy_backend(graph) self._check_number_of_nodes(graph) sampled_nodes = self._create_initial_node_set(graph) new_graph = self.backend.get_subgraph(graph, sampled_nodes) return new_graph