# Source code for littleballoffur.dataset.dataset_reader

import io
import os
import numpy as np
import pandas as pd
import networkx as nx
from six.moves import urllib


class GraphReader(object):
    r"""Class to read benchmark datasets for the sampling task.

    The edge list ``<dataset>_edges.csv`` is downloaded from ``base_url``
    and converted into a NetworkX graph by :meth:`get_graph`.

    Args:
        dataset (str): Dataset of interest. One of facebook/wikipedia/github/twitch/deezer/lastfm. Default is 'wikipedia'.
    """

    def __init__(self, dataset: str = "wikipedia"):
        # File name of the edge list inside the remote dataset directory.
        self.dataset = dataset + "_edges.csv"
        self.base_url = (
            "https://github.com/benedekrozemberczki/littleballoffur/raw/master/dataset/"
        )

    def _dataset_url(self) -> str:
        """
        Building the URL of the edge list file.

        URLs always use forward slashes, so the parts are joined explicitly
        instead of going through the platform-dependent ``os.path.join``.
        """
        return self.base_url.rstrip("/") + "/" + self.dataset

    def _pandas_reader(self, bytes):
        """
        Reading bytes as a Pandas dataframe.

        Arg types:
            * **bytes** *(bytes)* - Raw UTF-8 encoded, comma separated content.

        Return types:
            * **tab** *(pandas.DataFrame)* - The parsed table.
        """
        # NOTE(review): the dtype entry targets a "switch" column, while
        # get_graph only reads "id_1"/"id_2" - confirm it is still needed.
        tab = pd.read_csv(
            io.BytesIO(bytes), encoding="utf8", sep=",", dtype={"switch": np.int32}
        )
        return tab

    def _dataset_reader(self):
        """
        Reading the dataset from the web.

        Return types:
            * **data** *(pandas.DataFrame)* - The downloaded edge list.
        """
        # The context manager closes the HTTP response even if reading fails.
        with urllib.request.urlopen(self._dataset_url()) as response:
            raw = response.read()
        return self._pandas_reader(raw)

    def get_graph(self) -> "nx.Graph":
        r"""Getting the graph.

        The edges are taken from the ``id_1`` and ``id_2`` columns of the
        downloaded edge list.

        Return types:
            * **graph** *(NetworkX graph)* - Graph of interest.
        """
        data = self._dataset_reader()
        graph = nx.convert_matrix.from_pandas_edgelist(data, "id_1", "id_2")
        return graph