Source code for graphnetz.datasets.combinatorial

"""Combinatorial-optimization graph datasets.

All instances are synthetic; canonical PyG benchmarks do not cover this
category. The library provides:

- :class:`RandomTSP` / :func:`random_tsp` — Euclidean TSP (k-NN over 2D points).
- :class:`RandomVRP` / :func:`random_vrp` — Capacitated VRP (multi-depot k-NN).
- :class:`RandomMaxFlow` / :func:`random_maxflow` — random capacitated networks
  with a single source/sink, suitable for max-flow / min-cut benchmarks.
- :class:`RandomBipartiteMatching` / :func:`random_bipartite_matching` —
  bipartite assignment instances with random weights.
- :class:`RandomColoring` / :func:`random_coloring` — random Erdos-Renyi graphs
  for graph-coloring / max-cut experiments.
- :class:`RandomMaxCut` / :func:`random_maxcut` — alias of RandomColoring.
"""

from __future__ import annotations

import torch
from torch_geometric.data import Data, InMemoryDataset
from torch_geometric.utils import erdos_renyi_graph, to_undirected


def _euclidean_knn_edges(pos: torch.Tensor, k: int) -> torch.Tensor:
    n = pos.size(0)
    dist = torch.cdist(pos, pos)
    knn = dist.topk(k + 1, largest=False).indices[:, 1:]
    src = torch.arange(n).repeat_interleave(k)
    dst = knn.reshape(-1)
    return to_undirected(torch.stack([src, dst], dim=0))


[docs] class RandomTSP(InMemoryDataset): """Euclidean TSP instances as k-NN graphs over 2D points.""" def __init__(self, root: str, num_graphs: int = 128, num_nodes: int = 50, k: int = 5, seed: int = 0) -> None: self.num_graphs, self.num_nodes, self.k, self.seed = num_graphs, num_nodes, k, seed super().__init__(root) self.load(self.processed_paths[0]) @property def processed_file_names(self) -> str: return f"tsp_{self.num_graphs}_{self.num_nodes}_{self.k}_{self.seed}.pt"
[docs] def process(self) -> None: gen = torch.Generator().manual_seed(self.seed) data_list: list[Data] = [] for _ in range(self.num_graphs): pos = torch.rand((self.num_nodes, 2), generator=gen) data_list.append(Data(x=pos, edge_index=_euclidean_knn_edges(pos, self.k), pos=pos)) self.save(data_list, self.processed_paths[0])
[docs] class RandomVRP(InMemoryDataset): """Capacitated VRP instances: customers + multiple depots, k-NN connectivity. Node features are ``[x, y, demand, is_depot]``. Demands are zero for depots and uniform on ``(0, 1]`` for customers. """ def __init__( self, root: str, num_graphs: int = 128, num_customers: int = 40, num_depots: int = 3, k: int = 6, seed: int = 0, ) -> None: self.num_graphs = num_graphs self.num_customers = num_customers self.num_depots = num_depots self.k = k self.seed = seed super().__init__(root) self.load(self.processed_paths[0]) @property def processed_file_names(self) -> str: return f"vrp_{self.num_graphs}_{self.num_customers}_{self.num_depots}_{self.k}_{self.seed}.pt"
[docs] def process(self) -> None: gen = torch.Generator().manual_seed(self.seed) n = self.num_customers + self.num_depots data_list: list[Data] = [] for _ in range(self.num_graphs): pos = torch.rand((n, 2), generator=gen) demand = torch.rand((n, 1), generator=gen) is_depot = torch.zeros((n, 1)) is_depot[: self.num_depots] = 1.0 demand[: self.num_depots] = 0.0 x = torch.cat([pos, demand, is_depot], dim=-1) data_list.append(Data(x=x, edge_index=_euclidean_knn_edges(pos, self.k), pos=pos)) self.save(data_list, self.processed_paths[0])
[docs] class RandomMaxFlow(InMemoryDataset): """Random capacitated networks with a marked source/sink for max-flow tasks. Nodes 0 and ``num_nodes - 1`` are designated source and sink. Edges carry a positive capacity stored in ``edge_attr``. """ def __init__( self, root: str, num_graphs: int = 128, num_nodes: int = 30, edge_prob: float = 0.2, seed: int = 0, ) -> None: self.num_graphs = num_graphs self.num_nodes = num_nodes self.edge_prob = edge_prob self.seed = seed super().__init__(root) self.load(self.processed_paths[0]) @property def processed_file_names(self) -> str: return f"maxflow_{self.num_graphs}_{self.num_nodes}_{self.edge_prob}_{self.seed}.pt"
[docs] def process(self) -> None: # Use a *local* generator via a saved/restored RNG fork so we don't # mutate the caller's torch RNG state. ``erdos_renyi_graph`` reads # the global RNG, so we have to bracket the call with fork_rng. gen = torch.Generator().manual_seed(self.seed) data_list: list[Data] = [] with torch.random.fork_rng(): torch.manual_seed(self.seed) for _ in range(self.num_graphs): edge_index = erdos_renyi_graph(self.num_nodes, self.edge_prob, directed=True) capacity = torch.rand((edge_index.size(1), 1), generator=gen) * 9 + 1 # in [1, 10] x = torch.zeros((self.num_nodes, 2)) x[0, 0] = 1.0 # source flag x[-1, 1] = 1.0 # sink flag data_list.append(Data(x=x, edge_index=edge_index, edge_attr=capacity)) self.save(data_list, self.processed_paths[0])
[docs] class RandomBipartiteMatching(InMemoryDataset): """Random bipartite assignment instances with weighted edges. Two sides of size ``size`` are connected by a Bernoulli mask of probability ``edge_prob``; edge weights are uniform on ``(0, 1]`` and stored in ``edge_attr``. Node features mark each node's side. """ def __init__( self, root: str, num_graphs: int = 128, size: int = 25, edge_prob: float = 0.4, seed: int = 0, ) -> None: self.num_graphs = num_graphs self.size = size self.edge_prob = edge_prob self.seed = seed super().__init__(root) self.load(self.processed_paths[0]) @property def processed_file_names(self) -> str: return f"matching_{self.num_graphs}_{self.size}_{self.edge_prob}_{self.seed}.pt"
[docs] def process(self) -> None: gen = torch.Generator().manual_seed(self.seed) data_list: list[Data] = [] for _ in range(self.num_graphs): mask = torch.rand((self.size, self.size), generator=gen) < self.edge_prob left, right = mask.nonzero(as_tuple=True) edge_index = torch.stack([left, right + self.size], dim=0) edge_index = to_undirected(edge_index) weights = torch.rand(edge_index.size(1), 1, generator=gen) x = torch.zeros((2 * self.size, 2)) x[: self.size, 0] = 1.0 x[self.size :, 1] = 1.0 data_list.append(Data(x=x, edge_index=edge_index, edge_attr=weights)) self.save(data_list, self.processed_paths[0])
[docs] class RandomColoring(InMemoryDataset): """Random Erdos-Renyi graphs for graph-coloring and max-cut benchmarks.""" def __init__( self, root: str, num_graphs: int = 128, num_nodes: int = 40, edge_prob: float = 0.15, seed: int = 0, ) -> None: self.num_graphs = num_graphs self.num_nodes = num_nodes self.edge_prob = edge_prob self.seed = seed super().__init__(root) self.load(self.processed_paths[0]) @property def processed_file_names(self) -> str: return f"coloring_{self.num_graphs}_{self.num_nodes}_{self.edge_prob}_{self.seed}.pt"
[docs] def process(self) -> None: # Bracket ``erdos_renyi_graph`` (which reads the global torch RNG) # in ``fork_rng`` so the caller's RNG state is restored after this # builder runs. data_list: list[Data] = [] with torch.random.fork_rng(): torch.manual_seed(self.seed) for _ in range(self.num_graphs): edge_index = erdos_renyi_graph(self.num_nodes, self.edge_prob, directed=False) x = torch.ones((self.num_nodes, 1)) data_list.append(Data(x=x, edge_index=edge_index)) self.save(data_list, self.processed_paths[0])
# Backward-compatible alias: max-cut and coloring share this generator. RandomMaxCut = RandomColoring
[docs] def random_tsp(root: str, **kwargs: int | float) -> RandomTSP: return RandomTSP(root=root, **kwargs) # type: ignore[arg-type]
[docs] def random_vrp(root: str, **kwargs: int | float) -> RandomVRP: return RandomVRP(root=root, **kwargs) # type: ignore[arg-type]
[docs] def random_maxflow(root: str, **kwargs: int | float) -> RandomMaxFlow: return RandomMaxFlow(root=root, **kwargs) # type: ignore[arg-type]
[docs] def random_bipartite_matching(root: str, **kwargs: int | float) -> RandomBipartiteMatching: return RandomBipartiteMatching(root=root, **kwargs) # type: ignore[arg-type]
[docs] def random_coloring(root: str, **kwargs: int | float) -> RandomColoring: return RandomColoring(root=root, **kwargs) # type: ignore[arg-type]
[docs] def random_maxcut(root: str, **kwargs: int | float) -> RandomColoring: return RandomColoring(root=root, **kwargs) # type: ignore[arg-type]
__all__ = [ "RandomBipartiteMatching", "RandomColoring", "RandomMaxCut", "RandomMaxFlow", "RandomTSP", "RandomVRP", "random_bipartite_matching", "random_coloring", "random_maxcut", "random_maxflow", "random_tsp", "random_vrp", ]