# Day 60: Graph and Geometric Learning

Graph neural networks capture relational structure beyond Euclidean grids. This lesson focuses on two message-passing architectures built from scratch in NumPy: a mean-aggregating GraphSAGE layer and a similarity-based graph attention (GAT-style) aggregator, both trained as node classifiers on a toy two-community graph.
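
Concretely, the handcrafted GraphSAGE layer in `solutions.py` implements the mean-aggregator update (writing $x_v$ for the features of node $v$ and $\mathcal{N}(v)$ for its neighbourhood, self-loop included):

$$
h_v = \operatorname{ReLU}\Big(W_{\text{self}}^{\top} x_v + W_{\text{neigh}}^{\top}\,\frac{1}{|\mathcal{N}(v)|}\sum_{u \in \mathcal{N}(v)} x_u + b\Big)
$$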

Run `python Day_60_Graph_and_Geometric_Learning/solutions.py` to inspect handcrafted GraphSAGE/GAT layers, monitor training metrics on a toy citation-style graph, and export feature embeddings.
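
If you'd rather drive the helpers from your own session, here is a minimal sketch; it assumes `solutions.py` from this lesson is on your import path:

```python
# Minimal usage sketch -- assumes this lesson's solutions.py is importable.
from solutions import train_node_classifiers

results = train_node_classifiers(random_state=60)
print(f"GraphSAGE accuracy: {results['graphsage_accuracy']:.3f}")
print(f"GAT accuracy: {results['gat_accuracy']:.3f}")
print(results["attention_matrix"].shape)  # (6, 6): one attention row per node
```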

## Interactive Notebooks

Run this lesson’s code interactively in your browser:

!!! tip "About JupyterLite"
    JupyterLite runs entirely in your browser using WebAssembly. No installation
    or server required! Note: the first launch may take a moment to load.

## Additional Materials

???+ example "solutions.py"
    View on GitHub

```python title="solutions.py"
"""Graph neural network helpers for Day 60."""

from __future__ import annotations

from dataclasses import dataclass
from typing import Dict, List

import numpy as np


@dataclass
class GraphData:
    """Simple container for toy graph node classification tasks."""

    features: np.ndarray
    adjacency: np.ndarray
    labels: np.ndarray


def build_toy_graph() -> GraphData:
    """Create a reproducible toy graph with two communities."""

    features = np.array(
        [
            [1.0, 0.2, 0.8],
            [0.9, 0.1, 0.7],
            [1.1, 0.25, 0.9],
            [-0.2, 1.0, 0.1],
            [-0.1, 0.9, 0.2],
            [-0.3, 1.1, 0.15],
        ],
        dtype=float,
    )
    labels = np.array([0, 0, 0, 1, 1, 1], dtype=int)
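    # Block-diagonal adjacency for the two communities, with self-loops on the
    # diagonal and a single bridge edge between nodes 2 and 5.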
    adjacency = np.array(
        [
            [1, 1, 1, 0, 0, 0],
            [1, 1, 1, 0, 0, 0],
            [1, 1, 1, 0, 0, 1],
            [0, 0, 0, 1, 1, 1],
            [0, 0, 0, 1, 1, 1],
            [0, 0, 1, 1, 1, 1],
        ],
        dtype=float,
    )
    # Self-loops are already encoded on the diagonal, so the matrix is used as-is.
    return GraphData(features=features, adjacency=adjacency, labels=labels)


def _softmax(logits: np.ndarray) -> np.ndarray:
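    # Numerically stable softmax: subtracting the row max leaves the result unchanged.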
    logits = logits - logits.max(axis=1, keepdims=True)
    exp = np.exp(logits)
    exp /= exp.sum(axis=1, keepdims=True)
    return exp


class GraphSAGEClassifier:
    """Mean-aggregator GraphSAGE classifier with manual gradients."""

    def __init__(
        self, hidden_dim: int = 6, num_classes: int = 2, random_state: int = 60
    ) -> None:
        self.hidden_dim = hidden_dim
        self.num_classes = num_classes
        self.random_state = random_state
        self.W_self: np.ndarray | None = None
        self.W_neigh: np.ndarray | None = None
        self.b_hidden: np.ndarray | None = None
        self.W_out: np.ndarray | None = None
        self.b_out: np.ndarray | None = None

    def _ensure_params(self, input_dim: int) -> None:
        if self.W_self is not None:
            return
        rng = np.random.default_rng(self.random_state)
        self.W_self = rng.normal(0.0, 0.4, size=(input_dim, self.hidden_dim))
        self.W_neigh = rng.normal(0.0, 0.4, size=(input_dim, self.hidden_dim))
        self.b_hidden = np.zeros(self.hidden_dim)
        self.W_out = rng.normal(0.0, 0.4, size=(self.hidden_dim, self.num_classes))
        self.b_out = np.zeros(self.num_classes)

    def forward(self, data: GraphData) -> Dict[str, np.ndarray]:
        assert self.W_self is not None and self.W_neigh is not None
        assert (
            self.b_hidden is not None
            and self.W_out is not None
            and self.b_out is not None
        )
        features = data.features
        adjacency = data.adjacency
        degrees = adjacency.sum(axis=1, keepdims=True)
        degrees[degrees == 0] = 1.0
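        # Mean aggregation over neighbours (self-loops included via the diagonal).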
        neigh_mean = adjacency @ features / degrees
        hidden_pre = features @ self.W_self + neigh_mean @ self.W_neigh + self.b_hidden
        hidden = np.maximum(0.0, hidden_pre)
        logits = hidden @ self.W_out + self.b_out
        probs = _softmax(logits)
        return {
            "logits": logits,
            "hidden": hidden,
            "hidden_pre": hidden_pre,
            "neigh": neigh_mean,
            "probs": probs,
        }

    def train(self, data: GraphData, epochs: int = 200, lr: float = 0.1) -> List[float]:
        self._ensure_params(data.features.shape[1])
        assert self.W_self is not None and self.W_neigh is not None
        assert (
            self.b_hidden is not None
            and self.W_out is not None
            and self.b_out is not None
        )
        y = data.labels
        y_onehot = np.eye(self.num_classes)[y]
        losses: List[float] = []
        for _ in range(epochs):
            forward = self.forward(data)
            probs = forward["probs"]
            loss = float(-np.sum(y_onehot * np.log(probs + 1e-9)) / y.shape[0])
            losses.append(loss)

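            # Backward pass: the softmax cross-entropy gradient w.r.t. the
            # logits is (probs - one_hot) averaged over nodes.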
            grad_logits = (probs - y_onehot) / y.shape[0]
            grad_W_out = forward["hidden"].T @ grad_logits
            grad_b_out = grad_logits.sum(axis=0)
            grad_hidden = grad_logits @ self.W_out.T
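            # ReLU gate: gradients flow only through units that were active.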
            grad_hidden_pre = grad_hidden * (forward["hidden_pre"] > 0)

            grad_W_self = data.features.T @ grad_hidden_pre
            grad_W_neigh = forward["neigh"].T @ grad_hidden_pre
            grad_b_hidden = grad_hidden_pre.sum(axis=0)

            self.W_out -= lr * grad_W_out
            self.b_out -= lr * grad_b_out
            self.W_self -= lr * grad_W_self
            self.W_neigh -= lr * grad_W_neigh
            self.b_hidden -= lr * grad_b_hidden
        return losses

    def predict(self, data: GraphData) -> np.ndarray:
        probs = self.forward(data)["probs"]
        return probs.argmax(axis=1)

    def accuracy(self, data: GraphData) -> float:
        preds = self.predict(data)
        return float((preds == data.labels).mean())


class GraphAttentionClassifier:
    """Attention-based aggregator with trainable linear head."""

    def __init__(
        self, temperature: float = 0.5, num_classes: int = 2, random_state: int = 60
    ) -> None:
        self.temperature = temperature
        self.num_classes = num_classes
        self.random_state = random_state
        self.W_out: np.ndarray | None = None
        self.b_out: np.ndarray | None = None
        self._embeddings: np.ndarray | None = None
        self._attention: np.ndarray | None = None

    def _attention_matrix(
        self, features: np.ndarray, adjacency: np.ndarray
    ) -> np.ndarray:
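        # Dot-product similarity scaled by temperature, masked to graph edges,
        # then row-normalised: a softmax restricted to each node's neighbourhood.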
        sim = (features @ features.T) / self.temperature
        sim -= sim.max(axis=1, keepdims=True)
        weights = np.exp(sim)
        masked = weights * adjacency
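        # Guard against isolated rows so the division below never hits zero.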
        normaliser = masked.sum(axis=1, keepdims=True)
        normaliser[normaliser == 0] = 1.0
        return masked / normaliser

    def encode(self, data: GraphData) -> np.ndarray:
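        # Add self-loops so each node attends to its own features, then
        # aggregate neighbour features with the attention weights.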
        adjacency = data.adjacency.copy()
        np.fill_diagonal(adjacency, 1.0)
        attn = self._attention_matrix(data.features, adjacency)
        self._attention = attn
        embeddings = attn @ data.features
        self._embeddings = embeddings
        return embeddings

    def train(self, data: GraphData, epochs: int = 200, lr: float = 0.1) -> List[float]:
        embeddings = self.encode(data)
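        # The attention aggregator has no trainable parameters; only the
        # linear read-out head below is optimised.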
        if self.W_out is None or self.b_out is None:
            rng = np.random.default_rng(self.random_state)
            self.W_out = rng.normal(
                0.0, 0.4, size=(embeddings.shape[1], self.num_classes)
            )
            self.b_out = np.zeros(self.num_classes)
        assert self.W_out is not None and self.b_out is not None
        y = data.labels
        y_onehot = np.eye(self.num_classes)[y]
        losses: List[float] = []
        for _ in range(epochs):
            logits = embeddings @ self.W_out + self.b_out
            probs = _softmax(logits)
            loss = float(-np.sum(y_onehot * np.log(probs + 1e-9)) / y.shape[0])
            losses.append(loss)

            grad_logits = (probs - y_onehot) / y.shape[0]
            grad_W_out = embeddings.T @ grad_logits
            grad_b_out = grad_logits.sum(axis=0)
            self.W_out -= lr * grad_W_out
            self.b_out -= lr * grad_b_out
        return losses

    def predict(self, data: GraphData) -> np.ndarray:
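        # Reuse embeddings cached by train()/encode() when available.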
        embeddings = self.encode(data) if self._embeddings is None else self._embeddings
        assert self.W_out is not None and self.b_out is not None
        logits = embeddings @ self.W_out + self.b_out
        probs = _softmax(logits)
        return probs.argmax(axis=1)

    def attention_matrix(self, data: GraphData) -> np.ndarray:
        if self._attention is None:
            self.encode(data)
        assert self._attention is not None
        return self._attention

    def accuracy(self, data: GraphData) -> float:
        preds = self.predict(data)
        return float((preds == data.labels).mean())


def train_node_classifiers(random_state: int = 60) -> Dict[str, object]:
    """Train both GraphSAGE and graph attention classifiers on the toy graph."""

    data = build_toy_graph()
    sage = GraphSAGEClassifier(random_state=random_state)
    gat = GraphAttentionClassifier(random_state=random_state)
    sage_losses = sage.train(data, epochs=200, lr=0.1)
    gat_losses = gat.train(data, epochs=200, lr=0.1)
    results = {
        "graphsage_accuracy": sage.accuracy(data),
        "gat_accuracy": gat.accuracy(data),
        "graphsage_losses": sage_losses,
        "gat_losses": gat_losses,
        "attention_matrix": gat.attention_matrix(data),
    }
    return results


def _demo() -> None:
    results = train_node_classifiers()
    print(
        f"GraphSAGE accuracy: {results['graphsage_accuracy']:.3f} | GAT accuracy: {results['gat_accuracy']:.3f}"
    )
    print(f"Attention matrix row sums: {results['attention_matrix'].sum(axis=1)}")


if __name__ == "__main__":
    _demo()
```