Day 41 · Supervised Learning – Regression

What's in this folder?

solutions.py – modular helpers for generating synthetic regression data, training a linear regression model, evaluating it, and saving visualisations.
tests/test_day_41.py – pytest coverage for the helper functions and the end-to-end demo workflow.

How to run the demo

python Day_41_Supervised_Learning_Regression/solutions.py

This command prints the evaluation metrics and saves regression_fit.png in the working directory.

Key functions

| Function | Description |
| --- | --- |
| generate_regression_data | Creates a deterministic dataset using NumPy for reproducible experimentation. |
| train_regression_model | Fits a LinearRegression model on the provided training split. |
| make_regression_predictions | Returns predictions for a trained model. |
| evaluate_regression_model | Computes Mean Squared Error (MSE) and the coefficient of determination (R²). |
| plot_regression_results | Saves a scatter plot with the fitted regression line to disk. |
| run_linear_regression_demo | Orchestrates the full pipeline and returns the evaluation metrics. |

Tests

Run the regression unit tests with:

pytest tests/test_day_41.py

Previous: Day 40 – Introduction to Machine Learning & Core Concepts • Next: Day 42 – Supervised Learning – Classification (Part 1)

You are on lesson 41 of 108.

Additional Materials

solutions.ipynb 📁 View on GitHub 🚀 Run in Google Colab ☁️ Run in Binder

solutions.py

View on GitHub

solutions.py

"""Reusable helpers for the Day 41 regression lesson.

The module exposes composable utilities to generate synthetic data,
train a linear regression model, evaluate it, and optionally persist
visualisations.  When executed as a script it will run the full demo and
save a regression plot to ``regression_fit.png``.
"""

from __future__ import annotations

from pathlib import Path
from typing import Dict, Tuple

import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split


def generate_regression_data(
    n_samples: int = 100,
    slope: float = 2.5,
    intercept: float = 10.0,
    noise_std: float = 1.0,
    random_state: int = 42,
) -> Tuple[np.ndarray, np.ndarray]:
    """Build a reproducible, noisy straight-line dataset.

    Args:
        n_samples: Number of observations to generate.
        slope: Coefficient applied to the single feature.
        intercept: Constant offset added to every target.
        noise_std: Standard deviation of the Gaussian noise added to targets.
        random_state: Seed for the NumPy random generator.

    Returns:
        A pair ``(X, y)`` where ``X`` has shape ``(n_samples, 1)`` with values
        in ``[0, 2)`` and ``y`` has shape ``(n_samples,)``.
    """
    generator = np.random.default_rng(random_state)
    # Draw order matters for reproducibility: features first, then the noise.
    features = 2 * generator.random(size=(n_samples, 1))
    residuals = generator.normal(loc=0.0, scale=noise_std, size=n_samples)
    targets = intercept + slope * features.ravel() + residuals
    return features, targets


def split_regression_data(
    X: np.ndarray,
    y: np.ndarray,
    test_size: float = 0.2,
    random_state: int = 42,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Partition features and targets into training and testing subsets.

    Args:
        X: Feature matrix.
        y: Target values aligned with ``X``.
        test_size: Passed through to ``train_test_split`` as ``test_size``.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            repeatable.

    Returns:
        The result of ``train_test_split``, unpackable as
        ``X_train, X_test, y_train, y_test``.
    """
    split_options = {"test_size": test_size, "random_state": random_state}
    return train_test_split(X, y, **split_options)


def train_regression_model(
    X_train: np.ndarray, y_train: np.ndarray
) -> LinearRegression:
    """Return a :class:`~sklearn.linear_model.LinearRegression` fitted to the data.

    Args:
        X_train: Training feature matrix.
        y_train: Training targets aligned with ``X_train``.

    Returns:
        The fitted estimator.
    """
    # ``fit`` returns the estimator itself, so construction and fitting chain.
    return LinearRegression().fit(X_train, y_train)


def make_regression_predictions(model: LinearRegression, X: np.ndarray) -> np.ndarray:
    """Run ``model.predict`` on ``X`` and hand back the result.

    Args:
        model: A fitted estimator exposing ``predict``.
        X: Feature matrix to score.

    Returns:
        Whatever ``model.predict(X)`` produces.
    """
    predictions = model.predict(X)
    return predictions


def evaluate_regression_model(
    y_true: np.ndarray, y_pred: np.ndarray
) -> Dict[str, float]:
    """Score predictions against ground truth.

    Args:
        y_true: Observed target values.
        y_pred: Predicted target values aligned with ``y_true``.

    Returns:
        A dictionary with two entries: ``"mse"`` (mean squared error) and
        ``"r2"`` (coefficient of determination).
    """
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return dict(mse=mse, r2=r2)


def plot_regression_results(
    X: np.ndarray,
    y_true: np.ndarray,
    y_pred: np.ndarray,
    filepath: str | Path = "regression_fit.png",
) -> Tuple[plt.Figure, Path]:
    """Create and save a scatter/line plot of the regression fit.

    Args:
        X: Feature values for the plotted samples; flattened to one dimension
            for the x-axis of the fitted line.
        y_true: Observed targets, drawn as a scatter plot.
        y_pred: Predicted targets aligned with ``X``, drawn as a line. Any
            array-like is accepted.
        filepath: Destination of the saved image. Missing parent directories
            are created.

    Returns:
        A pair ``(fig, output_path)``. The figure is left open so the caller
        can keep working with it; call ``plt.close(fig)`` when it is no
        longer needed.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(X, y_true, color="blue", label="Actual values")

    # Coerce to flat arrays so list inputs and column vectors are both usable
    # with the fancy indexing below.
    x_values = np.asarray(X).ravel()
    predictions = np.asarray(y_pred).ravel()

    # Sorting ensures the regression line is drawn left to right instead of
    # zig-zagging between points in sample order.
    sorted_indices = np.argsort(x_values)
    ax.plot(
        x_values[sorted_indices],
        predictions[sorted_indices],
        color="red",
        linewidth=2,
        label="Regression line",
    )
    ax.set_title("Linear Regression Fit")
    ax.set_xlabel("Independent Variable (X)")
    ax.set_ylabel("Dependent Variable (y)")
    ax.grid(True)
    ax.legend()

    output_path = Path(filepath)
    # ``savefig`` does not create directories, so make sure the target exists.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(output_path)
    return fig, output_path


def run_linear_regression_demo(
    save_path: str | Path = "regression_fit.png",
) -> Dict[str, float]:
    """Execute the full demo workflow and return evaluation metrics.

    The pipeline generates the default synthetic dataset, splits it into
    train/test subsets, fits a linear regression model, scores it on the test
    subset, and saves a plot of the test-set fit.

    Args:
        save_path: Where the regression plot is written.

    Returns:
        The metrics dictionary produced by ``evaluate_regression_model``
        (keys ``"mse"`` and ``"r2"``).
    """
    X, y = generate_regression_data()
    X_train, X_test, y_train, y_test = split_regression_data(X, y)
    model = train_regression_model(X_train, y_train)
    y_pred = make_regression_predictions(model, X_test)
    metrics = evaluate_regression_model(y_test, y_pred)
    figure, _ = plot_regression_results(X_test, y_test, y_pred, filepath=save_path)
    # The figure is only needed for the saved image; close it so repeated
    # calls do not accumulate open figures in pyplot's registry.
    plt.close(figure)
    return metrics


if __name__ == "__main__":
    # Run the pipeline first so nothing is printed if the demo fails.
    metrics = run_linear_regression_demo()
    # The sizes quoted below mirror the demo defaults (100 samples, 80/20 split).
    print(
        "--- Linear Regression Example ---",
        "Generated a dataset with 100 samples.",
        "Training set size: 80 samples",
        "Testing set size: 20 samples",
        "-" * 30,
        f"Learned metrics -> MSE: {metrics['mse']:.4f}, R^2: {metrics['r2']:.4f}",
        "Saved a plot of the regression fit to 'regression_fit.png'",
        sep="\n",
    )