Skip to content

Day 41 · Supervised Learning – Regression

What's in this folder?

  • solutions.py – modular helpers for generating synthetic regression data, training a linear regression model, evaluating it, and saving visualisations.
  • tests/test_day_41.py – pytest coverage for the helper functions and the end-to-end demo workflow.

How to run the demo

python Day_41_Supervised_Learning_Regression/solutions.py

This command prints the evaluation metrics and saves regression_fit.png in the working directory.

Key functions

| Function | Description |
| --- | --- |
| generate_regression_data | Creates a deterministic dataset using NumPy for reproducible experimentation. |
| train_regression_model | Fits a LinearRegression model on the provided training split. |
| make_regression_predictions | Returns predictions for a trained model. |
| evaluate_regression_model | Computes Mean Squared Error (MSE) and the coefficient of determination (R²). |
| plot_regression_results | Saves a scatter plot with the fitted regression line to disk. |
| run_linear_regression_demo | Orchestrates the full pipeline and returns the evaluation metrics. |

Tests

Run the regression unit tests with:

pytest tests/test_day_41.py

Previous: Day 40 – Day 40: Introduction to Machine Learning & Core Concepts | Next: Day 42 – Day 42 · Supervised Learning – Classification (Part 1)

You are on lesson 41 of 108.

Additional Materials

solutions.py

View on GitHub

solutions.py
"""Reusable helpers for the Day 41 regression lesson.

The module exposes composable utilities to generate synthetic data,
train a linear regression model, evaluate it, and optionally persist
visualisations.  When executed as a script it will run the full demo and
save a regression plot to ``regression_fit.png``.
"""

from __future__ import annotations

from pathlib import Path
from typing import Dict, Tuple

import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split


def generate_regression_data(
    n_samples: int = 100,
    slope: float = 2.5,
    intercept: float = 10.0,
    noise_std: float = 1.0,
    random_state: int = 42,
) -> Tuple[np.ndarray, np.ndarray]:
    """Build a reproducible, noisy straight-line dataset.

    Features are sampled from ``[0, 2)`` and targets follow
    ``intercept + slope * x`` plus Gaussian noise whose standard deviation
    is ``noise_std``.  The same ``random_state`` always yields the same data.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_samples, 1)`` and ``y`` has
        shape ``(n_samples,)``.
    """
    generator = np.random.default_rng(random_state)
    # Features are drawn before the noise; keeping that order fixes which
    # part of the random stream each array consumes.
    features = 2 * generator.random(size=(n_samples, 1))
    gaussian_noise = generator.normal(loc=0.0, scale=noise_std, size=n_samples)
    targets = intercept + slope * features.ravel() + gaussian_noise
    return features, targets


def split_regression_data(
    X: np.ndarray,
    y: np.ndarray,
    test_size: float = 0.2,
    random_state: int = 42,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Partition a dataset into training and held-out test portions.

    Delegates to :func:`sklearn.model_selection.train_test_split`, passing
    ``test_size`` and ``random_state`` through unchanged.  The result
    unpacks as ``X_train, X_test, y_train, y_test``.
    """
    split_options = {"test_size": test_size, "random_state": random_state}
    return train_test_split(X, y, **split_options)


def train_regression_model(
    X_train: np.ndarray, y_train: np.ndarray
) -> LinearRegression:
    """Return a :class:`~sklearn.linear_model.LinearRegression` fitted on the training split."""
    # ``fit`` hands back the estimator itself, so construction and fitting
    # can be chained into a single expression.
    return LinearRegression().fit(X_train, y_train)


def make_regression_predictions(model: LinearRegression, X: np.ndarray) -> np.ndarray:
    """Run the given model on ``X`` and hand back whatever ``model.predict`` returns."""
    predictions = model.predict(X)
    return predictions


def evaluate_regression_model(
    y_true: np.ndarray, y_pred: np.ndarray
) -> Dict[str, float]:
    """Score predictions against the true targets.

    Returns:
        A dictionary with two entries: ``"mse"`` (mean squared error) and
        ``"r2"`` (coefficient of determination), both computed by
        :mod:`sklearn.metrics`.
    """
    scorers = (("mse", mean_squared_error), ("r2", r2_score))
    return {label: scorer(y_true, y_pred) for label, scorer in scorers}


def plot_regression_results(
    X: np.ndarray,
    y_true: np.ndarray,
    y_pred: np.ndarray,
    filepath: str | Path = "regression_fit.png",
) -> Tuple[plt.Figure, Path]:
    """Create and save a scatter/line plot of the regression fit.

    Args:
        X: Feature values; flattened to one dimension for plotting.
        y_true: Observed targets, drawn as scatter points.
        y_pred: Predictions for ``X``, drawn as the fitted line.
        filepath: Destination of the image.  Missing parent directories
            are created before saving.

    Returns:
        ``(fig, output_path)``.  The figure is left open so callers can keep
        working with it; call ``plt.close(fig)`` once it is no longer needed.

    Raises:
        Any exception from ``fig.savefig`` is propagated after the figure
        has been closed.
    """
    # Accept plain sequences as well as arrays, and flatten the features once
    # instead of once per use.
    x_values = np.asarray(X).ravel()
    predictions = np.asarray(y_pred)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(x_values, y_true, color="blue", label="Actual values")
    # Sorting by x makes the line run left to right instead of jumping
    # between points in their original (arbitrary) order.
    sorted_indices = np.argsort(x_values)
    ax.plot(
        x_values[sorted_indices],
        predictions[sorted_indices],
        color="red",
        linewidth=2,
        label="Regression line",
    )
    ax.set_title("Linear Regression Fit")
    ax.set_xlabel("Independent Variable (X)")
    ax.set_ylabel("Dependent Variable (y)")
    ax.grid(True)
    ax.legend()

    output_path = Path(filepath)
    # savefig does not create directories, so make sure the target exists.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        fig.savefig(output_path)
    except Exception:
        # Nothing is returned on failure, so release the figure here rather
        # than leaving it registered with pyplot.
        plt.close(fig)
        raise
    return fig, output_path


def run_linear_regression_demo(
    save_path: str | Path = "regression_fit.png",
) -> Dict[str, float]:
    """Execute the full demo workflow and return evaluation metrics.

    The pipeline generates the default synthetic dataset, splits it, fits a
    linear regression on the training portion, scores the predictions on the
    test portion, and saves a plot of the test-set fit to ``save_path``.

    Args:
        save_path: Where the regression plot is written.

    Returns:
        The metrics dictionary produced by :func:`evaluate_regression_model`
        (keys ``"mse"`` and ``"r2"``) for the test split.
    """
    X, y = generate_regression_data()
    X_train, X_test, y_train, y_test = split_regression_data(X, y)
    model = train_regression_model(X_train, y_train)
    y_pred = make_regression_predictions(model, X_test)
    metrics = evaluate_regression_model(y_test, y_pred)
    fig, _ = plot_regression_results(X_test, y_test, y_pred, filepath=save_path)
    # The demo only needs the saved file.  pyplot keeps every figure alive
    # until it is closed, so release it instead of leaking one per call.
    plt.close(fig)
    return metrics


if __name__ == "__main__":
    # Run the demo first, then emit the whole report in a single print call.
    # The sample counts in the messages are fixed strings, not values read
    # back from the demo.
    metrics = run_linear_regression_demo()
    report_lines = (
        "--- Linear Regression Example ---",
        "Generated a dataset with 100 samples.",
        "Training set size: 80 samples",
        "Testing set size: 20 samples",
        "-" * 30,
        f"Learned metrics -> MSE: {metrics['mse']:.4f}, R^2: {metrics['r2']:.4f}",
        "Saved a plot of the regression fit to 'regression_fit.png'",
    )
    print(*report_lines, sep="\n")