- `solutions.py` – reusable helpers for loading the Iris dataset, scaling features, and training logistic regression and KNN classifiers with deterministic settings.
- `tests/test_day_42.py` – pytest coverage for the dataset preparation utilities and classification metrics.

Run the demo with `python Day_42_Supervised_Learning_Classification_Part_1/solutions.py`.
This command trains both classifiers and prints their accuracy scores.
| Function | Description |
|---|---|
| `load_and_prepare_iris` | Splits the Iris dataset and returns both raw and standardised feature matrices. |
| `train_logistic_regression` | Fits a multi-class logistic regression model with a deterministic random seed. |
| `train_knn_classifier` | Trains a K-Nearest Neighbours classifier on the scaled features. |
| `evaluate_classifier` | Computes the accuracy score for a trained classifier and returns it in a metrics dictionary. |
| `run_classification_demo` | Executes the full workflow and returns a metrics dictionary for both models. |
Run the classification unit tests with:

    pytest tests/test_day_42.py
Run this lesson’s code interactively in your browser:
!!! tip "About JupyterLite"
    JupyterLite runs entirely in your browser using WebAssembly. No installation or server is required. Note: the first launch may take a moment to load.
???+ example “solutions.py” View on GitHub
```python title="solutions.py"
"""Reusable helpers for logistic regression and KNN on the Iris dataset."""
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
@dataclass
class IrisData:
    """Bundle of one train/test split of the Iris data.

    Carries the unscaled feature matrices, the matching label vectors, the
    scaled copies of the features, and the scaler object associated with
    those scaled copies.
    """

    # Unscaled feature matrices for the training and test partitions.
    X_train: np.ndarray
    X_test: np.ndarray

    # Class labels for the training and test partitions.
    y_train: np.ndarray
    y_test: np.ndarray

    # Scaled counterparts of ``X_train`` and ``X_test``.
    X_train_scaled: np.ndarray
    X_test_scaled: np.ndarray

    # Scaler kept alongside the data so callers can reuse it on new samples.
    scaler: StandardScaler
def load_and_prepare_iris(
    test_size: float = 0.3,
    random_state: int = 42,
) -> IrisData:
    """Split the Iris dataset and standardise its features.

    Args:
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            repeatable.

    Returns:
        An ``IrisData`` holding the raw and scaled train/test feature
        matrices, the label vectors, and the fitted ``StandardScaler``.
    """
    features, labels = load_iris(return_X_y=True)

    # Stratify on the labels so both partitions keep the class proportions.
    raw_train, raw_test, labels_train, labels_test = train_test_split(
        features,
        labels,
        test_size=test_size,
        random_state=random_state,
        stratify=labels,
    )

    # Fit on the training rows only; the test rows are transformed with the
    # statistics learned from training data.
    fitted_scaler = StandardScaler().fit(raw_train)

    return IrisData(
        X_train=raw_train,
        X_test=raw_test,
        y_train=labels_train,
        y_test=labels_test,
        X_train_scaled=fitted_scaler.transform(raw_train),
        X_test_scaled=fitted_scaler.transform(raw_test),
        scaler=fitted_scaler,
    )
def train_logistic_regression(
    X_train: np.ndarray,
    y_train: np.ndarray,
    *,
    random_state: int = 42,
    max_iter: int = 200,
) -> LogisticRegression:
    """Fit a ``LogisticRegression`` model on the given training data.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        random_state: Forwarded to ``LogisticRegression`` for repeatable fits.
        max_iter: Forwarded to ``LogisticRegression`` as the iteration cap.

    Returns:
        The fitted ``LogisticRegression`` estimator.
    """
    classifier = LogisticRegression(
        max_iter=max_iter,
        random_state=random_state,
    )
    # ``fit`` hands back the estimator itself, so it can be returned directly.
    return classifier.fit(X_train, y_train)
def train_knn_classifier(
    X_train: np.ndarray,
    y_train: np.ndarray,
    *,
    n_neighbors: int = 5,
) -> KNeighborsClassifier:
    """Fit a ``KNeighborsClassifier`` on the given training data.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        n_neighbors: Forwarded to ``KNeighborsClassifier`` as the number of
            neighbours to consult.

    Returns:
        The fitted ``KNeighborsClassifier`` estimator.
    """
    # Construct and fit in one expression; ``fit`` returns the estimator.
    return KNeighborsClassifier(n_neighbors=n_neighbors).fit(X_train, y_train)
def evaluate_classifier(
    model,
    X_test: np.ndarray,
    y_test: np.ndarray,
) -> Dict[str, float]:
    """Score a fitted classifier on held-out data.

    Args:
        model: Any object exposing ``predict(X)``.
        X_test: Feature matrix to predict on.
        y_test: True labels to compare the predictions against.

    Returns:
        A dictionary with a single ``"accuracy"`` entry holding the value
        returned by ``accuracy_score``.
    """
    predictions = model.predict(X_test)
    return {"accuracy": accuracy_score(y_test, predictions)}
def run_classification_demo() -> Dict[str, Dict[str, float]]:
    """Train both classifiers on scaled Iris features and score them.

    Returns:
        A mapping with the keys ``"logistic_regression"`` and ``"knn"``, each
        pointing at the metrics dictionary produced by
        ``evaluate_classifier`` on the scaled test partition.
    """
    split = load_and_prepare_iris()

    # Both models are trained before either is evaluated; insertion order of
    # this dict fixes the order of the keys in the result.
    fitted_models = {
        "logistic_regression": train_logistic_regression(
            split.X_train_scaled, split.y_train
        ),
        "knn": train_knn_classifier(split.X_train_scaled, split.y_train),
    }

    return {
        label: evaluate_classifier(estimator, split.X_test_scaled, split.y_test)
        for label, estimator in fitted_models.items()
    }
if __name__ == "__main__":
    # Print the banner and the "Training..." status line *before* the models
    # are trained, so the message is accurate and still appears if training
    # fails part-way through.
    print("--- Classification Example on Iris Dataset ---")
    print("Training Logistic Regression and KNN models...")

    metrics = run_classification_demo()

    # One line per model, e.g. "Logistic Regression accuracy: 0.9333".
    for model_name, model_metrics in metrics.items():
        display_name = model_name.replace("_", " ").title()
        print(f"{display_name} accuracy: {model_metrics['accuracy']:.4f}")
```