Static charts are good for reports, but in the modern era of business intelligence, users expect to be able to interact with their data. They want to hover over data points to get more details, zoom into specific time ranges, and filter data on the fly.
For this, we use Plotly. Plotly is a powerful Python library for creating interactive, publication-quality graphs online. The charts can be displayed in Python notebooks, saved as HTML files, or embedded in web applications and dashboards.
Plotly is a large library, but its `plotly.express` module (conventionally imported as `px`) is a simple, high-level interface for creating entire figures at once. It’s the recommended starting point for most use cases.
The syntax for `plotly.express` is very similar to Seaborn’s, which makes it easy to learn.
Let’s see how to create interactive versions of the charts we made previously. When you run this code, Plotly will typically open the chart in your web browser. You can then interact with it.
import plotly.express as px
# Assuming 'df' is a DataFrame with 'Region' and 'Revenue' columns.
# Sum 'Revenue' within each region; reset_index() turns the grouped result
# back into a DataFrame so 'Region' is available to px.bar as a column.
region_revenue = df.groupby('Region')['Revenue'].sum().reset_index()
fig = px.bar(region_revenue, x='Region', y='Revenue', title='Total Revenue by Region')
fig.show()
Interaction: Hover over the bars to see the exact revenue for each region.
# Assuming 'daily_revenue' is a DataFrame with 'Date' and 'Revenue' columns.
# 'Date' is plotted on the x-axis and 'Revenue' on the y-axis.
fig = px.line(daily_revenue, x='Date', y='Revenue', title='Daily Revenue Trend')
fig.show()
Interaction: Hover along the line to see the date and revenue for any point. Click and drag to zoom into a specific time period. Double-click to zoom back out.
# Assuming 'df' has 'Price', 'Units Sold', 'Product' and 'Region' columns.
# color='Product' colours the points by product (one legend entry each);
# hover_data=['Region'] adds the region to the hover tooltip.
fig = px.scatter(df, x='Price', y='Units Sold', color='Product', hover_data=['Region'], title='Price vs. Units Sold')
fig.show()
Interaction: Hover over any point to see the Price, Units Sold, Product, and Region for that specific transaction. Click on items in the legend to toggle product categories on and off.
You can save your interactive chart as a standalone HTML file that anyone can open in their web browser.
# After creating your figure with px...
# write_html() saves the figure to the given HTML file path.
fig.write_html("interactive_revenue_chart.html")
ℹ️ Version control note: Any HTML (and PNG) files generated by these exercises are ignored by git. Feel free to regenerate them locally whenever you rerun the lesson.
For these exercises, you will use the cleaned `sales_data.csv` from Day 24.
1. **Interactive Sales by Product:**
    - Group the data by `Product` to get the sum of `Revenue` for each product.
    - Create an interactive bar chart with `plotly.express` that shows the total `Revenue` for each `Product`.
2. **Interactive Revenue vs. Units Sold:**
    - Create an interactive scatter plot with `Revenue` on the y-axis and `Units Sold` on the x-axis.
    - Color the points by `Region`.
    - Save the chart as `revenue_scatterplot.html`.

🎉 Incredible! You’ve now stepped into the world of interactive data visualization. Being able to create and share plots that allow stakeholders to explore the data for themselves is a highly valuable skill for any modern analyst.
Run this lesson’s code interactively in your browser:
!!! tip "About JupyterLite"
    JupyterLite runs entirely in your browser using WebAssembly. No installation or server required! Note: First launch may take a moment to load.
???+ example “interactive_visualization.py” View on GitHub
```python title="interactive_visualization.py"
"""Reusable helpers for Day 29 interactive Plotly visualisations."""
from __future__ import annotations
from pathlib import Path
from typing import Iterable
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
__all__ = [
"load_sales_data",
"build_region_revenue_bar",
"build_daily_revenue_line",
"build_price_units_scatter",
"main",
]
def _require_columns(df: pd.DataFrame, required: Iterable[str]) -> None:
    """Validate that ``df`` has the columns and rows a figure builder needs.

    Two checks run in order: every name in ``required`` must be a column of
    ``df``, and ``df`` must contain at least one row.

    Raises:
        ValueError: If any required column is absent (the message lists the
            missing names in sorted order), or if ``df`` is empty.
    """
    absent = sorted(set(required).difference(df.columns))
    if absent:
        raise ValueError(
            "DataFrame is missing required columns: " + ", ".join(absent)
        )

    if df.empty:
        raise ValueError("DataFrame must contain at least one row to build the figure.")
def load_sales_data(data_path: Path | str | None = None) -> pd.DataFrame:
    """Load the lesson's sales data as a cleaned DataFrame.

    Args:
        data_path: CSV file to read. When ``None``, ``sales_data.csv`` in the
            directory containing this module is used.

    Returns:
        The CSV contents with the ``Date`` column parsed as dates, every row
        that contains a missing value removed, and a fresh 0-based index.
    """
    csv_path = (
        Path(__file__).resolve().parent / "sales_data.csv"
        if data_path is None
        else data_path
    )
    raw = pd.read_csv(csv_path, parse_dates=["Date"])
    complete_rows = raw.dropna()
    return complete_rows.reset_index(drop=True)
def build_region_revenue_bar(df: pd.DataFrame) -> go.Figure:
    """Build the "Total Revenue by Region" bar chart.

    ``Revenue`` is summed per ``Region`` and the rows are ordered by region
    name. Bars are coloured by region and the legend is hidden.

    Raises:
        ValueError: Propagated from ``_require_columns`` when ``df`` lacks
            ``Region``/``Revenue`` or has no rows.
    """
    _require_columns(df, ["Region", "Revenue"])

    totals = df.groupby("Region", as_index=False)["Revenue"].sum()
    totals = totals.sort_values("Region")

    bar_options = {
        "x": "Region",
        "y": "Revenue",
        "color": "Region",
        "title": "Total Revenue by Region",
        "labels": {"Revenue": "Total Revenue (USD)"},
    }
    chart = px.bar(totals, **bar_options)
    # Colour already distinguishes the bars along the x-axis, so hide the legend.
    chart.update_layout(showlegend=False)
    return chart
def build_daily_revenue_line(df: pd.DataFrame) -> go.Figure:
    """Build the "Daily Revenue Trend" line chart with markers.

    ``Revenue`` is summed per ``Date`` and ordered by date. After the figure
    is created, the x values of every trace are replaced with a tuple of
    plain Python ``datetime`` objects.

    Raises:
        ValueError: Propagated from ``_require_columns`` when ``df`` lacks
            ``Date``/``Revenue`` or has no rows.
    """
    _require_columns(df, ["Date", "Revenue"])

    def _to_py_datetime(value):
        """Convert one date-like value to a plain ``datetime`` object."""
        return pd.Timestamp(value).to_pydatetime()

    totals = df.groupby("Date", as_index=False)["Revenue"].sum()
    totals = totals.sort_values("Date")

    # Normalise the dates to Python ``datetime`` objects before plotting.
    # NOTE(review): pandas may store these back as ``datetime64`` on
    # assignment, which is presumably why the trace values are converted
    # again after the figure is built -- confirm before removing either step.
    totals["Date"] = [_to_py_datetime(stamp) for stamp in totals["Date"]]

    chart = px.line(
        totals,
        x="Date",
        y="Revenue",
        title="Daily Revenue Trend",
        markers=True,
    )
    chart.update_traces(mode="lines+markers")
    chart.update_layout(yaxis_title="Revenue (USD)")

    # Hand callers Python ``datetime`` x values on every trace.
    for series in chart.data:
        series.update(x=tuple(_to_py_datetime(point) for point in series.x))

    return chart
def build_price_units_scatter(df: pd.DataFrame) -> go.Figure:
    """Build the "Price vs. Units Sold Analysis" scatter plot.

    Points are placed by ``Price`` (x) and ``Units Sold`` (y), coloured by
    ``Product`` and sized by ``Revenue``; ``Region`` and ``Revenue`` are
    added to the hover data.

    Raises:
        ValueError: Propagated from ``_require_columns`` when ``df`` lacks
            any of the five columns used here or has no rows.
    """
    needed = ["Price", "Units Sold", "Revenue", "Product", "Region"]
    _require_columns(df, needed)

    scatter_options = {
        "x": "Price",
        "y": "Units Sold",
        "color": "Product",
        "size": "Revenue",
        "hover_data": ["Region", "Revenue"],
        "title": "Price vs. Units Sold Analysis",
    }
    chart = px.scatter(df, **scatter_options)

    chart.update_layout(
        legend_title_text="Product",
        xaxis_title="Price (USD)",
        yaxis_title="Units Sold",
    )
    return chart
def main() -> None:
    """Run the lesson demo: load the data, show three figures, save one.

    The default ``sales_data.csv`` is loaded; if it cannot be found an error
    message is printed and the function returns without plotting. Otherwise
    the bar, line and scatter figures are shown in that order, and the
    scatter plot is also written to ``interactive_scatter_plot.html``.
    """
    try:
        sales = load_sales_data()
    except FileNotFoundError:
        print("Error: sales_data.csv not found. Keep the CSV beside this script.")
        return

    print("Data loaded successfully.")

    # The first two figures are only displayed, so they share one loop.
    display_only = (
        ("\n--- 1. Interactive Bar Chart: Revenue by Region ---", build_region_revenue_bar),
        ("\n--- 2. Interactive Line Chart: Revenue Over Time ---", build_daily_revenue_line),
    )
    for heading, builder in display_only:
        print(heading)
        builder(sales).show()

    # The scatter plot is displayed and also exported as standalone HTML.
    print("\n--- 3. Interactive Scatter Plot: Price vs. Units Sold ---")
    scatter_chart = build_price_units_scatter(sales)
    scatter_chart.show()

    html_name = "interactive_scatter_plot.html"
    scatter_chart.write_html(html_name)
    print(
        f"\nScatter plot saved to '{html_name}'. You can open this file in a web browser."
    )


if __name__ == "__main__":
    main()
```
???+ example “solutions.py” View on GitHub
```python title="solutions.py"
"""Day 29: worked solutions for the interactive visualisation exercises.

Running this script loads ``sales_data.csv`` from the script's own directory
and, when rows are available, writes one HTML chart per exercise.
"""

from pathlib import Path

import pandas as pd
import plotly.express as px

# --- Load and Prepare Data ---
# The CSV is looked up next to this script.
resource_dir = Path(__file__).resolve().parent
data_path = resource_dir / "sales_data.csv"

try:
    df = pd.read_csv(data_path, parse_dates=["Date"])
except FileNotFoundError:
    print("Error: sales_data.csv not found. Keep the CSV beside this script.")
    # An empty frame routes execution to the "skipping" branch below.
    df = pd.DataFrame()
else:
    # Rows with any missing value are dropped for simplicity.
    df.dropna(inplace=True)
    print("Data loaded successfully for exercises.")

if df.empty:
    print("\nSkipping exercises as DataFrame could not be loaded.")
else:
    # --- Exercise 1: Interactive Sales by Product ---
    print("\n--- Solution to Exercise 1 ---")

    # Total revenue per product, with 'Product' kept as a regular column.
    product_revenue = df.groupby("Product", as_index=False)["Revenue"].sum()

    fig1 = px.bar(
        product_revenue,
        x="Product",
        y="Revenue",
        color="Product",
        title="Total Revenue by Product",
        labels={"Revenue": "Total Revenue (USD)"},
    )

    # fig1.show() would display the chart; here it is saved as HTML instead.
    fig1.write_html("product_revenue_bar_chart.html")
    print("Plot for Exercise 1 saved to 'product_revenue_bar_chart.html'")

    # --- Exercise 2: Interactive Revenue vs. Units Sold ---
    print("\n--- Solution to Exercise 2 ---")

    axis_labels = {
        "Units Sold": "Number of Units Sold",
        "Revenue": "Total Revenue (USD)",
    }
    fig2 = px.scatter(
        df,
        x="Units Sold",
        y="Revenue",
        color="Region",
        hover_data=["Product"],  # Product appears in the hover tooltip
        title="Revenue vs. Units Sold by Region",
        labels=axis_labels,
    )

    fig2.write_html("revenue_scatterplot.html")
    print("Plot for Exercise 2 saved to 'revenue_scatterplot.html'")
```