Python 7 min read

Matplotlib Tutorial: Complete Guide to Data Visualization in Python

Master data visualization with Matplotlib. Learn to create line plots, bar charts, scatter plots, histograms, and customize your visualizations.

MR

Moshiour Rahman

Advertisement

What is Matplotlib?

Matplotlib is Python’s most popular data visualization library. It creates static, animated, and interactive visualizations with just a few lines of code.

Installation

pip install matplotlib

Then import it, together with NumPy (used to generate data in the examples below):

import matplotlib.pyplot as plt
import numpy as np

Basic Plots

Line Plot

# Smallest possible example: one line through five points, with a title
# and axis labels.
x = list(range(1, 6))
y = [2 * value for value in x]

plt.plot(x, y)
plt.xlabel('X Axis')
plt.ylabel('Y Axis')
plt.title('Simple Line Plot')
plt.show()

Multiple Lines

# Draw sin, cos and tan over [0, 10] on one set of axes, each with its own
# colour, line style and legend entry.
x = np.linspace(0, 10, 100)

# tan(x) jumps from large positive to large negative values at odd multiples
# of pi/2.  Plotted as-is, the samples on either side of each jump are joined
# by a spurious near-vertical line.  A NaN sample breaks the line, so blank
# out the extreme values; they lie far outside the y-limits set below.
tan_x = np.tan(x)
tan_x[np.abs(tan_x) > 10] = np.nan

plt.figure(figsize=(10, 6))
plt.plot(x, np.sin(x), label='sin(x)', color='blue', linestyle='-')
plt.plot(x, np.cos(x), label='cos(x)', color='red', linestyle='--')
plt.plot(x, tan_x, label='tan(x)', color='green', linestyle=':')

plt.title('Trigonometric Functions')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.grid(True, alpha=0.3)
plt.ylim(-2, 2)  # keep sin/cos readable next to the unbounded tan curve
plt.show()

Line Styles and Markers

# A format string packs colour, marker and line style into one short code.
# Each code gets its own panel, titled with the code that produced it.
x = np.array([1, 2, 3, 4, 5])
y = x ** 2

format_codes = [
    'ro-',   # red circles joined by a solid line
    'b^--',  # blue triangles joined by a dashed line
    'gs:',   # green squares joined by a dotted line
]

plt.figure(figsize=(12, 4))
for position, code in enumerate(format_codes, start=1):
    plt.subplot(1, 3, position)
    plt.plot(x, y, code)
    plt.title(code)

plt.tight_layout()
plt.show()

Bar Charts

Basic Bar Chart

# One vertical bar per language.  `categories` and `values` are reused by
# the horizontal bar chart example that follows.
categories = ['Python', 'JavaScript', 'Java', 'C++', 'Go']
values = [85, 78, 72, 65, 58]

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(categories, values, color='steelblue', edgecolor='black')
ax.set_title('Programming Language Popularity')
ax.set_xlabel('Language')
ax.set_ylabel('Popularity Score')
plt.show()

Horizontal Bar Chart

# Same data as the basic bar chart (`categories` and `values` defined
# there), drawn with horizontal bars, so the score moves to the x-axis.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(categories, values, color='coral')
ax.set_title('Programming Language Popularity')
ax.set_xlabel('Popularity Score')
plt.show()

Grouped Bar Chart

# Three products side by side within each quarter.  Every series is drawn
# at the quarter's integer position shifted by a whole number of bar widths.
labels = ['Q1', 'Q2', 'Q3', 'Q4']
series = [
    ('Product A', [20, 35, 30, 35], '#2ecc71'),
    ('Product B', [25, 32, 34, 20], '#3498db'),
    ('Product C', [22, 30, 28, 25], '#e74c3c'),
]

x = np.arange(len(labels))
width = 0.25

fig, ax = plt.subplots(figsize=(10, 6))
# Slots -1, 0, +1 place the bars left of, on, and right of each tick.
for slot, (name, sales, colour) in zip((-1, 0, 1), series):
    ax.bar(x + slot * width, sales, width, label=name, color=colour)

ax.set_title('Quarterly Sales by Product')
ax.set_xlabel('Quarter')
ax.set_ylabel('Sales')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()

plt.tight_layout()
plt.show()

Stacked Bar Chart

# Stack the device shares on top of each other for every year.  `baseline`
# is the running height of the bars drawn so far and is passed as `bottom`.
labels = ['2020', '2021', '2022', '2023']
shares = [
    ('Mobile', [30, 35, 40, 45], '#3498db'),
    ('Desktop', [50, 45, 40, 35], '#2ecc71'),
    ('Tablet', [20, 20, 20, 20], '#e74c3c'),
]

plt.figure(figsize=(10, 6))
baseline = np.zeros(len(labels))
for device, share, colour in shares:
    plt.bar(labels, share, bottom=baseline, label=device, color=colour)
    baseline = baseline + np.array(share)

plt.title('Website Traffic by Device')
plt.xlabel('Year')
plt.ylabel('Traffic Share (%)')
plt.legend()
plt.show()

Scatter Plots

Basic Scatter Plot

# 100 points whose y value is x plus Gaussian noise scaled by 0.5.
np.random.seed(42)  # fixed seed: the same cloud on every run
x = np.random.randn(100)
noise = np.random.randn(100) * 0.5
y = x + noise

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(x, y, alpha=0.6, edgecolors='black', linewidth=0.5)
ax.set_title('Scatter Plot')
ax.set_xlabel('X')
ax.set_ylabel('Y')
plt.show()

Scatter with Size and Color

# Bubble chart: besides its position, every point carries a colour value
# (mapped through the 'viridis' colormap) and a marker area.
np.random.seed(42)
n = 50
x = np.random.rand(n)
y = np.random.rand(n)
colors = np.random.rand(n)
sizes = np.random.rand(n) * 1000

fig, ax = plt.subplots(figsize=(10, 6))
bubbles = ax.scatter(x, y, s=sizes, c=colors, cmap='viridis', alpha=0.5)
# The colorbar is built from the scatter artist so it shows the same mapping.
fig.colorbar(bubbles, ax=ax, label='Color Value')
ax.set_title('Bubble Chart')
ax.set_xlabel('X')
ax.set_ylabel('Y')
plt.show()

Histograms

Basic Histogram

# Histogram of 1000 standard-normal samples split into 30 bins.
np.random.seed(42)
samples = np.random.randn(1000)

fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(samples, bins=30, color='steelblue', edgecolor='black', alpha=0.7)
ax.set_title('Normal Distribution')
ax.set_xlabel('Value')
ax.set_ylabel('Frequency')
plt.show()

Multiple Histograms

# Overlay two samples as semi-transparent histograms to compare them.
np.random.seed(42)
data1 = np.random.normal(0, 1, 1000)
data2 = np.random.normal(2, 1.5, 1000)

# Use one set of 30 bins spanning both samples.  With `bins=30` passed to
# each call separately, every histogram gets its own bin edges, so the bars
# have different widths and their heights cannot be compared directly.
bins = np.histogram_bin_edges(np.concatenate([data1, data2]), bins=30)

plt.figure(figsize=(10, 6))
plt.hist(data1, bins=bins, alpha=0.5, label='Distribution 1', color='blue')
plt.hist(data2, bins=bins, alpha=0.5, label='Distribution 2', color='red')
plt.title('Comparing Distributions')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.legend()
plt.show()

Pie Charts

# Pie chart of language usage; the first slice is pulled slightly outwards.
slices = [
    ('Python', 35, '#3498db'),
    ('JavaScript', 30, '#f1c40f'),
    ('Java', 20, '#e74c3c'),
    ('Others', 15, '#95a5a6'),
]
labels = [name for name, _, _ in slices]
sizes = [share for _, share, _ in slices]
colors = [colour for _, _, colour in slices]
# Offset of each slice from the centre, as a fraction of the radius.
explode = tuple(0.05 if index == 0 else 0 for index in range(len(slices)))

fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(
    sizes,
    explode=explode,
    labels=labels,
    colors=colors,
    autopct='%1.1f%%',  # print each share with one decimal place
    shadow=True,
    startangle=90,
)
ax.set_title('Programming Language Usage')
ax.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.show()

Subplots

Grid of Subplots

# A 2x2 grid with a different chart type in every cell.  The axes array is
# unpacked into named axes so each panel reads on its own.
fig, ((ax_line, ax_bar), (ax_scatter, ax_hist)) = plt.subplots(
    2, 2, figsize=(12, 10)
)

# Top-left: line
grid = np.linspace(0, 10, 100)
ax_line.plot(grid, np.sin(grid))
ax_line.set_title('Line Plot')

# Top-right: bar
names = ['A', 'B', 'C', 'D']
heights = [23, 45, 56, 78]
ax_bar.bar(names, heights, color='coral')
ax_bar.set_title('Bar Chart')

# Bottom-left: scatter (seeded so the random panels are reproducible)
np.random.seed(42)
scatter_x = np.random.rand(50)
scatter_y = np.random.rand(50)
ax_scatter.scatter(scatter_x, scatter_y)
ax_scatter.set_title('Scatter Plot')

# Bottom-right: histogram
draws = np.random.randn(1000)
ax_hist.hist(draws, bins=30, color='green', alpha=0.7)
ax_hist.set_title('Histogram')

plt.tight_layout()
plt.show()

Unequal Subplots

# Mix subplot grids on one figure: the left axes comes from a 1x2 grid and
# takes the whole left half, while the right half holds cells 2 and 4 of a
# 2x2 grid (top-right and bottom-right).
np.random.seed(42)  # seed so this example is reproducible when run alone

fig = plt.figure(figsize=(12, 8))

# Large plot on left
ax1 = fig.add_subplot(1, 2, 1)
ax1.plot(np.random.randn(100).cumsum())
ax1.set_title('Main Plot')

# Two small plots on right
ax2 = fig.add_subplot(2, 2, 2)
ax2.bar(['A', 'B', 'C'], [10, 20, 15])
ax2.set_title('Bar Chart')

ax3 = fig.add_subplot(2, 2, 4)
ax3.scatter(np.random.rand(20), np.random.rand(20))
ax3.set_title('Scatter')

plt.tight_layout()
plt.show()

Customization

Colors and Styles

# The same sine curve drawn three times, each panel using a different way
# of specifying the line colour.
x = np.linspace(0, 10, 100)

color_specs = [
    ('red', 'Named Color'),          # colour given by name
    ('#2ecc71', 'Hex Color'),        # colour given as a hex string
    ((0.1, 0.2, 0.5), 'RGB Tuple'),  # colour given as an (r, g, b) tuple
]

plt.figure(figsize=(12, 4))
for position, (spec, caption) in enumerate(color_specs, start=1):
    plt.subplot(1, 3, position)
    plt.plot(x, np.sin(x), color=spec)
    plt.title(caption)

plt.tight_layout()
plt.show()

Annotations

# Mark the maximum and minimum of one period of sin(x) with arrow annotations.
x = np.linspace(0, 2 * np.pi, 100)
y = np.sin(x)

plt.figure(figsize=(10, 6))
plt.plot(x, y)

# `xy` is the point the arrow points at; `xytext` is where the label sits
# (both in data coordinates).
plt.annotate('Maximum', xy=(np.pi/2, 1), xytext=(np.pi/2 + 1, 1.2),
             arrowprops=dict(arrowstyle='->', color='red'),
             fontsize=12, color='red')

plt.annotate('Minimum', xy=(3*np.pi/2, -1), xytext=(3*np.pi/2 + 1, -1.2),
             arrowprops=dict(arrowstyle='->', color='blue'),
             fontsize=12, color='blue')

# The labels sit at y = +/-1.2, but autoscaling only accounts for the curve
# (range [-1, 1]), which would leave the text outside the axes.  Widen the
# y-range so both labels are drawn inside the plot area.
plt.ylim(-1.5, 1.5)

plt.title('Annotated Sine Wave')
plt.xlabel('x')
plt.ylabel('sin(x)')
plt.grid(True, alpha=0.3)
plt.show()

Custom Ticks

# Replace the default numeric ticks of a sine plot with multiples of pi/2 on
# the x-axis and a fixed set of values on the y-axis.
x = np.linspace(0, 2 * np.pi, 100)
y = np.sin(x)

# Tick position -> label shown at that position.
x_tick_labels = {
    0: '0',
    np.pi/2: 'π/2',
    np.pi: 'π',
    3*np.pi/2: '3π/2',
    2*np.pi: '2π',
}

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(x, y)

ax.set_xticks(list(x_tick_labels.keys()))
ax.set_xticklabels(list(x_tick_labels.values()))
ax.set_yticks([-1, -0.5, 0, 0.5, 1])

ax.set_title('Sine Wave with Custom Ticks')
ax.grid(True, alpha=0.3)
plt.show()

Heatmaps

# Show a 10x10 matrix of uniform random numbers as a colour-coded image.
np.random.seed(42)
matrix = np.random.rand(10, 10)

fig, ax = plt.subplots(figsize=(10, 8))
# aspect='auto' lets the cells stretch to fill the axes instead of being
# forced square.
image = ax.imshow(matrix, cmap='viridis', aspect='auto')
fig.colorbar(image, ax=ax, label='Value')
ax.set_title('Heatmap')
ax.set_xlabel('X')
ax.set_ylabel('Y')
plt.show()

Correlation Matrix

# Build a stand-in "correlation matrix" (random, symmetric, ones on the
# diagonal) and draw it as an annotated heatmap.
np.random.seed(42)
labels = ['Var A', 'Var B', 'Var C', 'Var D', 'Var E']
n_vars = 5

raw = np.random.rand(n_vars, n_vars)
corr = (raw + raw.T) / 2   # average with the transpose -> symmetric
np.fill_diagonal(corr, 1)  # every variable correlates perfectly with itself

fig, ax = plt.subplots(figsize=(8, 6))
# Pin the colour scale to the full correlation range [-1, 1].
image = ax.imshow(corr, cmap='coolwarm', vmin=-1, vmax=1)
fig.colorbar(image, ax=ax, label='Correlation')

ax.set_xticks(range(n_vars))
ax.set_xticklabels(labels, rotation=45)
ax.set_yticks(range(n_vars))
ax.set_yticklabels(labels)

# Write each value into its cell; imshow puts columns on x and rows on y.
for (row, col), value in np.ndenumerate(corr):
    ax.text(col, row, f'{value:.2f}', ha='center', va='center')

ax.set_title('Correlation Matrix')
plt.tight_layout()
plt.show()

Box Plots

# Compare four normally distributed samples (standard deviations 1 to 4)
# as colour-filled box plots.
np.random.seed(42)
group_names = ['Group 1', 'Group 2', 'Group 3', 'Group 4']
data = [np.random.normal(0, std, 100) for std in range(1, 5)]

plt.figure(figsize=(10, 6))
# patch_artist=True draws the boxes as filled patches so they can be coloured.
bp = plt.boxplot(data, patch_artist=True)

# Name the groups through the tick labels rather than boxplot's `labels`
# argument, which is deprecated since Matplotlib 3.9 (renamed `tick_labels`).
# By default the boxes are placed at x = 1, 2, ..., N.
plt.xticks(range(1, len(group_names) + 1), group_names)

# Customize colors
colors = ['#3498db', '#2ecc71', '#e74c3c', '#f1c40f']
for patch, color in zip(bp['boxes'], colors):
    patch.set_facecolor(color)

plt.title('Box Plot Comparison')
plt.ylabel('Values')
plt.grid(True, alpha=0.3)
plt.show()

Saving Figures

# Build a small figure, then write it to disk in several formats.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot([1, 2, 3, 4], [1, 4, 2, 3])
ax.set_title('Sample Plot')

# Keyword arguments shared between the savefig calls below.
tight = {'bbox_inches': 'tight'}
high_res = {'dpi': 300, **tight}

# One raster file (PNG at 300 dpi) and two vector files (PDF, SVG)
fig.savefig('plot.png', **high_res)
fig.savefig('plot.pdf', **tight)
fig.savefig('plot.svg', **tight)

# PNG again, this time with a transparent background
fig.savefig('plot_transparent.png', transparent=True, **high_res)

Practical Examples

Sales Dashboard

# Four-panel dashboard: trend line, category pie, grouped regional bars and
# a histogram of daily sales.  The 2x2 axes array is unpacked into named
# axes so each panel's code reads on its own.
fig, ((ax_trend, ax_share), (ax_region, ax_daily)) = plt.subplots(
    2, 2, figsize=(14, 10)
)

# Top-left: monthly sales trend, with the area under the line shaded
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']
sales = [120, 135, 125, 140, 155, 170]
ax_trend.plot(months, sales, marker='o', color='#3498db', linewidth=2)
ax_trend.fill_between(months, sales, alpha=0.3)
ax_trend.set_title('Monthly Sales Trend')
ax_trend.set_ylabel('Sales ($K)')

# Top-right: share of sales per category
categories = ['Electronics', 'Clothing', 'Food', 'Books']
cat_sales = [45, 30, 15, 10]
palette = ['#3498db', '#2ecc71', '#e74c3c', '#f1c40f']
ax_share.pie(cat_sales, labels=categories, autopct='%1.1f%%', colors=palette)
ax_share.set_title('Sales by Category')

# Bottom-left: Q1 vs Q2 per region, half a bar width either side of each tick
regions = ['North', 'South', 'East', 'West']
quarters = [
    ('Q1', [25, 30, 20, 25], '#3498db'),
    ('Q2', [30, 28, 25, 27], '#2ecc71'),
]
x = np.arange(len(regions))
width = 0.35
for side, (quarter, figures, colour) in zip((-1, 1), quarters):
    ax_region.bar(x + side * width / 2, figures, width,
                  label=quarter, color=colour)
ax_region.set_xticks(x)
ax_region.set_xticklabels(regions)
ax_region.set_title('Regional Sales Comparison')
ax_region.legend()

# Bottom-right: distribution of 100 simulated daily sales figures
np.random.seed(42)
daily_sales = np.random.normal(5000, 1000, 100)
ax_daily.hist(daily_sales, bins=20, color='#9b59b6', edgecolor='black')
ax_daily.set_title('Daily Sales Distribution')
ax_daily.set_xlabel('Sales ($)')

plt.tight_layout()
plt.show()

Time Series Plot

# Simulate a year of daily prices as a random walk starting near 100, then
# plot it together with a trailing moving average.
np.random.seed(42)
dates = np.arange('2023-01', '2024-01', dtype='datetime64[D]')
daily_steps = np.random.randn(len(dates))
values = 100 + np.cumsum(daily_steps)

fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(dates, values, color='#3498db', linewidth=1)
ax.fill_between(dates, values, alpha=0.3)

# Moving average: convolve with a flat kernel.  mode='valid' keeps only the
# positions covered by a full window, so the first average corresponds to
# the date at index window - 1.
window = 30
kernel = np.ones(window) / window
ma = np.convolve(values, kernel, mode='valid')
ax.plot(dates[window - 1:], ma, color='#e74c3c', linewidth=2,
        label=f'{window}-day MA')

ax.set_title('Stock Price Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Price ($)')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

Quick Reference

| Function | Description |
| --- | --- |
| plt.plot() | Line plot |
| plt.bar() / plt.barh() | Bar chart |
| plt.scatter() | Scatter plot |
| plt.hist() | Histogram |
| plt.pie() | Pie chart |
| plt.boxplot() | Box plot |
| plt.imshow() | Heatmap/image |
| plt.subplot() | Create subplots |
| plt.savefig() | Save figure |

Summary

Matplotlib is essential for:

  1. Exploratory analysis - quick data visualization
  2. Publication-quality figures - customizable and professional
  3. Dashboard creation - combining multiple plots
  4. Integration - works with Pandas, NumPy, scikit-learn

Master Matplotlib to effectively communicate insights from your data.

Advertisement

MR

Moshiour Rahman

Software Architect & AI Engineer

Share:
MR

Moshiour Rahman

Software Architect & AI Engineer

Enterprise software architect with deep expertise in financial systems, distributed architecture, and AI-powered applications. Building large-scale systems at Fortune 500 companies. Specializing in LLM orchestration, multi-agent systems, and cloud-native solutions. I share battle-tested patterns from real enterprise projects.

Related Articles

Comments

Comments are powered by GitHub Discussions.

Configure Giscus at giscus.app to enable comments.