Deep Learning with PyTorch: Complete Beginner's Guide
Learn deep learning with PyTorch from scratch. Build neural networks, CNNs, RNNs, and train models for image classification and NLP tasks.
Moshiour Rahman
What is PyTorch?
PyTorch is an open-source deep learning framework developed by Meta AI (formerly Facebook AI Research). It’s known for its dynamic computation graphs, intuitive Pythonic API, and strong GPU acceleration support.
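Because the graph is built on the fly as Python executes, ordinary control flow can change what gets recorded from one call to the next. A minimal sketch:
import torch
x = torch.randn(3, requires_grad=True)
# The graph is defined by running the code, so a plain Python
# branch decides which operations are recorded
if x.sum() > 0:
    y = (x * 2).sum()
else:
    y = (x ** 3).sum()
y.backward()   # gradients flow through whichever branch actually ran
print(x.grad)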
PyTorch vs TensorFlow
| Feature | PyTorch | TensorFlow |
|---|---|---|
| Graph type | Dynamic (eager) | Eager by default since TF2; static via tf.function |
| Debugging | Easy with Python | More complex |
| Learning curve | Gentler | Steeper |
| Production | TorchServe | TF Serving |
| Research | Preferred in most new papers | Less common |
Installation
pip install torch torchvision torchaudio
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
Tensors
Creating Tensors
import torch
# From Python list
x = torch.tensor([1, 2, 3, 4])
print(x) # tensor([1, 2, 3, 4])
# 2D tensor
matrix = torch.tensor([[1, 2], [3, 4]])
# Zeros and ones
zeros = torch.zeros(3, 4)
ones = torch.ones(2, 3)
# Random tensors
random = torch.rand(3, 3) # Uniform [0, 1)
randn = torch.randn(3, 3) # Normal distribution
# Range
arange = torch.arange(0, 10, 2) # tensor([0, 2, 4, 6, 8])
# Like another tensor
x = torch.rand(2, 3)
y = torch.zeros_like(x)
Tensor Properties
x = torch.rand(3, 4)
print(x.shape) # torch.Size([3, 4])
print(x.dtype) # torch.float32
print(x.device) # cpu
print(x.requires_grad) # False
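These properties can be changed with casts and `.to()`; a quick sketch:
x64 = x.to(torch.float64)   # cast to double precision
xi = x.int()                # cast to int32 (values truncated)
print(x64.dtype, xi.dtype)  # torch.float64 torch.int32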
Tensor Operations
a = torch.tensor([1, 2, 3], dtype=torch.float32)
b = torch.tensor([4, 5, 6], dtype=torch.float32)
# Element-wise operations
print(a + b) # tensor([5., 7., 9.])
print(a * b) # tensor([4., 10., 18.])
# Matrix multiplication
x = torch.rand(2, 3)
y = torch.rand(3, 4)
z = torch.mm(x, y) # or x @ y
print(z.shape) # torch.Size([2, 4])
# Reshape
x = torch.arange(12)
x = x.view(3, 4) # or x.reshape(3, 4)
print(x.shape) # torch.Size([3, 4])
# Aggregations
print(x.sum())
print(x.mean())
print(x.max())
print(x.argmax())
GPU Support
# Check CUDA availability
print(torch.cuda.is_available())
# Move tensor to GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x = torch.rand(3, 3).to(device)
# Create directly on GPU
x = torch.rand(3, 3, device=device)
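Operands must live on the same device, and GPU tensors need to come back to the CPU before NumPy conversion; a short sketch:
a = torch.rand(3, 3, device=device)
b = torch.rand(3, 3, device=device)  # same device as a
c = (a @ b).cpu().numpy()            # move to CPU, then convert
print(type(c))                       # <class 'numpy.ndarray'>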
Autograd
Automatic Differentiation
# Enable gradient tracking
x = torch.tensor([2.0], requires_grad=True)
y = x ** 2 + 3 * x + 1
# Compute gradients
y.backward()
print(x.grad)  # tensor([7.]) since dy/dx = 2*x + 3 = 7 at x = 2
# Disable gradient tracking
with torch.no_grad():
    z = x * 2  # No gradient computed
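One caveat: `.backward()` accumulates into `.grad` rather than overwriting it, which is why training loops call `zero_grad()`. A small sketch:
x = torch.tensor([2.0], requires_grad=True)
for _ in range(2):
    y = x ** 2
    y.backward()
print(x.grad)   # tensor([8.]) -- two passes of dy/dx = 4 add up
x.grad.zero_()  # reset before the next backward pass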
Building Neural Networks
Simple Neural Network
import torch.nn as nn
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x
# Create model
model = SimpleNN(input_size=784, hidden_size=128, num_classes=10)
print(model)
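A forward pass on a dummy batch is a quick sanity check on the shapes:
dummy = torch.rand(32, 784)  # batch of 32 flattened 28x28 images
out = model(dummy)
print(out.shape)  # torch.Size([32, 10]) -- raw logits, one per class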
Using nn.Sequential
model = nn.Sequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(128, 10)
)
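`nn.Sequential` is convenient for plain feed-forward stacks; subclass `nn.Module` when `forward` needs branching or multiple inputs. Counting trainable parameters is a useful quick check:
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'{num_params:,} trainable parameters')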
Training Loop
Complete Training Example
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
# Hyperparameters
batch_size = 64
learning_rate = 0.001
epochs = 10
# Data loading
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
# Model
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 10)
)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Training loop
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for batch_idx, (data, targets) in enumerate(train_loader):
        data, targets = data.to(device), targets.to(device)

        # Forward pass
        outputs = model(data)
        loss = criterion(outputs, targets)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}')
# Evaluation
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data, targets in test_loader:
        data, targets = data.to(device), targets.to(device)
        outputs = model(data)
        _, predicted = torch.max(outputs, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()
print(f'Test Accuracy: {100 * correct / total:.2f}%')
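After training, inference on a single example looks like this (a sketch reusing `test_dataset` from above):
model.eval()
image, label = test_dataset[0]
with torch.no_grad():
    logits = model(image.unsqueeze(0).to(device))  # add a batch dimension
    pred = logits.argmax(dim=1).item()
print(f'predicted {pred}, actual {label}')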
Convolutional Neural Networks
CNN for Image Classification
class CNN(nn.Module):
    def __init__(self, num_classes=10):
        super(CNN, self).__init__()
        # Convolutional layers
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # Fully connected layers
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 3 * 3, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x
model = CNN(num_classes=10)
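The `128 * 3 * 3` input size follows from three stride-2 poolings of a 28x28 image (28 → 14 → 7 → 3, with flooring); a dummy forward pass confirms it:
dummy = torch.rand(1, 1, 28, 28)  # one 1-channel 28x28 image
print(model(dummy).shape)         # torch.Size([1, 10])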
Transfer Learning with ResNet
import torchvision.models as models
# Load pretrained ImageNet weights (the weights API replaces the deprecated pretrained=True)
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
# Freeze pretrained layers
for param in model.parameters():
    param.requires_grad = False
# Replace final layer
num_classes = 10
model.fc = nn.Linear(model.fc.in_features, num_classes)
# Only train the new layer
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
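A common variant (one option among several, not the only recipe) is to also fine-tune the last residual stage, with a smaller learning rate than the new head:
for param in model.layer4.parameters():  # last residual stage of ResNet-18
    param.requires_grad = True

optimizer = optim.Adam([
    {'params': model.layer4.parameters(), 'lr': 1e-4},  # gentle updates for pretrained weights
    {'params': model.fc.parameters(), 'lr': 1e-3},      # larger steps for the fresh layer
])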
Recurrent Neural Networks
LSTM for Sequence Data
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Initialize hidden and cell states
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))
        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out
model = LSTMModel(input_size=28, hidden_size=128, num_layers=2, num_classes=10)
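With `input_size=28` and `batch_first=True`, each row of a 28x28 MNIST image can serve as one time step; a shape sketch:
images = torch.rand(64, 28, 28)  # (batch, seq_len, input_size)
out = model(images)
print(out.shape)                 # torch.Size([64, 10])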
Data Loading
Custom Dataset
from torch.utils.data import Dataset, DataLoader
import pandas as pd
class CustomDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # Assumes a label-first, MNIST-style CSV: column 0 is the label,
        # the remaining 784 columns are pixel values
        image = self.data.iloc[idx, 1:].values.reshape(28, 28).astype('float32')
        label = int(self.data.iloc[idx, 0])
        if self.transform:
            image = self.transform(image)
        else:
            image = torch.from_numpy(image)
        return image, torch.tensor(label)
# Create DataLoader
dataset = CustomDataset('data.csv')
dataloader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)
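Iterating yields ready-made batches (assuming the MNIST-style CSV layout described in `__getitem__`):
for images, labels in dataloader:
    print(images.shape, labels.shape)  # torch.Size([32, 28, 28]) torch.Size([32])
    break  # just inspect the first batch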
Data Augmentation
from torchvision import transforms
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(224),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
Model Saving and Loading
# Save entire model
torch.save(model, 'model.pth')
# Load entire model (PyTorch 2.6+ defaults torch.load to weights_only=True,
# so unpickling a full model object requires weights_only=False)
model = torch.load('model.pth', weights_only=False)
# Save only state dict (recommended)
torch.save(model.state_dict(), 'model_state.pth')
# Load state dict
model = CNN()
model.load_state_dict(torch.load('model_state.pth'))
model.eval()
# Save checkpoint
checkpoint = {
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss
}
torch.save(checkpoint, 'checkpoint.pth')
# Load checkpoint
checkpoint = torch.load('checkpoint.pth')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']
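To resume, switch back to training mode and continue the loop from the saved epoch; a sketch:
model.train()  # checkpoints saved mid-training should resume in train mode
start_epoch = checkpoint['epoch'] + 1
for epoch in range(start_epoch, epochs):
    ...  # same training loop as before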
Learning Rate Scheduling
# Step LR
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
# Exponential decay
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
# Reduce on plateau
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5)
# In training loop
for epoch in range(epochs):
    train()                   # user-defined training function
    val_loss = validate()     # user-defined validation function
    scheduler.step(val_loss)  # for ReduceLROnPlateau
    # scheduler.step()        # for the other schedulers (no argument)
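Whichever scheduler is used, the effective rate is always visible on the optimizer itself, which is handy for logging:
print(optimizer.param_groups[0]['lr'])  # current learning rate
# most schedulers (not ReduceLROnPlateau) also expose scheduler.get_last_lr()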
Early Stopping
class EarlyStopping:
    def __init__(self, patience=5, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0
# Usage
early_stopping = EarlyStopping(patience=5)
for epoch in range(epochs):
    train_loss = train()
    val_loss = validate()
    early_stopping(val_loss)
    if early_stopping.early_stop:
        print("Early stopping triggered")
        break
Summary
| Component | Purpose |
|---|---|
| Tensors | Data containers with GPU support |
| Autograd | Automatic differentiation |
| nn.Module | Base class for models |
| DataLoader | Batch data loading |
| Optimizers | Update model parameters |
| Schedulers | Adjust learning rate |
PyTorch provides a flexible, Pythonic framework for deep learning research and production.