
Deep Learning with PyTorch: Complete Beginner's Guide

Learn deep learning with PyTorch from scratch. Build neural networks, CNNs, RNNs, and train models for image classification and NLP tasks.

Moshiour Rahman

What is PyTorch?

PyTorch is an open-source deep learning framework originally developed by Facebook's AI Research lab (now Meta AI). It's known for its dynamic computation graphs, intuitive Pythonic API, and strong GPU acceleration support.
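
Because the graph is built on the fly at runtime, ordinary Python control flow can shape the computation. A minimal sketch (the threshold of 100 is arbitrary, chosen just for illustration):

import torch

x = torch.tensor(3.0, requires_grad=True)
y = x
# The loop itself becomes part of the graph; it runs 6 times here
while y < 100:
    y = y * 2
y.backward()
print(x.grad)  # tensor(64.) -- the product of however many doublings actually ran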

PyTorch vs TensorFlow

| Feature | PyTorch | TensorFlow |
| --- | --- | --- |
| Graph type | Dynamic | Static (eager available) |
| Debugging | Easy with Python | More complex |
| Learning curve | Gentler | Steeper |
| Production | TorchServe | TF Serving |
| Research | Preferred | Growing |

Installation

pip install torch torchvision torchaudio

Then import the core modules used throughout this guide:

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
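
A quick way to verify the install is to print the version and check whether a GPU is visible:

import torch

print(torch.__version__)          # installed PyTorch version
print(torch.cuda.is_available())  # True if a CUDA-capable GPU is usable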

Tensors

Creating Tensors

import torch

# From Python list
x = torch.tensor([1, 2, 3, 4])
print(x)  # tensor([1, 2, 3, 4])

# 2D tensor
matrix = torch.tensor([[1, 2], [3, 4]])

# Zeros and ones
zeros = torch.zeros(3, 4)
ones = torch.ones(2, 3)

# Random tensors
random = torch.rand(3, 3)  # Uniform [0, 1)
randn = torch.randn(3, 3)  # Normal distribution

# Range
arange = torch.arange(0, 10, 2)  # tensor([0, 2, 4, 6, 8])

# Like another tensor
x = torch.rand(2, 3)
y = torch.zeros_like(x)

Tensor Properties

x = torch.rand(3, 4)

print(x.shape)      # torch.Size([3, 4])
print(x.dtype)      # torch.float32
print(x.device)     # cpu
print(x.requires_grad)  # False

Tensor Operations

a = torch.tensor([1, 2, 3], dtype=torch.float32)
b = torch.tensor([4, 5, 6], dtype=torch.float32)

# Element-wise operations
print(a + b)  # tensor([5., 7., 9.])
print(a * b)  # tensor([4., 10., 18.])

# Matrix multiplication
x = torch.rand(2, 3)
y = torch.rand(3, 4)
z = torch.mm(x, y)  # or x @ y
print(z.shape)  # torch.Size([2, 4])

# Reshape
x = torch.arange(12)
x = x.view(3, 4)  # or x.reshape(3, 4)
print(x.shape)  # torch.Size([3, 4])

# Aggregations
print(x.sum())
print(x.mean())
print(x.max())
print(x.argmax())

GPU Support

# Check CUDA availability
print(torch.cuda.is_available())

# Move tensor to GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x = torch.rand(3, 3).to(device)

# Create directly on GPU
x = torch.rand(3, 3, device=device)

Autograd

Automatic Differentiation

# Enable gradient tracking
x = torch.tensor([2.0], requires_grad=True)
y = x ** 2 + 3 * x + 1

# Compute gradients
y.backward()
print(x.grad)  # tensor([7.])  since dy/dx = 2x + 3 = 7 at x = 2

# Disable gradient tracking
with torch.no_grad():
    z = x * 2  # No gradient computed
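
To see how autograd drives training, here is a minimal hand-rolled gradient descent that minimizes f(w) = (w - 5)^2, the same loop the optimizers below automate:

w = torch.tensor(0.0, requires_grad=True)
lr = 0.1

for _ in range(50):
    loss = (w - 5) ** 2
    loss.backward()           # populates w.grad
    with torch.no_grad():
        w -= lr * w.grad      # manual gradient descent step
    w.grad.zero_()            # gradients accumulate, so reset each step

print(w.item())  # close to 5.0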

Building Neural Networks

Simple Neural Network

import torch.nn as nn

class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# Create model
model = SimpleNN(input_size=784, hidden_size=128, num_classes=10)
print(model)
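
A quick sanity check with a dummy batch (the batch size of 64 is arbitrary):

x = torch.rand(64, 784)  # fake batch of flattened 28x28 images
logits = model(x)
print(logits.shape)  # torch.Size([64, 10])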

Using nn.Sequential

model = nn.Sequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(128, 10)
)

Training Loop

Complete Training Example

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Hyperparameters
batch_size = 64
learning_rate = 0.001
epochs = 10

# Data loading
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Model
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 10)
)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

for epoch in range(epochs):
    model.train()
    total_loss = 0

    for batch_idx, (data, targets) in enumerate(train_loader):
        data, targets = data.to(device), targets.to(device)

        # Forward pass
        outputs = model(data)
        loss = criterion(outputs, targets)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}')

# Evaluation
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for data, targets in test_loader:
        data, targets = data.to(device), targets.to(device)
        outputs = model(data)
        _, predicted = torch.max(outputs, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

print(f'Test Accuracy: {100 * correct / total:.2f}%')

Convolutional Neural Networks

CNN for Image Classification

class CNN(nn.Module):
    def __init__(self, num_classes=10):
        super(CNN, self).__init__()

        # Convolutional layers
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # Fully connected layers
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 3 * 3, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

model = CNN(num_classes=10)
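
The 128 * 3 * 3 input to the first linear layer assumes 28x28 single-channel images: each pooling layer halves the spatial size (28 → 14 → 7 → 3). A dummy forward pass confirms the shapes line up:

x = torch.rand(1, 1, 28, 28)  # one fake MNIST-sized image
print(model(x).shape)  # torch.Size([1, 10])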

Transfer Learning with ResNet

import torchvision.models as models

# Load pretrained weights (torchvision >= 0.13 uses the weights API instead of pretrained=True)
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Freeze pretrained layers
for param in model.parameters():
    param.requires_grad = False

# Replace final layer
num_classes = 10
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Only train the new layer
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
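
You can confirm that only the new head will be updated by counting trainable parameters, a quick sketch:

trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f'Trainable: {trainable:,} / {total:,} parameters')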

Recurrent Neural Networks

LSTM for Sequence Data

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))

        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out

model = LSTMModel(input_size=28, hidden_size=128, num_layers=2, num_classes=10)
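
With these dimensions, each 28x28 image can be treated as a sequence of 28 rows with 28 features per step, a common toy setup for running sequence models on MNIST:

x = torch.rand(32, 28, 28)  # (batch, seq_len, input_size)
out = model(x)
print(out.shape)  # torch.Size([32, 10])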

Data Loading

Custom Dataset

import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd

class CustomDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.data.iloc[idx, 1:].values.reshape(28, 28).astype('float32')
        label = int(self.data.iloc[idx, 0])

        if self.transform:
            image = self.transform(image)  # e.g. ToTensor() already returns a tensor
        else:
            image = torch.tensor(image)

        return image, torch.tensor(label)

# Create DataLoader
dataset = CustomDataset('data.csv')
dataloader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)
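
Assuming data.csv holds a label in the first column followed by 784 pixel values per row, the loader yields ready-made batches (shapes shown are without a transform):

images, labels = next(iter(dataloader))
print(images.shape, labels.shape)  # torch.Size([32, 28, 28]) torch.Size([32])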

Data Augmentation

from torchvision import transforms

train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(224),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

Model Saving and Loading

# Save entire model
torch.save(model, 'model.pth')

# Load entire model (PyTorch 2.6+ defaults to weights_only=True, which blocks full pickles)
model = torch.load('model.pth', weights_only=False)

# Save only state dict (recommended)
torch.save(model.state_dict(), 'model_state.pth')

# Load state dict
model = CNN()
model.load_state_dict(torch.load('model_state.pth'))
model.eval()

# Save checkpoint (epoch and loss come from your training loop)
checkpoint = {
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss
}
torch.save(checkpoint, 'checkpoint.pth')

# Load checkpoint
checkpoint = torch.load('checkpoint.pth')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

Learning Rate Scheduling

# Step LR
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# Exponential decay
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)

# Reduce on plateau
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5)

# In the training loop (train() and validate() stand in for your own epoch functions)
for epoch in range(epochs):
    train()
    val_loss = validate()
    scheduler.step(val_loss)  # ReduceLROnPlateau needs the metric
    # scheduler.step()        # other schedulers take no argument
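
You can watch a schedule take effect by reading the current learning rate each epoch:

print(scheduler.get_last_lr())          # StepLR, ExponentialLR, etc.
print(optimizer.param_groups[0]['lr'])  # works with any optimizer or scheduler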

Early Stopping

class EarlyStopping:
    def __init__(self, patience=5, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0

# Usage (train() and validate() are your own epoch functions, as above)
early_stopping = EarlyStopping(patience=5)

for epoch in range(epochs):
    train_loss = train()
    val_loss = validate()

    early_stopping(val_loss)
    if early_stopping.early_stop:
        print("Early stopping triggered")
        break

Summary

| Component | Purpose |
| --- | --- |
| Tensors | Data containers with GPU support |
| Autograd | Automatic differentiation |
| nn.Module | Base class for models |
| DataLoader | Batch data loading |
| Optimizers | Update model parameters |
| Schedulers | Adjust learning rate |

PyTorch provides a flexible, Pythonic framework for deep learning research and production.
