
Computer Vision with OpenCV and Python: Complete Guide

Master computer vision with OpenCV and Python. Learn image processing, object detection, and face recognition, and build real-world vision applications.

Moshiour Rahman


What is Computer Vision?

Computer Vision enables machines to interpret and understand visual information from images and videos. OpenCV is the most popular library for building CV applications.

Common Applications

| Application       | Examples                               |
| ----------------- | -------------------------------------- |
| Object Detection  | Security cameras, autonomous vehicles  |
| Face Recognition  | Phone unlock, attendance systems       |
| OCR               | Document scanning, license plates      |
| Medical Imaging   | X-ray analysis, tumor detection        |

Getting Started

Installation

pip install opencv-python
pip install numpy matplotlib

Note: opencv-python-headless is a drop-in alternative for servers without a display. Install either opencv-python or opencv-python-headless, not both, since they both provide the cv2 module and conflict with each other.
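
A quick way to confirm the install worked is to import the package and print its version:

import cv2
print(cv2.__version__)  # should print something like 4.x.y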

Basic Operations

import cv2
import numpy as np

# Read image
image = cv2.imread('photo.jpg')
print(f"Shape: {image.shape}")  # (height, width, channels)

# Convert color spaces
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

# Resize image
resized = cv2.resize(image, (640, 480))
scaled = cv2.resize(image, None, fx=0.5, fy=0.5)

# Crop image
cropped = image[100:400, 200:500]  # [y1:y2, x1:x2]

# Save image
cv2.imwrite('output.jpg', image)

# Display image
cv2.imshow('Image', image)
cv2.waitKey(0)
cv2.destroyAllWindows()

Drawing Operations

# Create blank image
canvas = np.zeros((500, 500, 3), dtype=np.uint8)

# Draw shapes
cv2.line(canvas, (0, 0), (500, 500), (255, 0, 0), 2)
cv2.rectangle(canvas, (100, 100), (300, 300), (0, 255, 0), 2)
cv2.circle(canvas, (250, 250), 100, (0, 0, 255), -1)  # -1 = filled

# Draw text
cv2.putText(canvas, 'OpenCV', (150, 450),
            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

Image Processing

Filtering

# Blur
blur = cv2.blur(image, (5, 5))
gaussian = cv2.GaussianBlur(image, (5, 5), 0)
median = cv2.medianBlur(image, 5)
bilateral = cv2.bilateralFilter(image, 9, 75, 75)

# Sharpening
kernel = np.array([[-1, -1, -1],
                   [-1,  9, -1],
                   [-1, -1, -1]])
sharpened = cv2.filter2D(image, -1, kernel)

Edge Detection

# Canny edge detection
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 100, 200)

# Sobel edges
sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
sobel = cv2.magnitude(sobelx, sobely)

# Laplacian
laplacian = cv2.Laplacian(gray, cv2.CV_64F)
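
The Sobel and Laplacian outputs are 64-bit floats, so convert them back to 8-bit before displaying or saving. A minimal sketch (the output filenames are just examples):

# Scale float edge maps back to uint8 for display/saving
sobel_display = cv2.convertScaleAbs(sobel)
laplacian_display = cv2.convertScaleAbs(laplacian)

cv2.imwrite('sobel_edges.jpg', sobel_display)
cv2.imwrite('laplacian_edges.jpg', laplacian_display)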

Thresholding

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Simple threshold
_, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
_, binary_inv = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

# Adaptive threshold
adaptive_mean = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
    cv2.THRESH_BINARY, 11, 2
)

adaptive_gaussian = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    cv2.THRESH_BINARY, 11, 2
)

# Otsu's threshold
_, otsu = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
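
Which threshold works best depends on the lighting in the image, so it is worth comparing the variants side by side, for example with the matplotlib package installed earlier (a rough sketch; the 2x2 grid layout is arbitrary):

import matplotlib.pyplot as plt

titles = ['Binary', 'Adaptive Mean', 'Adaptive Gaussian', 'Otsu']
images = [binary, adaptive_mean, adaptive_gaussian, otsu]

for i, (title, img) in enumerate(zip(titles, images)):
    plt.subplot(2, 2, i + 1)
    plt.imshow(img, cmap='gray')
    plt.title(title)
    plt.axis('off')

plt.show()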

Morphological Operations

kernel = np.ones((5, 5), np.uint8)

# Erosion and dilation
erosion = cv2.erode(binary, kernel, iterations=1)
dilation = cv2.dilate(binary, kernel, iterations=1)

# Opening (erosion then dilation)
opening = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)

# Closing (dilation then erosion)
closing = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

# Gradient
gradient = cv2.morphologyEx(binary, cv2.MORPH_GRADIENT, kernel)

Contour Detection

# Find contours
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
contours, hierarchy = cv2.findContours(
    binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)

# Draw contours
result = image.copy()
cv2.drawContours(result, contours, -1, (0, 255, 0), 2)

# Analyze contours
for contour in contours:
    # Area and perimeter
    area = cv2.contourArea(contour)
    perimeter = cv2.arcLength(contour, True)

    # Bounding rectangle
    x, y, w, h = cv2.boundingRect(contour)
    cv2.rectangle(result, (x, y), (x+w, y+h), (255, 0, 0), 2)

    # Minimum enclosing circle
    (cx, cy), radius = cv2.minEnclosingCircle(contour)
    cv2.circle(result, (int(cx), int(cy)), int(radius), (0, 0, 255), 2)

    # Centroid
    M = cv2.moments(contour)
    if M['m00'] != 0:
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])
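
The centroids can also be drawn onto the same result image and the annotated copy displayed as usual. A short sketch that repeats the moments calculation so it stands on its own:

# Mark each centroid on the annotated result image
for contour in contours:
    M = cv2.moments(contour)
    if M['m00'] != 0:
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])
        cv2.circle(result, (cx, cy), 4, (255, 0, 255), -1)

cv2.imshow('Contours', result)
cv2.waitKey(0)
cv2.destroyAllWindows()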

Face Detection

Haar Cascade

# Load cascade
face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
)
eye_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_eye.xml'
)

# Detect faces
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, 1.1, 4)

# Draw rectangles
for (x, y, w, h) in faces:
    cv2.rectangle(image, (x, y), (x+w, y+h), (255, 0, 0), 2)

    # Detect eyes within face region
    roi_gray = gray[y:y+h, x:x+w]
    roi_color = image[y:y+h, x:x+w]
    eyes = eye_cascade.detectMultiScale(roi_gray)
    for (ex, ey, ew, eh) in eyes:
        cv2.rectangle(roi_color, (ex, ey), (ex+ew, ey+eh), (0, 255, 0), 2)

Deep Learning Face Detection

# Load DNN model
modelFile = "res10_300x300_ssd_iter_140000.caffemodel"
configFile = "deploy.prototxt"
net = cv2.dnn.readNetFromCaffe(configFile, modelFile)

def detect_faces_dnn(image, confidence_threshold=0.5):
    h, w = image.shape[:2]
    blob = cv2.dnn.blobFromImage(
        image, 1.0, (300, 300), (104.0, 177.0, 123.0)
    )

    net.setInput(blob)
    detections = net.forward()

    faces = []
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > confidence_threshold:
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            faces.append(box.astype(int))

    return faces
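
A minimal usage sketch: the boxes come back as [x1, y1, x2, y2] corners already scaled to the image size, so they can be drawn directly (photo.jpg is just the example file from earlier):

image = cv2.imread('photo.jpg')
for (x1, y1, x2, y2) in detect_faces_dnn(image):
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

cv2.imshow('DNN Faces', image)
cv2.waitKey(0)
cv2.destroyAllWindows()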

Object Detection with YOLO

# Load YOLO
net = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
layer_names = net.getLayerNames()
# OpenCV >= 4.5.4 returns flat indices here; older versions return nested [[i]] arrays
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

# Load classes
with open('coco.names', 'r') as f:
    classes = [line.strip() for line in f.readlines()]

def detect_objects(image, confidence_threshold=0.5, nms_threshold=0.4):
    height, width = image.shape[:2]

    # Prepare image
    blob = cv2.dnn.blobFromImage(
        image, 1/255.0, (416, 416), swapRB=True, crop=False
    )
    net.setInput(blob)
    outputs = net.forward(output_layers)

    boxes = []
    confidences = []
    class_ids = []

    for output in outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            if confidence > confidence_threshold:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Non-maximum suppression
    indices = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, nms_threshold)

    results = []
    if len(indices) > 0:  # NMSBoxes returns an empty result when nothing passes
        for i in indices.flatten():
            results.append({
                'class': classes[class_ids[i]],
                'confidence': confidences[i],
                'box': boxes[i]
            })

    return results
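
And a usage sketch that draws the labeled detections; the input filename, colors, and label placement here are arbitrary choices:

image = cv2.imread('street.jpg')  # example image path
for det in detect_objects(image):
    x, y, w, h = det['box']
    label = f"{det['class']}: {det['confidence']:.2f}"
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(image, label, (x, y - 5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

cv2.imshow('YOLO Detections', image)
cv2.waitKey(0)
cv2.destroyAllWindows()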

Video Processing

Basic Video Operations

# Read from file
cap = cv2.VideoCapture('video.mp4')

# ...or read from the webcam instead (device 0)
cap = cv2.VideoCapture(0)

# Get video properties
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Process frames
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Process frame
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    cv2.imshow('Frame', gray)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

Video Writer

# Create video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output.mp4', fourcc, 30.0, (640, 480))

cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Process frame
    frame = cv2.resize(frame, (640, 480))

    # Write frame
    out.write(frame)

    cv2.imshow('Recording', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()

Motion Detection

def detect_motion(video_source=0):
    cap = cv2.VideoCapture(video_source)
    ret, frame1 = cap.read()
    ret, frame2 = cap.read()

    while cap.isOpened():
        diff = cv2.absdiff(frame1, frame2)
        gray = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
        blur = cv2.GaussianBlur(gray, (5, 5), 0)
        _, thresh = cv2.threshold(blur, 20, 255, cv2.THRESH_BINARY)
        dilated = cv2.dilate(thresh, None, iterations=3)

        contours, _ = cv2.findContours(
            dilated, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
        )

        for contour in contours:
            if cv2.contourArea(contour) < 1000:
                continue
            x, y, w, h = cv2.boundingRect(contour)
            cv2.rectangle(frame1, (x, y), (x+w, y+h), (0, 255, 0), 2)

        cv2.imshow('Motion Detection', frame1)

        frame1 = frame2
        ret, frame2 = cap.read()

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
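
Calling it with the default argument reads from the webcam; a path to a video file works as well (the filename below is just an example):

detect_motion(0)                  # webcam
# detect_motion('traffic.mp4')    # or a recorded video file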

Summary

| Task              | Method                |
| ----------------- | --------------------- |
| Edge Detection    | cv2.Canny()           |
| Face Detection    | Haar Cascades, DNN    |
| Object Detection  | YOLO, SSD             |
| Contour Detection | cv2.findContours()    |
| Video Processing  | cv2.VideoCapture()    |

OpenCV provides powerful tools for building computer vision applications across various domains.
