
Computer Vision with OpenCV and Python: Complete Guide

Master computer vision with OpenCV and Python. Learn image processing, object detection, and face recognition, and build real-world vision applications.

Moshiour Rahman


What is Computer Vision?

Computer Vision enables machines to interpret and understand visual information from images and videos. OpenCV is the most popular library for building CV applications.

Common Applications

| Application       | Examples                               |
| ----------------- | -------------------------------------- |
| Object Detection  | Security cameras, autonomous vehicles  |
| Face Recognition  | Phone unlock, attendance systems       |
| OCR               | Document scanning, license plates      |
| Medical Imaging   | X-ray analysis, tumor detection        |

Getting Started

Installation

pip install opencv-python
pip install numpy matplotlib

Note: opencv-python-headless is a drop-in alternative for servers without a display. Install either opencv-python or opencv-python-headless, not both, since they both provide the cv2 module and conflict with each other.
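
A quick way to confirm the install worked is to import the package and print its version:

import cv2
print(cv2.__version__)  # should print something like 4.x.y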

Basic Operations

import cv2
import numpy as np

# Read image
image = cv2.imread('photo.jpg')
print(f"Shape: {image.shape}")  # (height, width, channels)

# Convert color spaces
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

# Resize image
resized = cv2.resize(image, (640, 480))
scaled = cv2.resize(image, None, fx=0.5, fy=0.5)

# Crop image
cropped = image[100:400, 200:500]  # [y1:y2, x1:x2]

# Save image
cv2.imwrite('output.jpg', image)

# Display image
cv2.imshow('Image', image)
cv2.waitKey(0)
cv2.destroyAllWindows()

Drawing Operations

# Create blank image
canvas = np.zeros((500, 500, 3), dtype=np.uint8)

# Draw shapes
cv2.line(canvas, (0, 0), (500, 500), (255, 0, 0), 2)
cv2.rectangle(canvas, (100, 100), (300, 300), (0, 255, 0), 2)
cv2.circle(canvas, (250, 250), 100, (0, 0, 255), -1)  # -1 = filled

# Draw text
cv2.putText(canvas, 'OpenCV', (150, 450),
            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

Image Processing

Filtering

# Blur
blur = cv2.blur(image, (5, 5))
gaussian = cv2.GaussianBlur(image, (5, 5), 0)
median = cv2.medianBlur(image, 5)
bilateral = cv2.bilateralFilter(image, 9, 75, 75)

# Sharpening
kernel = np.array([[-1, -1, -1],
                   [-1,  9, -1],
                   [-1, -1, -1]])
sharpened = cv2.filter2D(image, -1, kernel)

Edge Detection

# Canny edge detection
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 100, 200)

# Sobel edges
sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
sobel = cv2.magnitude(sobelx, sobely)

# Laplacian
laplacian = cv2.Laplacian(gray, cv2.CV_64F)
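
The Sobel and Laplacian outputs are 64-bit floats, so convert them back to 8-bit before displaying or saving. A minimal sketch (the output filenames are just examples):

# Scale float edge maps back to uint8 for display/saving
sobel_display = cv2.convertScaleAbs(sobel)
laplacian_display = cv2.convertScaleAbs(laplacian)

cv2.imwrite('sobel_edges.jpg', sobel_display)
cv2.imwrite('laplacian_edges.jpg', laplacian_display)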

Thresholding

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Simple threshold
_, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
_, binary_inv = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

# Adaptive threshold
adaptive_mean = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
    cv2.THRESH_BINARY, 11, 2
)

adaptive_gaussian = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    cv2.THRESH_BINARY, 11, 2
)

# Otsu's threshold
_, otsu = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
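
Which threshold works best depends on the lighting in the image, so it is worth comparing the variants side by side, for example with the matplotlib package installed earlier (a rough sketch; the 2x2 grid layout is arbitrary):

import matplotlib.pyplot as plt

titles = ['Binary', 'Adaptive Mean', 'Adaptive Gaussian', 'Otsu']
images = [binary, adaptive_mean, adaptive_gaussian, otsu]

for i, (title, img) in enumerate(zip(titles, images)):
    plt.subplot(2, 2, i + 1)
    plt.imshow(img, cmap='gray')
    plt.title(title)
    plt.axis('off')

plt.show()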

Morphological Operations

kernel = np.ones((5, 5), np.uint8)

# Erosion and dilation
erosion = cv2.erode(binary, kernel, iterations=1)
dilation = cv2.dilate(binary, kernel, iterations=1)

# Opening (erosion then dilation)
opening = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)

# Closing (dilation then erosion)
closing = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

# Gradient
gradient = cv2.morphologyEx(binary, cv2.MORPH_GRADIENT, kernel)

Contour Detection

# Find contours
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
contours, hierarchy = cv2.findContours(
    binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)

# Draw contours
result = image.copy()
cv2.drawContours(result, contours, -1, (0, 255, 0), 2)

# Analyze contours
for contour in contours:
    # Area and perimeter
    area = cv2.contourArea(contour)
    perimeter = cv2.arcLength(contour, True)

    # Bounding rectangle
    x, y, w, h = cv2.boundingRect(contour)
    cv2.rectangle(result, (x, y), (x+w, y+h), (255, 0, 0), 2)

    # Minimum enclosing circle
    (cx, cy), radius = cv2.minEnclosingCircle(contour)
    cv2.circle(result, (int(cx), int(cy)), int(radius), (0, 0, 255), 2)

    # Centroid
    M = cv2.moments(contour)
    if M['m00'] != 0:
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])
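
The centroids can also be drawn onto the same result image and the annotated copy displayed as usual. A short sketch that repeats the moments calculation so it stands on its own:

# Mark each centroid on the annotated result image
for contour in contours:
    M = cv2.moments(contour)
    if M['m00'] != 0:
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])
        cv2.circle(result, (cx, cy), 4, (255, 0, 255), -1)

cv2.imshow('Contours', result)
cv2.waitKey(0)
cv2.destroyAllWindows()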

Face Detection

Haar Cascade

# Load cascade
face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
)
eye_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_eye.xml'
)

# Detect faces
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, 1.1, 4)

# Draw rectangles
for (x, y, w, h) in faces:
    cv2.rectangle(image, (x, y), (x+w, y+h), (255, 0, 0), 2)

    # Detect eyes within face region
    roi_gray = gray[y:y+h, x:x+w]
    roi_color = image[y:y+h, x:x+w]
    eyes = eye_cascade.detectMultiScale(roi_gray)
    for (ex, ey, ew, eh) in eyes:
        cv2.rectangle(roi_color, (ex, ey), (ex+ew, ey+eh), (0, 255, 0), 2)

Deep Learning Face Detection

# Load DNN model
modelFile = "res10_300x300_ssd_iter_140000.caffemodel"
configFile = "deploy.prototxt"
net = cv2.dnn.readNetFromCaffe(configFile, modelFile)

def detect_faces_dnn(image, confidence_threshold=0.5):
    h, w = image.shape[:2]
    blob = cv2.dnn.blobFromImage(
        image, 1.0, (300, 300), (104.0, 177.0, 123.0)
    )

    net.setInput(blob)
    detections = net.forward()

    faces = []
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > confidence_threshold:
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            faces.append(box.astype(int))

    return faces
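
A minimal usage sketch: the boxes come back as [x1, y1, x2, y2] corners already scaled to the image size, so they can be drawn directly (photo.jpg is just the example file from earlier):

image = cv2.imread('photo.jpg')
for (x1, y1, x2, y2) in detect_faces_dnn(image):
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

cv2.imshow('DNN Faces', image)
cv2.waitKey(0)
cv2.destroyAllWindows()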

Object Detection with YOLO

# Load YOLO
net = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
layer_names = net.getLayerNames()
# OpenCV >= 4.5.4 returns flat indices here; older versions return nested [[i]] arrays
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

# Load classes
with open('coco.names', 'r') as f:
    classes = [line.strip() for line in f.readlines()]

def detect_objects(image, confidence_threshold=0.5, nms_threshold=0.4):
    height, width = image.shape[:2]

    # Prepare image
    blob = cv2.dnn.blobFromImage(
        image, 1/255.0, (416, 416), swapRB=True, crop=False
    )
    net.setInput(blob)
    outputs = net.forward(output_layers)

    boxes = []
    confidences = []
    class_ids = []

    for output in outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            if confidence > confidence_threshold:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Non-maximum suppression
    indices = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, nms_threshold)

    results = []
    if len(indices) > 0:  # NMSBoxes returns an empty result when nothing passes
        for i in indices.flatten():
            results.append({
                'class': classes[class_ids[i]],
                'confidence': confidences[i],
                'box': boxes[i]
            })

    return results
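
And a usage sketch that draws the labeled detections; the input filename, colors, and label placement here are arbitrary choices:

image = cv2.imread('street.jpg')  # example image path
for det in detect_objects(image):
    x, y, w, h = det['box']
    label = f"{det['class']}: {det['confidence']:.2f}"
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(image, label, (x, y - 5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

cv2.imshow('YOLO Detections', image)
cv2.waitKey(0)
cv2.destroyAllWindows()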

Video Processing

Basic Video Operations

# Read from file
cap = cv2.VideoCapture('video.mp4')

# ...or read from the webcam instead (device 0)
cap = cv2.VideoCapture(0)

# Get video properties
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Process frames
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Process frame
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    cv2.imshow('Frame', gray)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

Video Writer

# Create video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output.mp4', fourcc, 30.0, (640, 480))

cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Process frame
    frame = cv2.resize(frame, (640, 480))

    # Write frame
    out.write(frame)

    cv2.imshow('Recording', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()

Motion Detection

def detect_motion(video_source=0):
    cap = cv2.VideoCapture(video_source)
    ret, frame1 = cap.read()
    ret, frame2 = cap.read()

    while cap.isOpened():
        diff = cv2.absdiff(frame1, frame2)
        gray = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
        blur = cv2.GaussianBlur(gray, (5, 5), 0)
        _, thresh = cv2.threshold(blur, 20, 255, cv2.THRESH_BINARY)
        dilated = cv2.dilate(thresh, None, iterations=3)

        contours, _ = cv2.findContours(
            dilated, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
        )

        for contour in contours:
            if cv2.contourArea(contour) < 1000:
                continue
            x, y, w, h = cv2.boundingRect(contour)
            cv2.rectangle(frame1, (x, y), (x+w, y+h), (0, 255, 0), 2)

        cv2.imshow('Motion Detection', frame1)

        frame1 = frame2
        ret, frame2 = cap.read()

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
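
Calling it with the default argument reads from the webcam; a path to a video file works as well (the filename below is just an example):

detect_motion(0)                  # webcam
# detect_motion('traffic.mp4')    # or a recorded video file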

Summary

| Task              | Method                |
| ----------------- | --------------------- |
| Edge Detection    | cv2.Canny()           |
| Face Detection    | Haar Cascades, DNN    |
| Object Detection  | YOLO, SSD             |
| Contour Detection | cv2.findContours()    |
| Video Processing  | cv2.VideoCapture()    |

OpenCV provides powerful tools for building computer vision applications across various domains.
