Computer Vision with OpenCV and Python: Complete Guide
Master computer vision with OpenCV Python. Learn image processing, object detection, face recognition, and build real-world vision applications.
Moshiour Rahman
What is Computer Vision?
Computer vision enables machines to interpret and understand visual information from images and videos. OpenCV (Open Source Computer Vision Library) is the most widely used library for building computer vision applications in Python.
Common Applications
| Application | Examples |
|---|---|
| Object Detection | Security cameras, autonomous vehicles |
| Face Recognition | Phone unlock, attendance systems |
| OCR | Document scanning, license plates |
| Medical Imaging | X-ray analysis, tumor detection |
Getting Started
Installation
pip install opencv-python
pip install numpy matplotlib
Note: opencv-python-headless is a drop-in alternative for servers without a display; install one of the two packages, not both, since they conflict.
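A quick way to confirm the installation is to import the package and print its version:
import cv2
print(cv2.__version__)  # prints the installed OpenCV version, e.g. 4.x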
Basic Operations
import cv2
import numpy as np
# Read image (cv2.imread returns None if the file cannot be read)
image = cv2.imread('photo.jpg')
print(f"Shape: {image.shape}")  # (height, width, channels), channels in BGR order
# Convert color spaces
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# Resize image
resized = cv2.resize(image, (640, 480))
scaled = cv2.resize(image, None, fx=0.5, fy=0.5)
# Crop image
cropped = image[100:400, 200:500] # [y1:y2, x1:x2]
# Save image
cv2.imwrite('output.jpg', image)
# Display image
cv2.imshow('Image', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
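Because OpenCV stores images in BGR order, convert to RGB before displaying with matplotlib (installed above). A short sketch reusing the image loaded earlier:
import matplotlib.pyplot as plt
# Matplotlib expects RGB, so convert from OpenCV's BGR first
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()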
Drawing Operations
# Create blank image
canvas = np.zeros((500, 500, 3), dtype=np.uint8)
# Draw shapes
cv2.line(canvas, (0, 0), (500, 500), (255, 0, 0), 2)
cv2.rectangle(canvas, (100, 100), (300, 300), (0, 255, 0), 2)
cv2.circle(canvas, (250, 250), 100, (0, 0, 255), -1) # -1 = filled
# Draw text
cv2.putText(canvas, 'OpenCV', (150, 450),
            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
Image Processing
Filtering
# Blur
blur = cv2.blur(image, (5, 5))
gaussian = cv2.GaussianBlur(image, (5, 5), 0)
median = cv2.medianBlur(image, 5)
bilateral = cv2.bilateralFilter(image, 9, 75, 75)
# Sharpening
kernel = np.array([[-1, -1, -1],
                   [-1,  9, -1],
                   [-1, -1, -1]])
sharpened = cv2.filter2D(image, -1, kernel)
Edge Detection
# Canny edge detection
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 100, 200)
# Sobel edges
sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
sobel = cv2.magnitude(sobelx, sobely)
# Laplacian
laplacian = cv2.Laplacian(gray, cv2.CV_64F)
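The Sobel and Laplacian results above are 64-bit float arrays, so convert them back to 8-bit before displaying or saving:
# Convert float gradient images back to displayable 8-bit images
sobel_display = cv2.convertScaleAbs(sobel)
laplacian_display = cv2.convertScaleAbs(laplacian)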
Thresholding
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Simple threshold
_, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
_, binary_inv = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
# Adaptive threshold
# Block size 11 = neighbourhood used per pixel, 2 = constant subtracted from the mean
adaptive_mean = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
    cv2.THRESH_BINARY, 11, 2
)
adaptive_gaussian = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    cv2.THRESH_BINARY, 11, 2
)
# Otsu's method picks the threshold automatically (the 0 passed here is ignored)
_, otsu = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
Morphological Operations
kernel = np.ones((5, 5), np.uint8)
# Erosion and dilation
erosion = cv2.erode(binary, kernel, iterations=1)
dilation = cv2.dilate(binary, kernel, iterations=1)
# Opening (erosion then dilation)
opening = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
# Closing (dilation then erosion)
closing = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
# Gradient
gradient = cv2.morphologyEx(binary, cv2.MORPH_GRADIENT, kernel)
Contour Detection
# Find contours
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
contours, hierarchy = cv2.findContours(
    binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)
# Draw contours
result = image.copy()
cv2.drawContours(result, contours, -1, (0, 255, 0), 2)
# Analyze contours
for contour in contours:
    # Area and perimeter
    area = cv2.contourArea(contour)
    perimeter = cv2.arcLength(contour, True)
    # Bounding rectangle
    x, y, w, h = cv2.boundingRect(contour)
    cv2.rectangle(result, (x, y), (x+w, y+h), (255, 0, 0), 2)
    # Minimum enclosing circle
    (cx, cy), radius = cv2.minEnclosingCircle(contour)
    cv2.circle(result, (int(cx), int(cy)), int(radius), (0, 0, 255), 2)
    # Centroid from image moments
    M = cv2.moments(contour)
    if M['m00'] != 0:
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])
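For simple shape classification, each contour can be reduced to a polygon with cv2.approxPolyDP and classified by its vertex count. A minimal sketch (the 0.02 factor is a common heuristic, not a fixed rule):
for contour in contours:
    # Approximate the contour with a simpler polygon
    epsilon = 0.02 * cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, epsilon, True)
    if len(approx) == 3:
        shape = 'triangle'
    elif len(approx) == 4:
        shape = 'rectangle'
    else:
        shape = 'other'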
Face Detection
Haar Cascade
# Load cascade
face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
)
eye_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_eye.xml'
)
# Detect faces
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)
# Draw rectangles
for (x, y, w, h) in faces:
    cv2.rectangle(image, (x, y), (x+w, y+h), (255, 0, 0), 2)
    # Detect eyes within the face region
    roi_gray = gray[y:y+h, x:x+w]
    roi_color = image[y:y+h, x:x+w]
    eyes = eye_cascade.detectMultiScale(roi_gray)
    for (ex, ey, ew, eh) in eyes:
        cv2.rectangle(roi_color, (ex, ey), (ex+ew, ey+eh), (0, 255, 0), 2)
Deep Learning Face Detection
# Load DNN model
modelFile = "res10_300x300_ssd_iter_140000.caffemodel"
configFile = "deploy.prototxt"
net = cv2.dnn.readNetFromCaffe(configFile, modelFile)
def detect_faces_dnn(image, confidence_threshold=0.5):
    h, w = image.shape[:2]
    blob = cv2.dnn.blobFromImage(
        image, 1.0, (300, 300), (104.0, 177.0, 123.0)
    )
    net.setInput(blob)
    detections = net.forward()
    faces = []
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > confidence_threshold:
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            faces.append(box.astype(int))
    return faces
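A minimal usage sketch, assuming the model files above have been downloaded and 'photo.jpg' is a placeholder image path:
image = cv2.imread('photo.jpg')
for (x1, y1, x2, y2) in detect_faces_dnn(image):
    # Boxes come back as pixel coordinates (x1, y1, x2, y2)
    cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
cv2.imshow('DNN Faces', image)
cv2.waitKey(0)
cv2.destroyAllWindows()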
Object Detection with YOLO
# Load YOLO
net = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
layer_names = net.getLayerNames()
# getUnconnectedOutLayers() returns 1-based layer indices (a flat array in recent OpenCV versions)
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
# Load class names
with open('coco.names', 'r') as f:
    classes = [line.strip() for line in f.readlines()]
def detect_objects(image, confidence_threshold=0.5, nms_threshold=0.4):
    height, width = image.shape[:2]
    # Prepare image
    blob = cv2.dnn.blobFromImage(
        image, 1/255.0, (416, 416), swapRB=True, crop=False
    )
    net.setInput(blob)
    outputs = net.forward(output_layers)
    boxes = []
    confidences = []
    class_ids = []
    for output in outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > confidence_threshold:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    # Non-maximum suppression removes overlapping boxes
    indices = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, nms_threshold)
    results = []
    for i in indices.flatten():
        results.append({
            'class': classes[class_ids[i]],
            'confidence': confidences[i],
            'box': boxes[i]
        })
    return results
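A minimal usage sketch, assuming yolov3.cfg, yolov3.weights, and coco.names are in the working directory and 'street.jpg' is a placeholder image:
image = cv2.imread('street.jpg')
for det in detect_objects(image):
    x, y, w, h = det['box']
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    label = f"{det['class']}: {det['confidence']:.2f}"
    cv2.putText(image, label, (x, y - 5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
cv2.imshow('YOLO Detections', image)
cv2.waitKey(0)
cv2.destroyAllWindows()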
Video Processing
Basic Video Operations
# Read from file
cap = cv2.VideoCapture('video.mp4')
# Or read from the default webcam (index 0)
cap = cv2.VideoCapture(0)
# Get video properties
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Process frames
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # Process frame
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    cv2.imshow('Frame', gray)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
Video Writer
# Create video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output.mp4', fourcc, 30.0, (640, 480))
cap = cv2.VideoCapture(0)
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # Resize to match the writer's frame size
    frame = cv2.resize(frame, (640, 480))
    # Write frame
    out.write(frame)
    cv2.imshow('Recording', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
out.release()
Motion Detection
def detect_motion(video_source=0):
    cap = cv2.VideoCapture(video_source)
    ret, frame1 = cap.read()
    ret, frame2 = cap.read()
    while cap.isOpened():
        # Difference between consecutive frames highlights moving regions
        diff = cv2.absdiff(frame1, frame2)
        gray = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
        blur = cv2.GaussianBlur(gray, (5, 5), 0)
        _, thresh = cv2.threshold(blur, 20, 255, cv2.THRESH_BINARY)
        dilated = cv2.dilate(thresh, None, iterations=3)
        contours, _ = cv2.findContours(
            dilated, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
        )
        for contour in contours:
            if cv2.contourArea(contour) < 1000:
                continue
            x, y, w, h = cv2.boundingRect(contour)
            cv2.rectangle(frame1, (x, y), (x+w, y+h), (0, 255, 0), 2)
        cv2.imshow('Motion Detection', frame1)
        frame1 = frame2
        ret, frame2 = cap.read()
        if not ret:
            break
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
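For longer-running scenes, OpenCV's built-in background subtractors are an alternative to frame differencing. A minimal sketch using cv2.createBackgroundSubtractorMOG2 (the parameter values here are illustrative, not tuned):
def detect_motion_mog2(video_source=0):
    cap = cv2.VideoCapture(video_source)
    # MOG2 models the background over time and flags moving pixels as foreground
    subtractor = cv2.createBackgroundSubtractorMOG2(history=500, detectShadows=True)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        mask = subtractor.apply(frame)
        cv2.imshow('Foreground Mask', mask)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()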
Summary
| Task | Method |
|---|---|
| Edge Detection | cv2.Canny() |
| Face Detection | Haar Cascades, DNN |
| Object Detection | YOLO, SSD |
| Contour Detection | cv2.findContours() |
| Video Processing | cv2.VideoCapture() |
OpenCV provides powerful tools for building computer vision applications across various domains.