DevOps 9 min read

PostgreSQL Advanced Guide: From Queries to Performance Tuning

Master PostgreSQL with advanced SQL queries, indexing strategies, performance optimization, JSON support, and production database management.

MR

Moshiour Rahman

Advertisement

Why PostgreSQL?

PostgreSQL is a powerful, open-source relational database known for its reliability, feature set, and standards compliance. It excels at complex queries, data integrity, and extensibility.

Key Features

Feature | Description
ACID Compliant | Full transaction support
JSON Support | Store and query JSON data
Full-Text Search | Built-in text search
Extensions | PostGIS, pg_stat, etc.
Replication | Streaming and logical

Getting Started

Connection with Node.js

import { Pool, PoolClient } from 'pg';

const pool = new Pool({
  host: 'localhost',
  port: 5432,
  database: 'myapp',
  user: 'postgres',
  password: 'password',
  max: 20,
  idleTimeoutMillis: 30000,
  connectionTimeoutMillis: 2000,
});

// Query function
async function query(text: string, params?: any[]) {
  const start = Date.now();
  const result = await pool.query(text, params);
  const duration = Date.now() - start;
  console.log('Query executed', { text, duration, rows: result.rowCount });
  return result;
}

// Get client for transactions
async function getClient(): Promise<PoolClient> {
  return await pool.connect();
}

export { pool, query, getClient };

Advanced SQL Queries

Common Table Expressions (CTEs)

-- Basic CTE
WITH active_users AS (
  SELECT id, name, email
  FROM users
  WHERE status = 'active'
)
SELECT * FROM active_users WHERE email LIKE '%@gmail.com';

-- Recursive CTE (hierarchical data)
WITH RECURSIVE category_tree AS (
  -- Base case
  SELECT id, name, parent_id, 1 as level
  FROM categories
  WHERE parent_id IS NULL

  UNION ALL

  -- Recursive case
  SELECT c.id, c.name, c.parent_id, ct.level + 1
  FROM categories c
  JOIN category_tree ct ON c.parent_id = ct.id
)
SELECT * FROM category_tree ORDER BY level, name;

-- Multiple CTEs
WITH
monthly_sales AS (
  SELECT DATE_TRUNC('month', created_at) as month,
         SUM(amount) as total
  FROM orders
  GROUP BY 1
),
monthly_avg AS (
  SELECT AVG(total) as avg_sales FROM monthly_sales
)
SELECT month, total,
       total - (SELECT avg_sales FROM monthly_avg) as diff_from_avg
FROM monthly_sales;

Window Functions

-- Row number, rank, dense_rank
SELECT
  name,
  department,
  salary,
  ROW_NUMBER() OVER (ORDER BY salary DESC) as row_num,
  RANK() OVER (ORDER BY salary DESC) as rank,
  DENSE_RANK() OVER (ORDER BY salary DESC) as dense_rank
FROM employees;

-- Partition by department
SELECT
  name,
  department,
  salary,
  RANK() OVER (PARTITION BY department ORDER BY salary DESC) as dept_rank,
  salary - AVG(salary) OVER (PARTITION BY department) as diff_from_dept_avg
FROM employees;

-- Running totals and moving averages
SELECT
  date,
  amount,
  SUM(amount) OVER (ORDER BY date) as running_total,
  AVG(amount) OVER (ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) as moving_avg_7day
FROM daily_sales;

-- Lead and lag
SELECT
  date,
  amount,
  LAG(amount, 1) OVER (ORDER BY date) as prev_day,
  LEAD(amount, 1) OVER (ORDER BY date) as next_day,
  amount - LAG(amount, 1) OVER (ORDER BY date) as day_over_day_change
FROM daily_sales;

-- First and last values
SELECT DISTINCT
  department,
  FIRST_VALUE(name) OVER (PARTITION BY department ORDER BY salary DESC) as highest_paid,
  LAST_VALUE(name) OVER (
    PARTITION BY department
    ORDER BY salary DESC
    RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
  ) as lowest_paid
FROM employees;

JSON Operations

-- Create table with JSON
CREATE TABLE products (
  id SERIAL PRIMARY KEY,
  name VARCHAR(255),
  attributes JSONB NOT NULL DEFAULT '{}'
);

-- Insert JSON data
INSERT INTO products (name, attributes) VALUES
('Laptop', '{"brand": "Dell", "specs": {"ram": 16, "storage": 512}, "tags": ["electronics", "computer"]}'),
('Phone', '{"brand": "Apple", "specs": {"ram": 8, "storage": 256}, "tags": ["electronics", "mobile"]}');

-- Query JSON fields
SELECT name, attributes->>'brand' as brand
FROM products;

-- Query nested JSON
SELECT name, attributes->'specs'->>'ram' as ram
FROM products;

-- Filter by JSON value
SELECT * FROM products
WHERE attributes->>'brand' = 'Dell';

-- Filter by nested value
SELECT * FROM products
WHERE (attributes->'specs'->>'ram')::int >= 16;

-- Array contains
SELECT * FROM products
WHERE attributes->'tags' ? 'mobile';

-- JSONB operators
SELECT * FROM products
WHERE attributes @> '{"brand": "Apple"}';

-- Update JSON
UPDATE products
SET attributes = jsonb_set(attributes, '{specs,ram}', '32')
WHERE name = 'Laptop';

-- Add to JSON array
UPDATE products
SET attributes = jsonb_set(
  attributes,
  '{tags}',
  attributes->'tags' || '"sale"'::jsonb
)
WHERE name = 'Phone';

-- Index on JSONB
CREATE INDEX idx_products_attributes ON products USING GIN (attributes);
CREATE INDEX idx_products_brand ON products ((attributes->>'brand'));
Full-Text Search

-- Create table with tsvector
CREATE TABLE articles (
  id SERIAL PRIMARY KEY,
  title VARCHAR(255),
  content TEXT,
  search_vector TSVECTOR
);

-- Create index
CREATE INDEX idx_articles_search ON articles USING GIN (search_vector);

-- Trigger to update search vector
CREATE OR REPLACE FUNCTION update_search_vector()
RETURNS TRIGGER AS $$
BEGIN
  NEW.search_vector := to_tsvector('english', COALESCE(NEW.title, '') || ' ' || COALESCE(NEW.content, ''));
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER articles_search_update
  BEFORE INSERT OR UPDATE ON articles
  FOR EACH ROW EXECUTE FUNCTION update_search_vector();

-- Search queries
SELECT title, ts_rank(search_vector, query) as rank
FROM articles, to_tsquery('english', 'postgresql & performance') query
WHERE search_vector @@ query
ORDER BY rank DESC;

-- Search with highlighting
SELECT
  title,
  ts_headline('english', content, to_tsquery('postgresql & optimization'),
    'StartSel=<mark>, StopSel=</mark>, MaxWords=50'
  ) as highlighted
FROM articles
WHERE search_vector @@ to_tsquery('postgresql & optimization');

Indexing Strategies

Index Types

-- B-tree (default, most common)
CREATE INDEX idx_users_email ON users (email);

-- Hash (equality only)
CREATE INDEX idx_users_id_hash ON users USING HASH (id);

-- GIN (arrays, JSONB, full-text)
CREATE INDEX idx_products_tags ON products USING GIN (tags);

-- GiST (geometric, full-text, ranges)
CREATE INDEX idx_locations_point ON locations USING GIST (point);

-- BRIN (large sequential data)
CREATE INDEX idx_logs_created ON logs USING BRIN (created_at);

Index Optimization

-- Partial index
CREATE INDEX idx_orders_pending ON orders (created_at)
WHERE status = 'pending';

-- Expression index
CREATE INDEX idx_users_lower_email ON users (LOWER(email));

-- Covering index (include columns)
CREATE INDEX idx_orders_user ON orders (user_id)
INCLUDE (total, status);

-- Composite index
CREATE INDEX idx_orders_user_date ON orders (user_id, created_at DESC);

-- Analyze index usage
SELECT
  schemaname,
  tablename,
  indexname,
  idx_scan,
  idx_tup_read,
  idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;

-- Find unused indexes
SELECT
  schemaname || '.' || relname as table,
  indexrelname as index,
  pg_size_pretty(pg_relation_size(i.indexrelid)) as size,
  idx_scan as scans
FROM pg_stat_user_indexes ui
JOIN pg_index i ON ui.indexrelid = i.indexrelid
WHERE NOT indisunique AND idx_scan < 50
ORDER BY pg_relation_size(i.indexrelid) DESC;

Performance Optimization

EXPLAIN ANALYZE

-- Basic explain
EXPLAIN SELECT * FROM users WHERE email = 'john@example.com';

-- With execution stats
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'john@example.com';

-- Full analysis
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT u.name, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.created_at > '2024-01-01'
GROUP BY u.id
ORDER BY order_count DESC
LIMIT 10;

Query Optimization

-- Use EXISTS instead of IN for large sets
-- Bad
SELECT * FROM users WHERE id IN (SELECT user_id FROM orders);

-- Good
SELECT * FROM users u WHERE EXISTS (
  SELECT 1 FROM orders o WHERE o.user_id = u.id
);

-- Use specific columns instead of *
-- Bad
SELECT * FROM users;

-- Good
SELECT id, name, email FROM users;

-- Avoid functions on indexed columns
-- Bad (can't use index)
SELECT * FROM users WHERE LOWER(email) = 'john@example.com';

-- Good (if expression index exists)
CREATE INDEX idx_users_email_lower ON users (LOWER(email));
SELECT * FROM users WHERE LOWER(email) = 'john@example.com';

-- Use LIMIT with ORDER BY
SELECT * FROM logs ORDER BY created_at DESC LIMIT 100;

Connection Pooling

// PgBouncer configuration (pgbouncer.ini)
/*
[databases]
myapp = host=localhost port=5432 dbname=myapp

[pgbouncer]
listen_port = 6432
listen_addr = *
auth_type = md5
auth_file = /etc/pgbouncer/userlist.txt
pool_mode = transaction
max_client_conn = 1000
default_pool_size = 20
*/

// Application connects to PgBouncer
const pool = new Pool({
  host: 'localhost',
  port: 6432,  // PgBouncer port
  database: 'myapp'
});

Transactions and Locking

Transaction Isolation

-- Read Committed (default)
BEGIN;
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
-- queries
COMMIT;

-- Repeatable Read
BEGIN;
SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;
-- queries see consistent snapshot
COMMIT;

-- Serializable
BEGIN;
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
-- strictest isolation
COMMIT;

Advisory Locks

-- Session-level lock
SELECT pg_advisory_lock(12345);
-- Do work
SELECT pg_advisory_unlock(12345);

-- Transaction-level lock (auto-released)
BEGIN;
SELECT pg_advisory_xact_lock(12345);
-- Do work
COMMIT;  -- Lock released

-- Try lock (non-blocking)
SELECT pg_try_advisory_lock(12345);

Row-Level Locking

-- SELECT FOR UPDATE
BEGIN;
SELECT * FROM accounts WHERE id = 1 FOR UPDATE;
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
COMMIT;

-- SKIP LOCKED (for job queues)
BEGIN;
SELECT * FROM jobs
WHERE status = 'pending'
ORDER BY created_at
LIMIT 1
FOR UPDATE SKIP LOCKED;

UPDATE jobs SET status = 'processing' WHERE id = ?;
COMMIT;

Partitioning

Range Partitioning

-- Create partitioned table
CREATE TABLE orders (
  id SERIAL,
  user_id INT,
  amount DECIMAL,
  created_at TIMESTAMP NOT NULL
) PARTITION BY RANGE (created_at);

-- Create partitions
CREATE TABLE orders_2024_q1 PARTITION OF orders
  FOR VALUES FROM ('2024-01-01') TO ('2024-04-01');

CREATE TABLE orders_2024_q2 PARTITION OF orders
  FOR VALUES FROM ('2024-04-01') TO ('2024-07-01');

-- Auto-create partitions (pg_partman extension)
CREATE EXTENSION pg_partman;

SELECT create_parent(
  'public.orders',
  'created_at',
  'native',
  'monthly'
);

List Partitioning

CREATE TABLE sales (
  id SERIAL,
  region VARCHAR(50),
  amount DECIMAL
) PARTITION BY LIST (region);

CREATE TABLE sales_na PARTITION OF sales
  FOR VALUES IN ('US', 'Canada', 'Mexico');

CREATE TABLE sales_eu PARTITION OF sales
  FOR VALUES IN ('UK', 'Germany', 'France');

Replication and High Availability

Streaming Replication Setup

-- Primary server (postgresql.conf)
-- wal_level = replica
-- max_wal_senders = 10
-- wal_keep_size = 1GB

-- Create replication user
CREATE ROLE replicator WITH REPLICATION LOGIN PASSWORD 'secret';

-- pg_hba.conf
-- host replication replicator replica_ip/32 md5

-- Replica server
-- pg_basebackup -h primary_ip -D /var/lib/postgresql/data -U replicator -P -R

Logical Replication

-- Publisher
CREATE PUBLICATION my_publication FOR TABLE users, orders;

-- Subscriber
CREATE SUBSCRIPTION my_subscription
  CONNECTION 'host=publisher_ip dbname=myapp user=replicator'
  PUBLICATION my_publication;

Monitoring

Key Metrics

-- Active connections
SELECT count(*) FROM pg_stat_activity WHERE state = 'active';

-- Long-running queries
SELECT pid, now() - pg_stat_activity.query_start AS duration, query
FROM pg_stat_activity
WHERE state = 'active' AND now() - pg_stat_activity.query_start > interval '5 minutes';

-- Table statistics
SELECT
  relname as table,
  seq_scan,
  idx_scan,
  n_tup_ins as inserts,
  n_tup_upd as updates,
  n_tup_del as deletes
FROM pg_stat_user_tables
ORDER BY seq_scan DESC;

-- Cache hit ratio
SELECT
  sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) as cache_hit_ratio
FROM pg_statio_user_tables;

-- Database size
SELECT pg_size_pretty(pg_database_size('myapp'));

-- Table sizes
SELECT
  relname as table,
  pg_size_pretty(pg_total_relation_size(relid)) as total_size
FROM pg_catalog.pg_statio_user_tables
ORDER BY pg_total_relation_size(relid) DESC
LIMIT 10;

Summary

Feature | Use Case
CTEs | Complex queries, recursion
Window Functions | Rankings, running totals
JSONB | Flexible schema data
Full-Text Search | Text search without external tools
Partitioning | Large tables, time-series
Replication | High availability, read scaling

PostgreSQL provides enterprise-grade features for demanding applications while remaining open-source and highly extensible.

Advertisement

MR

Moshiour Rahman

Software Architect & AI Engineer

Share:
MR

Moshiour Rahman

Software Architect & AI Engineer

Enterprise software architect with deep expertise in financial systems, distributed architecture, and AI-powered applications. Building large-scale systems at Fortune 500 companies. Specializing in LLM orchestration, multi-agent systems, and cloud-native solutions. I share battle-tested patterns from real enterprise projects.

Related Articles

Comments

Comments are powered by GitHub Discussions.

Configure Giscus at giscus.app to enable comments.