Elasticsearch: Complete Full-Text Search Guide
Master Elasticsearch for full-text search. Learn indexing, queries, aggregations, and build powerful search applications with Python.
Moshiour Rahman
Advertisement
What is Elasticsearch?
Elasticsearch is a distributed search and analytics engine built on Apache Lucene. It provides near real-time search, horizontal scalability, and powerful full-text search capabilities.
Use Cases
| Use Case | Examples |
|---|---|
| Search | E-commerce, documentation |
| Logging | ELK stack, observability |
| Analytics | Business intelligence |
| APM | Application monitoring |
Getting Started
Installation
# Docker (recommended): single-node cluster with security disabled, for local development only
docker run -d --name elasticsearch \
  -p 9200:9200 -p 9300:9300 \
  -e "discovery.type=single-node" \
  -e "xpack.security.enabled=false" \
  elasticsearch:8.11.0
# Python client: keep the client's major version aligned with the 8.x server
pip install "elasticsearch>=8,<9"
Basic Connection
from elasticsearch import Elasticsearch
# Open a client against the local node started above.
es = Elasticsearch("http://localhost:9200")

# Only ask for cluster details once the node has answered the ping.
cluster_reachable = es.ping()
if cluster_reachable:
    print("Connected to Elasticsearch")
    info = es.info()
    version_number = info["version"]["number"]
    print(f"Version: {version_number}")
Indexing Documents
Create Index
from elasticsearch import Elasticsearch
es = Elasticsearch("http://localhost:9200")
# Define index mapping
mapping = {
    "mappings": {
        "properties": {
            # Full-text fields, analyzed with the built-in "english" analyzer.
            "title": {"type": "text", "analyzer": "english"},
            "content": {"type": "text", "analyzer": "english"},
            # Exact-value fields used for filtering and aggregations.
            "author": {"type": "keyword"},
            "tags": {"type": "keyword"},
            "published_date": {"type": "date"},
            "views": {"type": "integer"},
            "rating": {"type": "float"},
        }
    },
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0,
    },
}

# Create the index only when it is missing. An explicit existence check is
# used instead of the deprecated per-request `ignore=400` option, which would
# also hide real bad-request errors such as an invalid mapping.
if not es.indices.exists(index="articles"):
    es.indices.create(
        index="articles",
        mappings=mapping["mappings"],
        settings=mapping["settings"],
    )

# Check if index exists
if es.indices.exists(index="articles"):
    print("Index created successfully")
Index Documents
# Index single document
doc = {
    "title": "Getting Started with Elasticsearch",
    "content": "Elasticsearch is a powerful search engine...",
    "author": "John Doe",
    "tags": ["elasticsearch", "search", "tutorial"],
    "published_date": "2024-01-15",
    "views": 1500,
    "rating": 4.5,
}
response = es.index(index="articles", id=1, document=doc)
print(f"Indexed document: {response['_id']}")

# Bulk indexing
from elasticsearch.helpers import bulk

# One bulk action per article; ids 2..101 follow the single document above.
documents = [
    {
        "_index": "articles",
        "_id": i,
        "_source": {
            "title": f"Article {i}",
            "content": f"Content for article {i}",
            "author": "Jane Doe",
            "tags": ["python", "tutorial"],
            "published_date": "2024-01-20",
            "views": 100 * i,
            "rating": 4.0,
        },
    }
    for i in range(2, 102)
]

# By default bulk() returns (success_count, list_of_error_dicts) and raises on
# the first failed item. stats_only=True turns the second value into a failure
# count, and raise_on_error=False lets the summary line below report failures
# instead of the call aborting before it prints.
success, failed = bulk(es, documents, stats_only=True, raise_on_error=False)
print(f"Indexed {success} documents, {failed} failed")

# Refresh index so the new documents are visible to the searches that follow
es.indices.refresh(index="articles")
Searching
Basic Search
# Match all: every document in the index is a hit.
match_all_query = {"match_all": {}}
response = es.search(index="articles", query=match_all_query)

total_hits = response["hits"]["total"]["value"]
print(f"Total hits: {total_hits}")
for hit in response["hits"]["hits"]:
    score = hit["_score"]
    title = hit["_source"]["title"]
    print(f"Score: {score}, Title: {title}")

# Match query (full-text search) against a single field.
content_match_query = {"match": {"content": "elasticsearch search"}}
response = es.search(index="articles", query=content_match_query)

# Multi-match (search multiple fields); "^2" gives title twice the weight.
multi_field_query = {
    "multi_match": {
        "query": "python tutorial",
        "fields": ["title^2", "content", "tags"],
    }
}
response = es.search(index="articles", query=multi_field_query)
Advanced Queries
# Bool query (combine conditions): one clause list per occurrence type.
required_clauses = [{"match": {"content": "elasticsearch"}}]
filter_clauses = [
    {"term": {"author": "John Doe"}},
    {"range": {"views": {"gte": 1000}}},
]
optional_clauses = [{"match": {"tags": "tutorial"}}]
excluded_clauses = [{"term": {"tags": "deprecated"}}]
bool_query = {
    "bool": {
        "must": required_clauses,
        "filter": filter_clauses,
        "should": optional_clauses,
        "must_not": excluded_clauses,
    }
}
response = es.search(index="articles", query=bool_query)

# Phrase match: the words must appear together, in this order.
phrase_query = {"match_phrase": {"content": "search engine"}}
response = es.search(index="articles", query=phrase_query)

# Wildcard query
wildcard_query = {"wildcard": {"title": "elastic*"}}
response = es.search(index="articles", query=wildcard_query)

# Fuzzy query (typo tolerance); the search value is deliberately misspelled.
fuzzy_query = {
    "fuzzy": {
        "title": {
            "value": "elasticsarch",
            "fuzziness": "AUTO",
        }
    }
}
response = es.search(index="articles", query=fuzzy_query)
Filtering and Sorting
# Date range filter: match everything, then keep only documents from 2024.
published_in_2024 = {
    "range": {
        "published_date": {
            "gte": "2024-01-01",
            "lte": "2024-12-31",
        }
    }
}
date_filtered_query = {
    "bool": {
        "must": {"match_all": {}},
        "filter": published_in_2024,
    }
}
# Newest first; ties are broken by view count, highest first.
sort_order = [
    {"published_date": {"order": "desc"}},
    {"views": {"order": "desc"}},
]
response = es.search(
    index="articles",
    query=date_filtered_query,
    sort=sort_order,
    size=10,
    from_=0,  # Pagination offset
)

# Source filtering: return only the listed fields of each hit.
returned_fields = ["title", "author", "published_date"]
response = es.search(
    index="articles",
    query={"match_all": {}},
    _source=returned_fields,
)
Aggregations
Metrics Aggregations
# Average, sum, min, max: one named metric per entry.
metric_aggs = {
    "avg_views": {"avg": {"field": "views"}},
    "total_views": {"sum": {"field": "views"}},
    "max_rating": {"max": {"field": "rating"}},
    "view_stats": {"stats": {"field": "views"}},
}
response = es.search(
    index="articles",
    query={"match_all": {}},
    aggs=metric_aggs,
    size=0,  # Don't return documents, just aggregations
)

aggregations = response["aggregations"]
print(f"Average views: {aggregations['avg_views']['value']}")
print(f"Total views: {aggregations['total_views']['value']}")
stats = aggregations["view_stats"]
print(f"Stats: min={stats['min']}, max={stats['max']}, avg={stats['avg']}")
Bucket Aggregations
# Terms aggregation (group by): one bucket per distinct field value.
terms_aggs = {
    "by_author": {"terms": {"field": "author", "size": 10}},
    "by_tag": {"terms": {"field": "tags", "size": 20}},
}
response = es.search(index="articles", aggs=terms_aggs, size=0)

author_buckets = response["aggregations"]["by_author"]["buckets"]
for bucket in author_buckets:
    print(f"Author: {bucket['key']}, Count: {bucket['doc_count']}")

# Date histogram: one bucket per calendar month of publication.
monthly_histogram = {
    "articles_over_time": {
        "date_histogram": {
            "field": "published_date",
            "calendar_interval": "month",
        }
    }
}
response = es.search(index="articles", aggs=monthly_histogram, size=0)

# Range aggregation: three view-count bands.
view_bands = [
    {"to": 100},
    {"from": 100, "to": 1000},
    {"from": 1000},
]
range_aggs = {
    "view_ranges": {
        "range": {
            "field": "views",
            "ranges": view_bands,
        }
    }
}
response = es.search(index="articles", aggs=range_aggs, size=0)
Nested Aggregations
# Aggregation within aggregation: per-author sub-aggregations.
per_author_aggs = {
    "avg_views": {"avg": {"field": "views"}},
    # The three most-viewed articles of each author, trimmed to two fields.
    "top_articles": {
        "top_hits": {
            "size": 3,
            "sort": [{"views": "desc"}],
            "_source": ["title", "views"],
        }
    },
}
nested_aggs = {
    "by_author": {
        "terms": {"field": "author"},
        "aggs": per_author_aggs,
    }
}
response = es.search(index="articles", aggs=nested_aggs, size=0)

for author_bucket in response["aggregations"]["by_author"]["buckets"]:
    average_views = author_bucket["avg_views"]["value"]
    print(f"\nAuthor: {author_bucket['key']}")
    print(f" Articles: {author_bucket['doc_count']}")
    print(f" Avg views: {average_views:.0f}")
    print(" Top articles:")
    for hit in author_bucket["top_articles"]["hits"]["hits"]:
        article = hit["_source"]
        print(f" - {article['title']}: {article['views']} views")
Search Suggestions
Autocomplete
# Create index with completion suggester
mapping = {
    "mappings": {
        "properties": {
            "title": {"type": "text"},
            # Completion field with a "category" context, so suggestions can
            # be restricted to a category at query time.
            "title_suggest": {
                "type": "completion",
                "contexts": [
                    {"name": "category", "type": "category"}
                ],
            },
        }
    }
}
# Create the index only when it is missing. An explicit existence check is
# used instead of the deprecated per-request `ignore=400` option, which would
# also hide real bad-request errors such as an invalid mapping.
if not es.indices.exists(index="products"):
    es.indices.create(index="products", mappings=mapping["mappings"])

# Index with suggestions
doc = {
    "title": "iPhone 15 Pro",
    "title_suggest": {
        "input": ["iPhone 15 Pro", "iPhone", "Apple iPhone"],
        "contexts": {"category": ["electronics"]},
    },
}
# refresh="wait_for" blocks until the document is searchable; without it the
# suggestion request below can run before the next refresh and return nothing.
es.index(index="products", document=doc, refresh="wait_for")

# Search suggestions
response = es.search(
    index="products",
    suggest={
        "product_suggest": {
            "prefix": "iph",
            "completion": {
                "field": "title_suggest",
                "size": 5,
                "contexts": {
                    "category": ["electronics"]
                },
            },
        }
    },
)
for suggestion in response["suggest"]["product_suggest"][0]["options"]:
    print(f"Suggestion: {suggestion['text']}")
Search-as-you-type
# Create mapping for search-as-you-type
mapping = {
    "mappings": {
        "properties": {
            "name": {
                "type": "search_as_you_type"
            }
        }
    }
}
# Create the index only when it is missing. An explicit existence check is
# used instead of the deprecated per-request `ignore=400` option, which would
# also hide real bad-request errors such as an invalid mapping.
if not es.indices.exists(index="search_index"):
    es.indices.create(index="search_index", mappings=mapping["mappings"])

# Search: bool_prefix treats the last term of the query as a prefix, and the
# _2gram/_3gram subfields are queried alongside the root field.
response = es.search(
    index="search_index",
    query={
        "multi_match": {
            "query": "qui",
            "type": "bool_prefix",
            "fields": [
                "name",
                "name._2gram",
                "name._3gram",
            ],
        }
    },
)
FastAPI Search Service
from fastapi import FastAPI, Query, HTTPException
from elasticsearch import Elasticsearch, NotFoundError
from pydantic import BaseModel
from typing import List, Optional
app = FastAPI()
es = Elasticsearch("http://localhost:9200")
class Article(BaseModel):
    """Payload for a new article, as accepted by the ``POST /articles`` route."""

    # All four fields are required; none declares a default.
    title: str
    content: str
    author: str
    tags: List[str]
class SearchResult(BaseModel):
    """One search hit as returned by the ``GET /search`` route."""

    id: str  # filled from the hit's "_id"
    score: float  # filled from the hit's "_score"
    title: str
    author: str
    # Filled from the hit's "highlight" entry; None when the hit has none.
    highlight: Optional[dict] = None
@app.post("/articles")
async def create_article(article: Article):
    """Index a new article and return its generated document id.

    Args:
        article: Validated request body.

    Returns:
        ``{"id": <document id>}`` with the id taken from the index response.
    """
    # Function-scope import keeps this block self-contained.
    from fastapi.concurrency import run_in_threadpool

    # `es` is the synchronous client; calling it directly inside a coroutine
    # would block the event loop for the whole request, so the call is handed
    # to the thread pool.
    response = await run_in_threadpool(
        es.index, index="articles", document=article.dict()
    )
    return {"id": response["_id"]}
@app.get("/search", response_model=List[SearchResult])
async def search_articles(
    q: str = Query(..., min_length=1),
    author: Optional[str] = None,
    tags: Optional[List[str]] = Query(None),
    page: int = 1,
    size: int = 10
):
    """Full-text search over the ``articles`` index with optional filters.

    Args:
        q: Search text, matched against ``title`` (double weight) and ``content``.
        author: When given, keep only hits whose ``author`` equals this value.
        tags: When given, keep only hits carrying at least one of these tags.
        page: 1-based page number.
        size: Number of hits per page.

    Returns:
        A list of ``SearchResult`` items in the order Elasticsearch returned them.

    Raises:
        HTTPException: 422 when ``page``/``size`` describe an invalid window.
    """
    # Function-scope import keeps this block self-contained.
    from fastapi.concurrency import run_in_threadpool

    # Elasticsearch rejects a negative offset, and rejects from + size beyond
    # index.max_result_window (10,000 by default). Fail early with a client
    # error instead of surfacing the backend error as a 500.
    max_result_window = 10_000
    offset = (page - 1) * size
    if page < 1 or size < 0 or offset + size > max_result_window:
        raise HTTPException(
            status_code=422,
            detail=(
                "page must be >= 1, size must be >= 0, and page * size "
                f"must not exceed {max_result_window}"
            ),
        )

    # Build query: the text match is scored, the filters only narrow the set.
    must = [{"multi_match": {"query": q, "fields": ["title^2", "content"]}}]
    filters = []
    if author:
        filters.append({"term": {"author": author}})
    if tags:
        filters.append({"terms": {"tags": tags}})
    query = {
        "bool": {
            "must": must,
            "filter": filters
        }
    }

    # Execute search. `es` is the synchronous client, so the call is handed to
    # the thread pool to avoid blocking the event loop.
    response = await run_in_threadpool(
        es.search,
        index="articles",
        query=query,
        highlight={
            "fields": {
                "title": {},
                "content": {"fragment_size": 150}
            }
        },
        from_=offset,
        size=size,
    )

    # Format results
    return [
        SearchResult(
            id=hit["_id"],
            score=hit["_score"],
            title=hit["_source"]["title"],
            author=hit["_source"]["author"],
            highlight=hit.get("highlight"),
        )
        for hit in response["hits"]["hits"]
    ]
@app.get("/suggest")
async def suggest(prefix: str = Query(..., min_length=1)):
    """Return up to five completion suggestions for ``prefix``.

    Args:
        prefix: Text typed so far; at least one character.

    Returns:
        ``{"suggestions": [...]}`` holding the text of each suggestion option.
    """
    # Function-scope import keeps this block self-contained.
    from fastapi.concurrency import run_in_threadpool

    # NOTE(review): this queries a `title_suggest` completion field on the
    # `articles` index, but the `articles` mapping shown in this guide does not
    # define that field (only `products` does). The index must map
    # `title_suggest` as type "completion" for this request to succeed.
    #
    # `es` is the synchronous client, so the call is handed to the thread pool
    # to avoid blocking the event loop.
    response = await run_in_threadpool(
        es.search,
        index="articles",
        suggest={
            "title_suggest": {
                "prefix": prefix,
                "completion": {"field": "title_suggest", "size": 5}
            }
        },
    )
    options = response["suggest"]["title_suggest"][0]["options"]
    suggestions = [opt["text"] for opt in options]
    return {"suggestions": suggestions}
Summary
| Operation | Method |
|---|---|
| Index document | es.index() |
| Search | es.search() |
| Full-text | match, multi_match |
| Filter | bool with filter |
| Aggregations | aggs parameter |
| Suggestions | completion type |
Elasticsearch provides powerful, scalable search capabilities for building modern search applications.
Advertisement
Moshiour Rahman
Software Architect & AI Engineer
Enterprise software architect with deep expertise in financial systems, distributed architecture, and AI-powered applications. Building large-scale systems at Fortune 500 companies. Specializing in LLM orchestration, multi-agent systems, and cloud-native solutions. I share battle-tested patterns from real enterprise projects.
Related Articles
Redis Caching: Complete Guide to High-Performance Data Caching
Master Redis caching for web applications. Learn cache strategies, data structures, pub/sub, sessions, and build scalable caching solutions.
DevOps | Stop Wrestling YAML: How to Deploy 50 AI Models with Python Loops
Infrastructure as Code shouldn't be a copy-paste nightmare. Learn how to use Pulumi and Python to programmatically deploy scalable AI infrastructure without the YAML fatigue.
DevOps | PostgreSQL Advanced Guide: From Queries to Performance Tuning
Master PostgreSQL with advanced SQL queries, indexing strategies, performance optimization, JSON support, and production database management.
Comments
Comments are powered by GitHub Discussions.
Configure Giscus at giscus.app to enable comments.