Elasticsearch: Complete Full-Text Search Guide

What is Elasticsearch?

Elasticsearch is a distributed search and analytics engine built on Apache Lucene. It provides near real-time search, horizontal scalability, and powerful full-text query capabilities.

Use Cases

Use Case	Examples
Search	E-commerce, documentation
Logging	ELK stack, observability
Analytics	Business intelligence
APM	Application monitoring

Getting Started

Installation

# Docker (recommended)
# NOTE: security is disabled below for local development only; do not use
# this configuration for a node that is reachable from other machines.
docker run -d --name elasticsearch \
  -p 9200:9200 -p 9300:9300 \
  -e "discovery.type=single-node" \
  -e "xpack.security.enabled=false" \
  elasticsearch:8.11.0

# Python client
# Keep the client on the same major version as the 8.11.0 server started above;
# an unpinned install may pull a newer major that the 8.x server rejects.
pip install "elasticsearch>=8,<9"

Basic Connection

from elasticsearch import Elasticsearch

# Build a client pointed at the local single-node instance
es = Elasticsearch("http://localhost:9200")

# Only ask for cluster metadata once the node has answered the ping
is_reachable = es.ping()
if is_reachable:
    print("Connected to Elasticsearch")
    info = es.info()
    print(f"Version: {info['version']['number']}")

Indexing Documents

Create Index

from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")

# Define index mapping and settings.
# `title`/`content` are analyzed text; `author`/`tags` are exact-match keywords.
mapping = {
    "mappings": {
        "properties": {
            "title": {"type": "text", "analyzer": "english"},
            "content": {"type": "text", "analyzer": "english"},
            "author": {"type": "keyword"},
            "tags": {"type": "keyword"},
            "published_date": {"type": "date"},
            "views": {"type": "integer"},
            "rating": {"type": "float"}
        }
    },
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    }
}

# Create index.
# The 8.x Python client deprecates both `body=` and the per-request `ignore=`
# argument: the request body is passed as top-level keyword arguments and the
# tolerated HTTP status codes are configured through `.options()`.
# Status 400 is tolerated so that re-running this snippet against an index
# that already exists does not raise.
es.options(ignore_status=400).indices.create(
    index="articles",
    mappings=mapping["mappings"],
    settings=mapping["settings"],
)

# Check if index exists
if es.indices.exists(index="articles"):
    print("Index created successfully")

Index Documents

# Index single document
# The field names follow the properties declared in the "articles" mapping.
doc = {
    "title": "Getting Started with Elasticsearch",
    "content": "Elasticsearch is a powerful search engine...",
    "author": "John Doe",
    "tags": ["elasticsearch", "search", "tutorial"],
    "published_date": "2024-01-15",
    "views": 1500,
    "rating": 4.5,
}

# Store it under an explicit ID and echo back the ID from the response
target_index, document_id = "articles", 1
response = es.index(index=target_index, id=document_id, document=doc)
print(f"Indexed document: {response['_id']}")

# Bulk indexing
from elasticsearch.helpers import bulk

# One bulk action per article; IDs 2..101 follow the single document indexed
# with id=1 above.
documents = [
    {
        "_index": "articles",
        "_id": i,
        "_source": {
            "title": f"Article {i}",
            "content": f"Content for article {i}",
            "author": "Jane Doe",
            "tags": ["python", "tutorial"],
            "published_date": "2024-01-20",
            "views": 100 * i,
            "rating": 4.0
        }
    }
    for i in range(2, 102)
]

# By default `bulk()` returns (success_count, list_of_error_dicts) and raises
# as soon as any action fails, so "failed" would print a list and could never
# be reached with a non-empty value. `stats_only=True` makes the second item
# an integer count, and `raise_on_error=False` lets failures be counted
# instead of aborting.
success, failed = bulk(es, documents, stats_only=True, raise_on_error=False)
print(f"Indexed {success} documents, {failed} failed")

# Refresh index
es.indices.refresh(index="articles")

Searching

Basic Search

# Match all
match_all_query = {"match_all": {}}
response = es.search(index="articles", query=match_all_query)

# Report the hit count, then the score and title of each returned hit
print(f"Total hits: {response['hits']['total']['value']}")
for hit in response['hits']['hits']:
    print(f"Score: {hit['_score']}, Title: {hit['_source']['title']}")

# Match query (full-text search)
content_match_query = {"match": {"content": "elasticsearch search"}}
response = es.search(index="articles", query=content_match_query)

# Multi-match (search multiple fields)
weighted_fields = ["title^2", "content", "tags"]  # title has 2x weight
multi_field_query = {
    "multi_match": {"query": "python tutorial", "fields": weighted_fields}
}
response = es.search(index="articles", query=multi_field_query)

Advanced Queries

# Bool query (combine conditions)
# Each clause type is built separately, then assembled into one bool query.
required_matches = [{"match": {"content": "elasticsearch"}}]
exact_filters = [
    {"term": {"author": "John Doe"}},
    {"range": {"views": {"gte": 1000}}},
]
optional_matches = [{"match": {"tags": "tutorial"}}]
exclusions = [{"term": {"tags": "deprecated"}}]

bool_query = {
    "bool": {
        "must": required_matches,
        "filter": exact_filters,
        "should": optional_matches,
        "must_not": exclusions,
    }
}
response = es.search(index="articles", query=bool_query)

# Phrase match
phrase_query = {"match_phrase": {"content": "search engine"}}
response = es.search(index="articles", query=phrase_query)

# Wildcard query
wildcard_query = {"wildcard": {"title": "elastic*"}}
response = es.search(index="articles", query=wildcard_query)

# Fuzzy query (typo tolerance)
misspelled_term = "elasticsarch"  # Typo
fuzzy_query = {
    "fuzzy": {"title": {"value": misspelled_term, "fuzziness": "AUTO"}}
}
response = es.search(index="articles", query=fuzzy_query)

Filtering and Sorting

# Date range filter
published_in_2024 = {
    "range": {
        "published_date": {"gte": "2024-01-01", "lte": "2024-12-31"}
    }
}
date_filtered_query = {
    "bool": {"must": {"match_all": {}}, "filter": published_in_2024}
}
# Sort keys are applied in list order: date first, then view count
sort_order = [
    {"published_date": {"order": "desc"}},
    {"views": {"order": "desc"}},
]
response = es.search(
    index="articles",
    query=date_filtered_query,
    sort=sort_order,
    size=10,
    from_=0,  # Pagination offset
)

# Source filtering
returned_fields = ["title", "author", "published_date"]
response = es.search(
    index="articles",
    query={"match_all": {}},
    _source=returned_fields,
)

Aggregations

Metrics Aggregations

# Average, sum, min, max
# Each entry maps a result name to one metric computed over a numeric field.
metric_aggs = {
    "avg_views": {"avg": {"field": "views"}},
    "total_views": {"sum": {"field": "views"}},
    "max_rating": {"max": {"field": "rating"}},
    "view_stats": {"stats": {"field": "views"}},
}
response = es.search(
    index="articles",
    query={"match_all": {}},
    aggs=metric_aggs,
    size=0,  # Don't return documents, just aggregations
)

# Single-value metrics are read from the "value" key of their result
print(f"Average views: {response['aggregations']['avg_views']['value']}")
print(f"Total views: {response['aggregations']['total_views']['value']}")

# The stats result is read field by field (min, max, avg)
stats = response['aggregations']['view_stats']
print(f"Stats: min={stats['min']}, max={stats['max']}, avg={stats['avg']}")

Bucket Aggregations

# Terms aggregation (group by)
group_by_aggs = {
    "by_author": {"terms": {"field": "author", "size": 10}},
    "by_tag": {"terms": {"field": "tags", "size": 20}},
}
response = es.search(index="articles", aggs=group_by_aggs, size=0)

# Print one line per author bucket
for bucket in response['aggregations']['by_author']['buckets']:
    print(f"Author: {bucket['key']}, Count: {bucket['doc_count']}")

# Date histogram
monthly_histogram = {
    "date_histogram": {
        "field": "published_date",
        "calendar_interval": "month",
    }
}
response = es.search(
    index="articles",
    aggs={"articles_over_time": monthly_histogram},
    size=0,
)

# Range aggregation
view_count_ranges = [
    {"to": 100},
    {"from": 100, "to": 1000},
    {"from": 1000},
]
response = es.search(
    index="articles",
    aggs={
        "view_ranges": {
            "range": {"field": "views", "ranges": view_count_ranges}
        }
    },
    size=0,
)

Nested Aggregations

# Aggregation within aggregation
# Sub-aggregations evaluated inside every author bucket: the mean view count
# and the three most-viewed articles (title and views only).
top_three_by_views = {
    "top_hits": {
        "size": 3,
        "sort": [{"views": "desc"}],
        "_source": ["title", "views"],
    }
}
per_author_aggs = {
    "avg_views": {"avg": {"field": "views"}},
    "top_articles": top_three_by_views,
}
response = es.search(
    index="articles",
    aggs={
        "by_author": {
            "terms": {"field": "author"},
            "aggs": per_author_aggs,
        }
    },
    size=0,
)

# Walk the author buckets and print each bucket's summary and top hits
for author_bucket in response['aggregations']['by_author']['buckets']:
    print(f"\nAuthor: {author_bucket['key']}")
    print(f"  Articles: {author_bucket['doc_count']}")
    print(f"  Avg views: {author_bucket['avg_views']['value']:.0f}")
    print("  Top articles:")
    for hit in author_bucket['top_articles']['hits']['hits']:
        print(f"    - {hit['_source']['title']}: {hit['_source']['views']} views")

Search Suggestions

Autocomplete

# Create index with completion suggester
# `title_suggest` is a completion field scoped by a "category" context.
mapping = {
    "mappings": {
        "properties": {
            "title": {"type": "text"},
            "title_suggest": {
                "type": "completion",
                "contexts": [
                    {"name": "category", "type": "category"}
                ]
            }
        }
    }
}

# `body=` and per-request `ignore=` are deprecated in the 8.x Python client;
# pass the mapping as a keyword argument and tolerate 400 (index already
# exists) through `.options()`.
es.options(ignore_status=400).indices.create(
    index="products",
    mappings=mapping["mappings"],
)

# Index with suggestions
doc = {
    "title": "iPhone 15 Pro",
    "title_suggest": {
        "input": ["iPhone 15 Pro", "iPhone", "Apple iPhone"],
        "contexts": {"category": ["electronics"]}
    }
}
# refresh="wait_for" makes the call return only once the document is visible
# to search; without it the suggest request below can run before the next
# refresh and come back empty.
es.index(index="products", document=doc, refresh="wait_for")

# Search suggestions
response = es.search(
    index="products",
    suggest={
        "product_suggest": {
            "prefix": "iph",
            "completion": {
                "field": "title_suggest",
                "size": 5,
                "contexts": {
                    "category": ["electronics"]
                }
            }
        }
    }
)

# Print the options returned for the single "product_suggest" entry
for suggestion in response['suggest']['product_suggest'][0]['options']:
    print(f"Suggestion: {suggestion['text']}")

Search-as-you-type

# Create mapping for search-as-you-type
mapping = {
    "mappings": {
        "properties": {
            "name": {
                "type": "search_as_you_type"
            }
        }
    }
}

# `body=` and per-request `ignore=` are deprecated in the 8.x Python client;
# pass the mapping as a keyword argument and tolerate 400 (index already
# exists) through `.options()`.
es.options(ignore_status=400).indices.create(
    index="search_index",
    mappings=mapping["mappings"],
)

# Search
# The query targets the root field together with its `_2gram` and `_3gram`
# subfields, using the `bool_prefix` multi_match type.
response = es.search(
    index="search_index",
    query={
        "multi_match": {
            "query": "qui",
            "type": "bool_prefix",
            "fields": [
                "name",
                "name._2gram",
                "name._3gram"
            ]
        }
    }
)

FastAPI Search Service

from typing import List, Optional

from elasticsearch import Elasticsearch, NotFoundError
from fastapi import FastAPI, HTTPException, Query
from fastapi.concurrency import run_in_threadpool
from pydantic import BaseModel

# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()
# Module-level Elasticsearch client shared by every request handler below.
es = Elasticsearch("http://localhost:9200")

# Request body accepted by POST /articles; the model is serialized as-is and
# indexed into the "articles" index.
class Article(BaseModel):
    # Full-text fields: /search runs its multi_match query over these two.
    title: str
    content: str
    # Matched by the optional `author` term filter of /search.
    author: str
    # Matched by the optional `tags` terms filter of /search.
    tags: List[str]

# One entry of the /search response, built from a single Elasticsearch hit.
class SearchResult(BaseModel):
    # Taken from the hit's `_id`.
    id: str
    # Taken from the hit's `_score`.
    score: float
    # Copied from the hit's `_source`.
    title: str
    author: str
    # The hit's "highlight" entry; None when the hit carries no such key.
    highlight: Optional[dict] = None

@app.post("/articles")
async def create_article(article: Article):
    """Index a new article into the ``articles`` index.

    Args:
        article: Validated request body.

    Returns:
        ``{"id": ...}`` carrying the ``_id`` reported by the index response.
    """
    # Pydantic v2 renamed ``.dict()`` to ``.model_dump()`` and deprecates the
    # old name; prefer the new method when it exists so the handler works on
    # either major version.
    if hasattr(article, "model_dump"):
        payload = article.model_dump()
    else:
        payload = article.dict()

    # ``es`` is the synchronous client. Calling it directly inside an
    # ``async def`` handler would block the event loop for the duration of the
    # HTTP round trip, so the call is pushed to a worker thread.
    response = await run_in_threadpool(
        es.index, index="articles", document=payload
    )
    return {"id": response["_id"]}

@app.get("/search", response_model=List[SearchResult])
async def search_articles(
    q: str = Query(..., min_length=1),
    author: Optional[str] = None,
    tags: Optional[List[str]] = Query(None),
    page: int = Query(1, ge=1),
    size: int = Query(10, ge=1, le=100),
):
    """Full-text search over articles with optional filters and paging.

    Args:
        q: Search text, matched against ``title`` (boosted 2x) and ``content``.
        author: When given, only articles whose ``author`` equals this value.
        tags: When given, only articles carrying at least one of these tags.
        page: 1-based page number.
        size: Page size, between 1 and 100.

    Returns:
        A list of ``SearchResult`` items for the requested page.

    Raises:
        HTTPException: 400 when the requested page reaches past the result
            window Elasticsearch allows for ``from + size`` paging.
    """
    # Elasticsearch rejects from + size beyond index.max_result_window
    # (10,000 by default); fail with a clear 400 instead of a server error.
    max_result_window = 10_000
    offset = (page - 1) * size
    if offset + size > max_result_window:
        raise HTTPException(
            status_code=400,
            detail=f"page * size must not exceed {max_result_window}",
        )

    # Build query: scored full-text clause plus non-scoring exact filters
    must = [{"multi_match": {"query": q, "fields": ["title^2", "content"]}}]
    filters = []

    if author:
        filters.append({"term": {"author": author}})
    if tags:
        filters.append({"terms": {"tags": tags}})

    query = {
        "bool": {
            "must": must,
            "filter": filters
        }
    }

    # Execute search. ``es`` is the synchronous client, so the call runs in a
    # worker thread to keep the event loop free while waiting on the network.
    response = await run_in_threadpool(
        es.search,
        index="articles",
        query=query,
        highlight={
            "fields": {
                "title": {},
                "content": {"fragment_size": 150}
            }
        },
        from_=offset,
        size=size,
    )

    # Format results: one SearchResult per hit, highlight only when present
    return [
        SearchResult(
            id=hit["_id"],
            score=hit["_score"],
            title=hit["_source"]["title"],
            author=hit["_source"]["author"],
            highlight=hit.get("highlight"),
        )
        for hit in response["hits"]["hits"]
    ]

@app.get("/suggest")
async def suggest(prefix: str = Query(..., min_length=1)):
    """Return up to five article titles matching the typed ``prefix``.

    The ``articles`` mapping used in this guide declares ``title`` as a
    ``text`` field and has no ``title_suggest`` completion field, so a
    completion suggester against ``articles`` is rejected by Elasticsearch.
    A ``match_phrase_prefix`` query on ``title`` provides prefix matching
    with the mapping that actually exists.

    Args:
        prefix: Text typed so far (at least one character).

    Returns:
        ``{"suggestions": [...]}`` with distinct titles in relevance order.
    """
    # ``es`` is the synchronous client; run it in a worker thread so the
    # event loop is not blocked while waiting for the response.
    response = await run_in_threadpool(
        es.search,
        index="articles",
        query={"match_phrase_prefix": {"title": prefix}},
        _source=["title"],  # only the title is needed for a suggestion
        size=5,
    )

    # Several articles may share a title; dict.fromkeys drops duplicates
    # while keeping the order the hits were returned in.
    suggestions = list(dict.fromkeys(
        hit["_source"]["title"]
        for hit in response["hits"]["hits"]
    ))
    return {"suggestions": suggestions}

Summary

Operation	Method
Index document	`es.index()`
Search	`es.search()`
Full-text	`match`, `multi_match`
Filter	`bool` with `filter`
Aggregations	`aggs` parameter
Suggestions	`completion` type

Elasticsearch provides powerful, scalable search capabilities for building modern search applications.