40 · Text Search

Text Index

Create a full-text inverted index on string fields

index

A text index tokenizes string values into individual words, drops stopwords (the, a, is...), reduces the remaining words to their stems, and builds an inverted index enabling fast full-text search. Only one text index per collection is allowed, but it can span multiple fields.

// NOTE: the db.articles examples below are alternatives, not a sequence to run
// together — a collection can hold only one text index at a time.

// Create text index on a single field
db.articles.createIndex({ title: "text" })

// Create text index spanning multiple fields
db.articles.createIndex({
  title:   "text",
  body:    "text",
  tags:    "text"
})

// Weighted text index — title matches count more than body
// (second argument carries the index options: weights, name, default_language)
db.articles.createIndex(
  { title: "text", body: "text", tags: "text" },
  {
    weights: {
      title: 10,  // title match worth 10x
      tags:  5,   // tag match worth 5x
      body:  1    // body match worth 1x (default)
    },
    name: "article_text_idx",
    default_language: "english"
  }
)

// Wildcard text index — index ALL string fields in every document
db.products.createIndex({ "$**": "text" })
// Useful when document structure is dynamic or fields are unknown
// Downside: large index; may include fields you don't want searchable

// Compound text index — text + additional field for filtering
db.articles.createIndex({ body: "text", status: 1 })
// Enables: $text query + { status: "published" } filter using same index

NOTE

A collection can have at most one text index — this includes wildcard text indexes. If you need text search on different subsets of fields with different configurations, you cannot use multiple text indexes. Use the wildcard text index or Atlas Search for more flexibility.

$text Queries

Full-text search with $search, $language, $caseSensitive

queries

// Basic text search — finds any document containing "coffee" OR "espresso"
// (space-separated terms are OR'd)
db.articles.find({ $text: { $search: "coffee espresso" } })

// Placement rules for $text:
//   - at most one $text expression per query
//   - find(): never inside $nor or $elemMatch; inside $or it works only when
//     every clause of the $or array is backed by an index
//   - aggregate(): the $match stage holding $text must be the FIRST stage,
//     and $text cannot appear inside $or or $not
// Combine with other filters using AND (all in top-level query object):
db.articles.find({
  $text:  { $search: "coffee" },
  status: "published",                // AND filter
  year:   { $gte: 2023 }             // AND filter
})

// Case-sensitive search (default is case-insensitive)
db.articles.find({
  $text: {
    $search:        "MongoDB",
    $caseSensitive: true   // "mongodb" and "MONGODB" won't match
  }
})

// Diacritic-sensitive (default ignores accents — "cafe" matches "café")
db.articles.find({
  $text: {
    $search:             "café",
    $diacriticSensitive: true   // only exact accent match
  }
})

// Override language for this query (if index uses default_language: "english")
db.articles.find({
  $text: {
    $search:   "correndo",
    $language: "portuguese"  // uses Portuguese stemmer for this query
  }
})

Phrases & Negation

Exact phrase matching and term exclusion

syntax

// Exact phrase — wrap in escaped double quotes
db.articles.find({ $text: { $search: "\"New York City\"" } })
// Matches: "I visited New York City last summer"
// No match: "New" or "York" alone; must appear consecutively

// Multiple phrases — every quoted phrase must be present (logical AND)
db.articles.find({ $text: { $search: "\"machine learning\" \"deep learning\"" } })
// Returns docs containing "machine learning" AND "deep learning"

// Negation — prefix with - to EXCLUDE term
db.articles.find({ $text: { $search: "coffee -decaf" } })
// Returns docs with "coffee" but NOT "decaf"

// Phrase + negation combined
db.articles.find({ $text: { $search: "\"New York\" -Brooklyn -Queens" } })
// Docs with "New York" but not "Brooklyn" or "Queens"

// Multi-word AND: a second $text is NOT possible (only one $text per query)
// Option 1 — quote every term: quoted terms are ANDed and matched literally
// (no stemming is applied to the quoted text)
db.articles.find({ $text: { $search: "\"coffee\" \"espresso\"" } })

// Option 2 — OR search first, then post-filter in a later $match stage.
// The field is lower-cased so the filter stays case-insensitive like $text;
// note that it inspects only `body`, not the other indexed fields.
db.articles.aggregate([
  { $match: { $text: { $search: "coffee espresso" } } },
  { $match: {
    $expr: {
      $and: [
        { $gt: [{ $indexOfCP: [{ $toLower: "$body" }, "coffee"] },   -1] },
        { $gt: [{ $indexOfCP: [{ $toLower: "$body" }, "espresso"] }, -1] }
      ]
    }
  }}
])

Search Syntax Summary

Syntax	Meaning	Example
`word word`	OR — either word	`"cat dog"` → cat OR dog
`"phrase here"`	Exact phrase (consecutive)	`"\"hot dog\""` → "hot dog"
`-word`	NOT — exclude term	`"coffee -decaf"`
`"phrase" -word`	Phrase AND NOT	`"\"New York\" -Queens"`

textScore Relevance

Sort results by relevance using $meta: "textScore"

relevance

// Retrieve and sort by relevance score
db.articles.find(
  { $text: { $search: "mongodb performance index" } },
  { score: { $meta: "textScore" } }   // add score field to output
).sort({ score: { $meta: "textScore" } })  // sort by relevance descending

// In aggregation pipeline ($match with $text must be the first stage):
db.articles.aggregate([
  {
    $match: { $text: { $search: "mongodb performance index" } }
  },
  {
    $addFields: {
      relevanceScore: { $meta: "textScore" }
    }
  },
  { $sort: { relevanceScore: -1 } },
  { $limit: 10 },
  {
    $project: {
      title: 1,
      relevanceScore: 1,
      // $substrCP counts code points; the deprecated $substr ($substrBytes)
      // counts bytes and errors if the cut lands inside a multi-byte character
      excerpt: { $substrCP: ["$body", 0, 200] }
    }
  }
])

// textScore is influenced by:
// 1. Field weight (title weighted 10x body in weighted index)
// 2. Term frequency in the field
// 3. Number of search terms matching
// Higher score = more relevant to the search query

// Filter by minimum relevance threshold:
db.articles.aggregate([
  { $match: { $text: { $search: "mongodb" } } },
  { $addFields: { score: { $meta: "textScore" } } },
  { $match: { score: { $gt: 1.5 } } },  // minimum relevance threshold
  { $sort: { score: -1 } }
])

Languages & Case Sensitivity

Stemming, stopwords, and multilingual support

language

Text indexes support language-specific stemming (reducing words to root form, e.g., "running" → "run") and stopword filtering (removing words like "the", "a", "is"). The default language is English.

// Create a text index with a specific language
db.articlesES.createIndex(
  { title: "text", body: "text" },
  { default_language: "spanish" }
)
// Spanish stopwords ("el", "la", "de", "que"...) removed
// Spanish stemming applied ("corriendo" stems to "corr")

// Built-in text index languages:
// danish, dutch, english, finnish, french, german, hungarian,
// italian, norwegian, portuguese, romanian, russian, spanish,
// swedish, turkish — plus "none" (no stemming, no stopwords)
// Arabic, Chinese and other languages are not in the built-in list;
// use Atlas Search language analyzers for them

// Per-document language (store language in document field)
db.articles.createIndex(
  { content: "text" },
  { language_override: "lang" }  // use the "lang" field per document
)
db.articles.insertMany([
  { content: "Le café est délicieux", lang: "french" },
  { content: "The coffee is delicious", lang: "english" },
  { content: "El café es delicioso", lang: "spanish" }
])

// Disable stemming for a field: "none" language = no stemming, no stopwords
db.codes.createIndex(
  { errorCode: "text" },
  { default_language: "none" }   // exact token matching only
)

// Default behavior (case/diacritic insensitive):
// "Coffee", "COFFEE", "coffee" all match "$search: coffee"
// "café", "cafe", "CAFE" all match "$search: cafe" (diacritics ignored)

Limitations

What native text search cannot do

limits

Limitation	Workaround
One text index per collection	Wildcard text index, or Atlas Search
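$text is restricted inside logical operators: never in $nor or $elemMatch; in $or only when every clause is indexed (find); not in $or or $not inside an aggregation $match	Keep $text at the top level of the first $match and apply the remaining logic in a later $match stage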
Cannot combine $text with $near in same query	Two separate queries, or Atlas Search
No prefix matching (no "mongo*" wildcard)	Atlas Search (supports wildcard, regex)
No fuzzy matching (typo tolerance)	Atlas Search with fuzzy option
No autocomplete / partial word matching	Atlas Search with autocomplete field type
No synonym support	Atlas Search synonym mappings
No search result highlighting	Atlas Search with highlight option
No faceted search	Atlas Search with $searchMeta facets
Sharded clusters: $text scatters to all shards	Atlas Search on dedicated cluster

WARN

Native $text search is suitable for basic keyword search on small to medium collections. For production search features (autocomplete, fuzzy, facets, relevance tuning), use Atlas Search which is built on Apache Lucene and supports all advanced search capabilities without additional infrastructure.

Atlas Search

Lucene-powered $search — the production-grade alternative

atlas

Atlas Search integrates Apache Lucene with MongoDB Atlas. It uses a dedicated search index (separate from regular MongoDB indexes) and is queried via the $search aggregation stage. The $search stage must always be the first stage in the pipeline.

// Create a search index (Atlas UI or Atlas CLI — not in mongosh)
// Atlas UI → Search → Create Index → JSON editor:
{
  "mappings": {
    "dynamic": true   // auto-index all string fields
  }
}

// Basic Atlas Search query ($search is the first pipeline stage)
db.articles.aggregate([
  {
    $search: {
      index: "default",    // search index name
      text: {
        query: "mongodb performance",
        path:  "title"     // field to search (or ["title", "body"])
      }
    }
  },
  { $limit: 10 },
  { $project: { title: 1, score: { $meta: "searchScore" } } }
])

// Fuzzy matching (typo tolerance)
db.articles.aggregate([
  {
    $search: {
      text: {
        query: "mongorb",   // misspelling of "mongodb"
        path:  "title",
        // maxEdits: up to 2 single-character edits per term
        // prefixLength: the first 3 characters must match exactly
        fuzzy: { maxEdits: 2, prefixLength: 3 }
      }
    }
  }
])

// Autocomplete (partial word matching as user types)
// Requires: search index with autocomplete field type configured
db.products.aggregate([
  {
    $search: {
      autocomplete: {
        query: "wirele",   // prefix → returns "wireless", "wireless headphones"
        path:  "name"
      }
    }
  },
  { $limit: 5 },
  { $project: { name: 1 } }
])

// Compound query (must/should/mustNot/filter)
// NOTE: equals on a string value needs `status` indexed as the `token` type;
// a plain dynamic mapping indexes strings as `string` — verify your index definition
db.articles.aggregate([
  {
    $search: {
      compound: {
        must:    [{ text: { query: "mongodb", path: "body" } }],       // required, scored
        should:  [{ text: { query: "atlas", path: "body" } }],         // optional, boosts score
        mustNot: [{ text: { query: "deprecated", path: "body" } }],    // excludes matching docs
        filter:  [{ equals: { path: "status", value: "published" } }]  // required, not scored
      }
    }
  }
])

// Search with highlighting
db.articles.aggregate([
  {
    $search: {
      text: { query: "coffee", path: "body" },
      highlight: { path: "body" }
    }
  },
  {
    $project: {
      title: 1,
      highlights: { $meta: "searchHighlights" }
    }
  }
])