Elasticsearch API
Basics
# Syntax
# GET _API/parameter
# Cluster health
GET _cluster/health
# Node info
GET _nodes/stats
# Get index info
# GET index_name
GET c_house_price
# List indices
GET /_cat/indices
Create, Read, Update, and Delete (CRUD)
Create index
# PUT index_name
PUT c_house_price
# Create index with mapping
PUT ecommerce_data
{
"mappings": {
"properties": {
"Country": {
"type": "keyword"
},
"CustomerID": {
"type": "long"
},
"Description": {
"type": "text"
},
"InvoiceDate": {
"type": "date",
"format": "M/d/yyyy H:m"
},
"InvoiceNo": {
"type": "keyword"
},
"Quantity": {
"type": "long"
},
"StockCode": {
"type": "keyword"
},
"UnitPrice": {
"type": "double"
}
}
}
}
Insert document
# POST index_name/_doc
# PUT does not work here: without an explicit id, only POST can create a document (the id is auto-generated)
POST c_house_price/_doc
{
"first_name": "Lin",
"last_name": "Chen"
}
# Insert a document with a specific id
# PUT index_name/_doc/id
# POST index_name/_doc/id
# if id already exists, the existing document is overwritten by the new document
PUT c_house_price/_doc/1
{
"first_name": "John",
"last_name": "Starburst"
}
# if the id already exists, a 409 (conflict) error is returned instead of overwriting the document
# PUT index_name/_create/id
# POST index_name/_create/id
PUT c_house_price/_create/3
{
"first_name": "Lin",
"candy": "Jolly Ranchers"
}
# reindex to create an index from an original index
# run the task asynchronously to avoid timeout
POST _reindex?wait_for_completion=false
{
"source": {
"index": "e_commerce"
},
"dest": {
"index": "ecommerce_data"
}
}
# use task id to check the progress
GET _tasks/task_id
Read a document
# GET index_name/_doc/id
GET c_house_price/_doc/1
Update a document
# POST index_name/_update/id
POST c_house_price/_update/1
{
"doc":{
"last_name": "Chen"
}
}
Delete a document
# DELETE index_name/_doc/id
DELETE c_house_price/_doc/1
# Delete an index
# DELETE index_name
DELETE c_house_price
# Delete by query
POST ecommerce_data/_delete_by_query
{
"query": {
"range": {
"UnitPrice": {
"lte": 0
}
}
}
}
Bulk
Insert
POST _bulk
{"index": {"_index": "baseline_1", "_id":1}}
{"Absolute vorticity": 0.0001307,"Apparent temperature": 232.5}
{"index": {"_index": "baseline_1", "_id":2}}
{"Absolute vorticity": 0.0001307,"Apparent temperature": 233.6}
Update
POST _bulk
{"update": {"_index": "baseline_1", "_id": 1}}
{"doc": {"Apparent temperature": 242.5}}
{"update": {"_index": "baseline_1", "_id":2}}
{"doc": {"Apparent temperature": 243.6}}
Delete
POST _bulk
{"delete": {"_index": "baseline_1", "_id": 1}}
{"delete": {"_index": "baseline_1", "_id":2}}
Query
Query Types
- match_all, match all query
- match, matches a text or phrase
- match_phrase, matches phrase
- multi_match, matches a text or phrase with more than one field
- query_string, query a string
- term, deal with structured data like numbers, dates and enums
- range, find the objects having values between the ranges of values given
- bool, compound query
- exists query
- missing query (removed in Elasticsearch 5.0; use an exists query inside bool must_not instead)
- wildcard or regexp query
- geo query
# Search
# GET index_name/_search
GET news_headlines/_search # returns the top 10 hits by default; the reported total is capped at 10,000
# GET index_pattern/_search
GET c*/_search
# return exact total number of hits
GET news_headlines/_search
{
"track_total_hits": true
}
match all query
POST news_headlines/_search
{
"query":{
"match_all":{}
}
}
Search between two dates
GET enter_name_of_the_index_here/_search
{
"query": {
"Specify the type of query here": {
"Enter name of the field here": {
"gte": "Enter lowest value of the range here",
"lte": "Enter highest value of the range here"
}
}
}
}
GET news_headlines/_search
{
"query":{
"range": {
"date": {
"gte": "2017-05-28T00:00:00.000-04:00",
"lt": "2017-12-26T00:00:00.000-05:00"
}
}
}
}
Match Query
- Order and proximity of terms are not important
GET Enter_name_of_index_here/_search
{
"query": {
"match": {
"Specify the field you want to search": {
"query": "Enter search terms"
}
}
}
}
# or (default operator), if any one of the terms matches, the document is returned as a hit
GET news_headlines/_search
{
"query": {
"match": {
"headline": {
"query": "Khloe Kardashian Kendall Jenner" # search keywords in headline
}
}
}
}
# and, all terms need to match
GET news_headlines/_search
{
"query": {
"match": {
"headline": {
"query": "Khloe Kardashian Kendall Jenner",
"operator": "and"
}
}
}
}
# specify the minimum number of terms a document should have to be included
GET news_headlines/_search
{
"query": {
"match": {
"headline": {
"query": "Khloe Kardashian Kendall Jenner",
"minimum_should_match": 3
}
}
}
}
Match_phrase Query
- Order and proximity of terms are important
GET Enter_name_of_index_here/_search
{
"query": {
"match_phrase": {
"Specify the field you want to search": {
"query": "Enter search terms"
}
}
}
}
GET news_headlines/_search
{
"query": {
"match_phrase": {
"headline": {
"query": "Shape of You"
}
}
}
}
Query Multiple Fields
GET Enter_the_name_of_the_index_here/_search
{
"query": {
"multi_match": {
"query": "Enter search terms here",
"fields": [
"List the field you want to search over",
"List the field you want to search over",
"List the field you want to search over"
]
}
}
}
GET news_headlines/_search
{
"query": {
"multi_match": {
"query": "Michelle Obama",
"fields": [
"headline",
"short_description",
"authors"
]
}
}
}
# designate one field to carry more weight than the others
# by ^number
GET Enter_the_name_of_the_index_here/_search
{
"query": {
"multi_match": {
"query": "Enter search terms",
"fields": [
"List field you want to boost^2",
"List field you want to search over",
"List field you want to search over^3"
]
}
}
}
GET news_headlines/_search
{
"query": {
"multi_match": {
"query": "Michelle Obama",
"fields": [
"headline^2",
"short_description",
"authors"
]
}
}
}
# improve precision with phrase type match
GET Enter_the_name_of_the_index_here/_search
{
"query": {
"multi_match": {
"query": "Enter search phrase",
"fields": [
"List field you want to boost^2",
"List field you want to search over",
"List field you want to search over"
],
"type": "phrase"
}
}
}
GET news_headlines/_search
{
"query": {
"multi_match": {
"query": "party planning",
"fields": [
"headline^2",
"short_description"
],
"type": "phrase"
}
}
}
Combine multiple queries
GET index_name/_search
{
"query":{
"bool":{
"must":[{}], # items must appear in matching documents, AND
"must_not":[{}], # NOT
"should":[{}], # at least one of items appear in matching documents, OR
"filter":[{}] # query filter
}
}
}
# must
GET news_headlines/_search
{
"query": {
"bool": {
"must": [
{
"match_phrase": {
"headline": "Michelle Obama"
}
},
{
"match": {
"category": "POLITICS"
}
}
]
}
}
}
# must_not
GET news_headlines/_search
{
"query": {
"bool": {
"must": {
"match_phrase": {
"headline": "Michelle Obama"
}
},
"must_not":[
{
"match": {
"category": "WEDDINGS"
}
}
]
}
}
}
# should
GET news_headlines/_search
{
"query": {
"bool": {
"must": [
{
"match_phrase": {
"headline": "Michelle Obama"
}
}
],
"should":[
{
"match_phrase": {
"category": "BLACK VOICES"
}
}
]
}
}
}
# filter
GET news_headlines/_search
{
"query": {
"bool": {
"must": [
{
"match_phrase": {
"headline": "Michelle Obama"
}
}
],
"filter":{
"range":{
"date": {
"gte": "2014-03-25",
"lte": "2016-03-25"
}
}
}
}
}
}
GET news_headlines/_search
{
"query": {
"bool": {
"should": [
{
"match_phrase": {
"category": "ENTERTAINMENT"
}
}
],
"filter": [
{
"exists": {
"field": "headline"
}
}
]
}
}
}
Geo queries
POST /geo_example/_doc?refresh
{
"name": "Chapter One, London, UK",
"location": {
"type": "point",
"coordinates": [11.660544, 57.800286]
}
}
Aggregation
Metric Aggregations
- compute numeric values
- sum, min, max, avg, cardinality, etc.
Bucket Aggregations
- aggregate on several subsets of documents
- group documents into several sets of documents called bucket
- all documents in a bucket share a common criterion
- types
- Date Histogram Aggregation
- Fixed_interval
- milliseconds (ms)
- seconds (s)
- minutes (m)
- hours (h)
- days (d)
- Calendar_interval
- minute, 1m
- hour, 1h
- day, 1d
- week, 1w
- month, 1M
- quarter, 1q
- year, 1y
- Histogram Aggregation
- creates buckets based on any numerical interval
- Range Aggregation
- lets you define intervals of varying sizes
- Terms Aggregation
- creates a new bucket for every unique term it encounters for the specified field
GET Enter_name_of_the_index_here/_search
{
"aggs": {
"Name your aggregation here": {
"Specify aggregation type here": { # terms, stats, geodistance
"field": "Name the field you want to aggregate here",
"size": "State how many buckets you want returned here"
}
}
}
}
GET news_headlines/_search
{
"aggs": {
"by_category": { # aggregation name
"terms": { # aggregation type
"field": "category", # field name
"size": 100 # number of buckets returned
}
}
}
}
Metric Aggregations
GET Enter_name_of_the_index_here/_search
{
"aggs": {
"Name your aggregations here": {
"sum": {
"field": "Name the field you want to aggregate on here"
}
}
}
}
# sum
GET ecommerce_data/_search
{
"size": 0, # prevents Elasticsearch from fetching the top 10 hits
"aggs": {
"sum_unit_price": {
"sum": {
"field": "UnitPrice"
}
}
}
}
# stats, list count, min, max, avg, sum
GET ecommerce_data/_search
{
"size": 0,
"aggs": {
"all_stats_unit_price": {
"stats": {
"field": "UnitPrice"
}
}
}
}
# Cardinality Aggregation
GET ecommerce_data/_search
{
"size":0,
"aggs": {
"number_of_unique_customers": {
"cardinality": {
"field": "CustomerID"
}
}
}
}
# Limiting the scope of an aggregation
GET ecommerce_data/_search
{
"size": 0,
"query": {
"match": {
"Country": "Germany"
}
},
"aggs": {
"germany_average_unit_price": {
"avg": {
"field": "UnitPrice"
}
}
}
}
Bucket Aggregations
# Date Histogram Aggregation
GET ecommerce_data/_search
{
"size": 0,
"aggs": {
"Name your aggregations here": {
"date_histogram": {
"field":"Name the field you want to aggregate on here",
"fixed_interval": "Specify the interval here"
}
}
}
}
GET ecommerce_data/_search
{
"size": 0,
"aggs": {
"transactions_by_8_hrs": {
"date_histogram": {
"field": "InvoiceDate",
"fixed_interval": "8h"
}
}
}
}
GET ecommerce_data/_search
{
"size": 0,
"aggs": {
"Name your aggregations here": {
"date_histogram": {
"field":"Name the field you want to aggregate on here",
"calendar_interval": "Specify the interval here"
}
}
}
}
GET ecommerce_data/_search
{
"size": 0,
"aggs": {
"transactions_by_month": {
"date_histogram": {
"field": "InvoiceDate",
"calendar_interval": "1M",
"order": {
"_key": "desc"
}
}
}
}
}
# Histogram Aggregation
GET ecommerce_data/_search
{
"size": 0,
"aggs": {
"Name your aggregations here": {
"histogram": {
"field":"Name the field you want to aggregate on here",
"interval": Specify the interval here
}
}
}
}
GET ecommerce_data/_search
{
"size": 0,
"aggs": {
"transactions_per_price_interval": {
"histogram": {
"field": "UnitPrice",
"interval": 10
}
}
}
}
# Range Aggregation
GET Enter_name_of_the_index_here/_search
{
"size": 0,
"aggs": {
"Name your aggregations here": {
"range": {
"field": "Name the field you want to aggregate on here",
"ranges": [
{
"to": x
},
{
"from": x,
"to": y
},
{
"from": z
}
]
}
}
}
}
GET ecommerce_data/_search
{
"size": 0,
"aggs": {
"transactions_per_custom_price_ranges": {
"range": {
"field": "UnitPrice",
"ranges": [
{
"to": 50
},
{
"from": 50,
"to": 200
},
{
"from": 200
}
]
}
}
}
}
# Terms Aggregation
GET Enter_name_of_the_index_here/_search
{
"aggs": {
"Name your aggregations here": {
"terms": {
"field": "Name the field you want to aggregate on here",
"size": State how many top results you want returned here
}
}
}
}
GET ecommerce_data/_search
{
"size": 0,
"aggs": {
"top_5_customers": {
"terms": {
"field": "CustomerID",
"size": 5
}
}
}
}
Combined Aggregations
GET ecommerce_data/_search
{
"size": 0,
"aggs": {
"transactions_per_day": {
"date_histogram": {
"field": "InvoiceDate",
"calendar_interval": "day"
},
"aggs": {
"daily_revenue": {
"sum": {
"script": {
"source": "doc['UnitPrice'].value * doc['Quantity'].value"
}
}
}
}
}
}
}
GET ecommerce_data/_search
{
"size": 0,
"aggs": {
"transactions_per_day": {
"date_histogram": {
"field": "InvoiceDate",
"calendar_interval": "day"
},
"aggs": {
"daily_revenue": {
"sum": {
"script": {
"source": "doc['UnitPrice'].value * doc['Quantity'].value"
}
}
},
"number_of_unique_customers_per_day": {
"cardinality": {
"field": "CustomerID"
}
}
}
}
}
}
Query and Aggregation
Pull documents with query, then analyze the query data and create summary with aggregations
GET Enter_name_of_the_index_here/_search
{
"query": {
"Enter match or match_phrase here": { "Enter the name of the field": "Enter the value you are looking for" }
},
"aggs": {
"Name your aggregation here": {
"Specify aggregation type here": {
"field": "Name the field you want to aggregate here",
"size": "State how many buckets you want returned here"
}
}
}
}
GET news_headlines/_search
{
"query": {
"match": {
"category": "ENTERTAINMENT"
}
},
"aggs": {
"popular_in_entertainment": { # aggregation name
"significant_text": { # aggregation type
"field": "headline"
}
}
}
}
Mapping
defines how a document and its fields are indexed and stored
helps optimize the performance of Elasticsearch and save disk space
Rules
- If you do not define a mapping ahead of time, Elasticsearch dynamically creates the mapping for you
- If you do decide to define your own mapping, you can do so at index creation
- ONE mapping is defined per index. Once the index has been created, we can only add new fields to a mapping. We CANNOT change the mapping of an existing field
- If you must change the type of an existing field, you must create a new index with the desired mapping, then reindex all documents into the new index
String types
- Text
- designed for full-text searches
- Text Analysis
- analyze the Text before it's stored into the Inverted Index, in which each record contains a term and its doc ids
- use tokens in a query in a case-insensitive manner
- Keyword
- designed for exact searches, aggregations, and sorting
- store data using doc values, in which each record contains a doc id and original field values
- use the whole value in a search query
- Default
- if no mappings, Elasticsearch dynamically creates mapping and maps all strings to both Text and Keyword
View Mapping
GET Enter_name_of_the_index_here/_mapping
GET temp_index/_mapping
Create an index with Mapping
PUT produce_index
{
"mappings": {
"properties": {
"botanical_name": {
"enabled": false # disabled to save disk space
},
"country_of_origin": { # text and keyword
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"date_purchased": { # date
"type": "date"
},
"description": { # text
"type": "text"
},
"name": { # text
"type": "text"
},
"produce_type": { # keyword
"type": "keyword"
},
"quantity": {
"type": "long"
},
"unit_price": {
"type": "float"
},
"vendor_details": {
"enabled": false
}
}
}
}
Update Mapping
# step 1, create a new index
PUT produce_v2
{
"mappings": {
"properties": {
"botanical_name": {
"type": "text"
},
"country_of_origin": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"date_purchased": {
"type": "date"
},
"description": {
"type": "text"
},
"name": {
"type": "text"
},
"organic": {
"type": "boolean"
},
"produce_type": {
"type": "keyword"
},
"quantity": {
"type": "long"
},
"unit_price": {
"type": "float"
},
"vendor_details": {
"type": "object",
"enabled": false
}
}
}
}
# step 2: Reindex the data from the original index(produce_index) to the one you just created(produce_v2)
# reindex copies existing data from a source index to a destination index
POST _reindex
{
"source": {
"index": "produce_index"
},
"dest": {
"index": "produce_v2"
}
}
Runtime Field
Lets you create and query fields that are evaluated only at query time
Runtime fields are not physically saved in the index
# step 1. create a runtime field and add it to the mapping of the existing index
PUT Enter-name-of-index/_mapping
{
"runtime": {
"Name-your-runtime-field-here": {
"type": "Specify-field-type-here",
"script": {
"source": "Specify the formula you want executed"
}
}
}
}
PUT produce_v2/_mapping
{
"runtime": {
"total": {
"type": "double",
"script": {
"source": "emit(doc['unit_price'].value* doc['quantity'].value)"
}
}
}
}
# step 2. use runtime fields
GET Enter_name_of_the_index_here/_search
{
"size": 0,
"aggs": {
"Name your aggregations here": {
"Specify the aggregation type here": {
"field": "Name the field you want to aggregate on here"
}
}
}
}
GET produce_v2/_search
{
"size": 0,
"aggs": {
"total_expense": {
"sum": {
"field": "total"
}
}
}
}
Troubleshooting
5XX errors
- internal server error in Elasticsearch
- look at the Elasticsearch log and identify the problem
4XX errors
Reference