Elasticsearch API with Python
Basics
import requests
# Cluster health
# GET _cluster/health
health_url = 'http://localhost:9200/_cluster/health'
response = requests.get(health_url)
response.json()  # parsed JSON body (a dict)
# Node statistics
# GET _nodes/stats
stats_url = 'http://localhost:9200/_nodes/stats'
response = requests.get(stats_url)
response.json()
Create, Read, Update, and Delete (CRUD)
Create
# Create an empty index named tutorial_curl
# PUT tutorial_curl
index_url = 'http://localhost:9200/tutorial_curl'
response = requests.put(index_url)
response.json()
# Create the index together with an explicit field mapping.
# PUT tutorial_curl
# NOTE: index creation is rejected when tutorial_curl already exists, so
# delete the index first if it was previously created without a mapping.
headers = {
    # Already added when you pass json=
    # 'Content-Type': 'application/json',
}
json_data = {
    'mappings': {
        'properties': {
            'Firstname': {
                'type': 'keyword',
            },
            # Names are strings: a 'long' mapping would make Elasticsearch
            # reject documents such as {'Lastname': 'Chen'}.
            'Lastname': {
                'type': 'keyword',
            },
        },
    },
}
response = requests.put('http://localhost:9200/tutorial_curl', headers=headers, json=json_data)
Insert
# Index a document without specifying an id
# POST tutorial_curl/_doc
headers = {}  # Content-Type: application/json is already added by json=
json_data = dict(Firstname='Lin', Lastname='Chen')
response = requests.post(
    'http://localhost:9200/tutorial_curl/_doc',
    headers=headers,
    json=json_data,
)
# Index a document under an explicit id (here: 1)
# POST tutorial_curl/_doc/1
headers = {}  # Content-Type: application/json is already added by json=
json_data = dict(Firstname='Lin', Lastname='Chen')
response = requests.post(
    'http://localhost:9200/tutorial_curl/_doc/1',
    headers=headers,
    json=json_data,
)
Read
# Fetch the document stored under id 1
# GET tutorial_curl/_doc/1
document_url = 'http://localhost:9200/tutorial_curl/_doc/1'
response = requests.get(document_url)
response.json()
Update
# Update the document with id 1 through the _update endpoint
# POST tutorial_curl/_update/1
headers = {}  # Content-Type: application/json is already added by json=
new_values = {'Firstname': 'Unknown', 'Lastname': 'Unknown'}
# The fields to change are sent under the 'doc' key
json_data = {'doc': new_values}
response = requests.post(
    'http://localhost:9200/tutorial_curl/_update/1',
    headers=headers,
    json=json_data,
)
Delete
# The deletes run from the most specific target to the least specific one:
# once the index itself is deleted, the document-level requests below would
# only get an index-not-found error back.
# delete document
# DELETE tutorial_curl/_doc/1
response = requests.delete('http://localhost:9200/tutorial_curl/_doc/1')
# delete by query
# POST tutorial_curl/_delete_by_query
headers = {
    # Already added when you pass json=
    # 'Content-Type': 'application/json',
}
json_data = {
    'query': {
        'match': {
            'Firstname': {
                'query': 'Lin',
            },
        },
    },
}
response = requests.post('http://localhost:9200/tutorial_curl/_delete_by_query', headers=headers, json=json_data)
# delete index (removes the index together with every document left in it)
# DELETE tutorial_curl
response = requests.delete('http://localhost:9200/tutorial_curl')
response.json()
Bulk
Insert
import requests
# data= sends the body as-is, so the Content-Type must be set explicitly
headers = {'Content-Type': 'application/json'}
# The bulk body is one JSON object per line: an action line followed by the
# document itself. Every line, including the last one, must end with \n.
data = (
    '{"index": {"_index": "baseline_1", "_id":3}}\n'
    '{"Absolute vorticity": 0.0001307,"Apparent temperature": 232.5}\n'
    '{"index": {"_index": "baseline_1", "_id":4}}\n'
    '{"Absolute vorticity": 0.0001307,"Apparent temperature": 233.6}\n'
)
bulk_url = 'http://localhost:9200/_bulk'
response = requests.post(bulk_url, headers=headers, data=data)
Delete
import requests
# Delete every document of an index in one request.
# POST index_name/_delete_by_query?conflicts=proceed
headers = {
    # Already added when you pass json= but not when you pass data=
    # 'Content-Type': 'application/json',
}
params = {
    # Sent as the ?conflicts=proceed query-string parameter
    'conflicts': 'proceed',
}
json_data = {
    'query': {
        'match_all': {},
    },
}
# 'index_name' is a placeholder for the target index. The port must be given
# explicitly: without :9200 the request would go to port 80 instead of the
# Elasticsearch HTTP port used everywhere else in this file.
response = requests.post('http://localhost:9200/index_name/_delete_by_query', params=params, headers=headers, json=json_data)
Query
# By default, Elasticsearch tracks the total hit count only up to 10,000
# matches and returns the first 10 hits.
# GET news_headlines2/_search
headers = {}  # Content-Type: application/json is already added by json=
date_range = {
    'gte': '2017-05-28T00:00:00.000-04:00',
    'lt': '2017-12-26T00:00:00.000-05:00',
}
json_data = {'query': {'range': {'date': date_range}}}
response = requests.get(
    'http://localhost:9200/news_headlines2/_search',
    headers=headers,
    json=json_data,
)
response.json()['hits']['hits']
# Count all matching documents (track_total_hits) and return a specific
# number of them (size).
# GET news_headlines2/_search
headers = {}  # Content-Type: application/json is already added by json=
date_range = {
    'gte': '2017-05-28T00:00:00.000-04:00',
    'lt': '2017-12-26T00:00:00.000-05:00',
}
json_data = {
    'query': {'range': {'date': date_range}},
    'track_total_hits': True,
    'size': 5,
}
response = requests.get(
    'http://localhost:9200/news_headlines2/_search',
    headers=headers,
    json=json_data,
)
response.json()['hits']['hits']
Aggregation
# Terms aggregation on the 'category' field, asking for up to 100 buckets
# GET news_headlines/_search
headers = {}  # Content-Type: application/json is already added by json=
category_terms = {'field': 'category', 'size': 100}
json_data = {'aggs': {'by_category': {'terms': category_terms}}}
response = requests.get(
    'http://localhost:9200/news_headlines/_search',
    headers=headers,
    json=json_data,
)
# The buckets are returned under the aggregation name chosen above
response.json()['aggregations']['by_category']['buckets']
Paging
Paging fewer than 10,000 hits
# Page through the results with from/size: skip 9000 hits, return the next 2
# GET news_headlines2/_search
headers = {}  # Content-Type: application/json is already added by json=
date_range = {
    'gte': '2017-05-28T00:00:00.000-04:00',
    'lt': '2017-12-26T00:00:00.000-05:00',
}
json_data = {
    'query': {'range': {'date': date_range}},
    'track_total_hits': True,
    'from': 9000,
    'size': 2,
}
response = requests.get(
    'http://localhost:9200/news_headlines2/_search',
    headers=headers,
    json=json_data,
)
Paging more than 10,000 hits
# Step 1: open a point in time (PIT) and keep its id.
# A PIT is a lightweight view of the request results.
# POST news_headlines2/_pit?keep_alive=1m
params = {'keep_alive': '1m'}
pit_url = 'http://localhost:9200/news_headlines2/_pit'
response = requests.post(pit_url, params=params)
pit_id = response.json()['id']
# Step 2: request the first page through the PIT
# GET _search (no index in the URL; the request carries the PIT id instead)
import requests
headers = {}  # Content-Type: application/json is already added by json=
date_range = {
    'gte': '2017-05-28T00:00:00.000-04:00',
    'lt': '2017-12-26T00:00:00.000-05:00',
}
point_in_time = {'id': pit_id, 'keep_alive': '1m'}
json_data = {
    'size': 10,
    'query': {'range': {'date': date_range}},
    'pit': point_in_time,
    'sort': [{'date': 'asc'}],
}
response = requests.get(
    'http://localhost:9200/_search',
    headers=headers,
    json=json_data,
)
response.json()['hits']['hits']
# step 3, get the next page using the last hit's sort values in the previous page
import requests
headers = {
    # Already added when you pass json= but not when you pass data=
    # 'Content-Type': 'application/json',
}
# `response` still holds the previous page. Read the continuation values from
# it instead of hard-coding them, so the snippet works for any data set and
# can be re-run unchanged for every following page.
previous_page = response.json()
# Raises IndexError when the previous page had no hits, i.e. when there is
# nothing left to fetch.
last_hit = previous_page['hits']['hits'][-1]
# Elasticsearch may hand back a different PIT id with each response; the most
# recently received id should be used for the next request.
pit_id = previous_page.get('pit_id', pit_id)
json_data = {
    'size': 1000,
    'query': {
        'range': {
            'date': {
                'gte': '2017-05-28T00:00:00.000-04:00',
                'lt': '2017-12-26T00:00:00.000-05:00',
            },
        },
    },
    'pit': {
        'id': pit_id,
        'keep_alive': '1m',
    },
    'sort': [
        {
            'date': 'asc',
        },
    ],
    # Sort values of the last hit on the previous page
    'search_after': last_hit['sort'],
}
response = requests.get('http://localhost:9200/_search', headers=headers, json=json_data)
response.json()['hits']['hits']
Reference
Point in time API
Paginate search results
Convert curl commands to Python, JavaScript, PHP, R, Go, Rust, Elixir, Java, MATLAB, Dart, CFML, Ansible URI, Strest or JSON