Elasticsearch API with Python
Basics
import requests

# Cluster health (GET _cluster/health)
response = requests.get(url='http://localhost:9200/_cluster/health')
response.json()  # parsed JSON body, returned as a dict

# Node info (GET _nodes/stats)
response = requests.get(url='http://localhost:9200/_nodes/stats')
response.json()
            
Create, Read, Update, and Delete (CRUD)
  • Create
  • # create index
    # NOTE: this request and the mapped variant below both create `tutorial_curl`.
    # Run only one of them (or delete the index in between): a PUT on an index
    # that already exists is rejected with resource_already_exists_exception.
    response = requests.put('http://localhost:9200/tutorial_curl')
    response.json()

    # create index with a mapping
    headers = {
        # Already added when you pass json=
        # 'Content-Type': 'application/json',
    }

    json_data = {
        'mappings': {
            'properties': {
                'Firstname': {
                    'type': 'keyword',
                },
                # Names are strings. With 'long' here Elasticsearch would reject
                # documents such as {'Lastname': 'Chen'} with a mapping error.
                'Lastname': {
                    'type': 'keyword',
                },
            },
        },
    }

    response = requests.put('http://localhost:9200/tutorial_curl', headers=headers, json=json_data)
                
  • Insert
  • # insert document
    # Body of the new document; no id is given, so Elasticsearch generates the _id.
    json_data = {'Firstname': 'Lin', 'Lastname': 'Chen'}
    # Left empty on purpose: requests sets Content-Type: application/json when json= is used.
    headers = {}
    response = requests.post(
        'http://localhost:9200/tutorial_curl/_doc',
        headers=headers,
        json=json_data,
    )

    # insert a document with an id
    # Same body again, this time stored under the explicit _id 1 taken from the URL.
    json_data = {'Firstname': 'Lin', 'Lastname': 'Chen'}
    headers = {}
    response = requests.post(
        'http://localhost:9200/tutorial_curl/_doc/1',
        headers=headers,
        json=json_data,
    )
                
  • Read
  • # read document
    # Fetch the document stored under _id 1 and decode the JSON reply.
    response = requests.get(url='http://localhost:9200/tutorial_curl/_doc/1')
    response.json()
                
  • Update
  • # update document
    # Partial update: the fields listed under 'doc' are merged into document 1.
    json_data = {'doc': {'Firstname': 'Unknown', 'Lastname': 'Unknown'}}
    # Left empty on purpose: requests sets Content-Type: application/json when json= is used.
    headers = {}
    response = requests.post(
        'http://localhost:9200/tutorial_curl/_update/1',
        headers=headers,
        json=json_data,
    )
                
  • Delete
  • # delete index
    # Drops the whole index together with every document in it.
    response = requests.delete(url='http://localhost:9200/tutorial_curl')
    response.json()

    # delete document
    # NOTE(review): this request and the delete-by-query below need the index to
    # exist; if the index was just dropped above, recreate it (or run these first).
    response = requests.delete(url='http://localhost:9200/tutorial_curl/_doc/1')

    # delete by query
    # Removes every document whose Firstname matches 'Lin'.
    name_match = {'Firstname': {'query': 'Lin'}}
    json_data = {'query': {'match': name_match}}
    # Left empty on purpose: requests sets Content-Type: application/json when json= is used.
    headers = {}
    response = requests.post(
        'http://localhost:9200/tutorial_curl/_delete_by_query',
        headers=headers,
        json=json_data,
    )
                
    Bulk
  • Insert
  • import requests
    
    # data= sends the string as-is, so the Content-Type has to be set by hand.
    headers = {'Content-Type': 'application/json'}

    # The bulk body is newline-delimited JSON: one action line followed by one
    # document line, and every line (including the last) must end with \n.
    data = (
        '{"index": {"_index": "baseline_1", "_id":3}}\n'
        '{"Absolute vorticity": 0.0001307,"Apparent temperature": 232.5}\n'
        '{"index": {"_index": "baseline_1", "_id":4}}\n'
        '{"Absolute vorticity": 0.0001307,"Apparent temperature": 233.6}\n'
    )

    response = requests.post(
        'http://localhost:9200/_bulk',
        headers=headers,
        data=data,
    )
                
  • Delete
  • import requests
    
    headers = {
        # Already added when you pass json= but not when you pass data=
        # 'Content-Type': 'application/json',
    }

    params = {
        # Keep deleting when a version conflict is hit instead of aborting.
        'conflicts': 'proceed',
    }

    # match_all selects every document, so this empties the index.
    json_data = {
        'query': {
            'match_all': {},
        },
    }

    # The port must be given explicitly: without :9200 the request goes to port 80,
    # not to the Elasticsearch node used by every other example here.
    # Replace index_name with the index to empty.
    response = requests.post('http://localhost:9200/index_name/_delete_by_query', params=params, headers=headers, json=json_data)
                
    Query
    # By default, Elasticsearch counts at most 10,000 matching hits and returns the first 10 of them.
    date_window = {
        'gte': '2017-05-28T00:00:00.000-04:00',
        'lt': '2017-12-26T00:00:00.000-05:00',
    }
    json_data = {'query': {'range': {'date': date_window}}}
    # Left empty on purpose: requests sets Content-Type: application/json when json= is used.
    headers = {}

    response = requests.get(
        'http://localhost:9200/news_headlines2/_search',
        headers=headers,
        json=json_data,
    )
    response.json()['hits']['hits']
                
    # Count every matching document (track_total_hits) and return only `size` of them.
    date_window = {
        'gte': '2017-05-28T00:00:00.000-04:00',
        'lt': '2017-12-26T00:00:00.000-05:00',
    }
    json_data = {
        'query': {'range': {'date': date_window}},
        'track_total_hits': True,
        'size': 5,
    }
    # Left empty on purpose: requests sets Content-Type: application/json when json= is used.
    headers = {}

    response = requests.get(
        'http://localhost:9200/news_headlines2/_search',
        headers=headers,
        json=json_data,
    )
    response.json()['hits']['hits']
                
    Aggregation
    # Terms aggregation: bucket the documents by their 'category' value (at most 100 buckets).
    category_terms = {'field': 'category', 'size': 100}
    json_data = {'aggs': {'by_category': {'terms': category_terms}}}
    # Left empty on purpose: requests sets Content-Type: application/json when json= is used.
    headers = {}

    response = requests.get(
        'http://localhost:9200/news_headlines/_search',
        headers=headers,
        json=json_data,
    )
    # One entry per distinct category, each carrying its document count.
    response.json()['aggregations']['by_category']['buckets']
                
    Paging
  • Paging fewer than 10,000 hits
  • headers = {
        # Already added when you pass json=
        # 'Content-Type': 'application/json',
    }
    
    # from/size paging: skip the first 9000 hits of the range query and return the next 2.
    date_window = {
        'gte': '2017-05-28T00:00:00.000-04:00',
        'lt': '2017-12-26T00:00:00.000-05:00',
    }
    json_data = {
        'query': {'range': {'date': date_window}},
        'track_total_hits': True,
        'from': 9000,
        'size': 2,
    }

    response = requests.get(
        'http://localhost:9200/news_headlines2/_search',
        headers=headers,
        json=json_data,
    )
                
  • Paging more than 10,000 hits
  • # step 1, create a pit id
    # A point in time (PIT) is a lightweight view into the state of the index data
    # as it existed when the PIT was opened.
    # keep_alive is sent as a query-string parameter, not in a request body.
    params = dict(keep_alive='1m')

    response = requests.post(
        'http://localhost:9200/news_headlines2/_pit',
        params=params,
    )
    # The id returned here is what the paged searches below pass as pit.id.
    pit_id = response.json()['id']
                
    # step 2, get the first page
    import requests
    
    date_window = {
        'gte': '2017-05-28T00:00:00.000-04:00',
        'lt': '2017-12-26T00:00:00.000-05:00',
    }
    json_data = {
        'size': 10,
        'query': {'range': {'date': date_window}},
        # Search inside the point in time opened in step 1.
        'pit': {'id': pit_id, 'keep_alive': '1m'},
        # Each hit comes back with its sort values; the next page continues from them.
        'sort': [{'date': 'asc'}],
    }
    # Left empty on purpose: requests sets Content-Type: application/json when json= is used.
    headers = {}

    # No index name in the URL: the target index comes from the PIT.
    response = requests.get(
        'http://localhost:9200/_search',
        headers=headers,
        json=json_data,
    )
    response.json()['hits']['hits']
                
    # step 3, get the next page using the last hit’s sort values in the previous page
    import requests
    
    headers = {
        # Already added when you pass json= but not when you pass data=
        # 'Content-Type': 'application/json',
    }

    # `response` still holds the previous page at this point. Read the cursor from
    # its last hit instead of hard-coding it, so the snippet works on any data set.
    # An empty previous page means there is nothing left to fetch (the [-1] lookup
    # would raise IndexError).
    previous_page = response.json()
    search_after = previous_page['hits']['hits'][-1]['sort']
    # Elasticsearch may return a different PIT id with each search; always
    # continue with the most recently received one.
    pit_id = previous_page.get('pit_id', pit_id)

    json_data = {
        'size': 1000,
        'query': {
            'range': {
                'date': {
                    'gte': '2017-05-28T00:00:00.000-04:00',
                    'lt': '2017-12-26T00:00:00.000-05:00',
                },
            },
        },
        'pit': {
            'id': pit_id,
            'keep_alive': '1m',
        },
        # Must be the same sort as the previous page, otherwise the cursor is meaningless.
        'sort': [
            {
                'date': 'asc',
            },
        ],
        'search_after': search_after,
    }

    response = requests.get('http://localhost:9200/_search', headers=headers, json=json_data)
    response.json()['hits']['hits']
                
    Reference
  • Point in time API
  • Paginate search results
  • Convert curl commands to Python, JavaScript, PHP, R, Go, Rust, Elixir, Java, MATLAB, Dart, CFML, Ansible URI, Strest or JSON