Elasticsearch

pip install eland # install latest version
pip install eland==7.14.0b1 # install 7.14.0b1

# Pandas concept, Elasticsearch equivalent (as exposed through Eland)
DataFrame, Index
Row, Document
Column, Field
df.dtypes, Mapping Types
df.index, _id/@timestamp

import eland as ed

# define a reference to the index; the data is not loaded into local memory
df = ed.DataFrame('http://localhost:9200', 'housing')

# the data is processed on the server and only the results are returned
df['ocean_proximity'].value_counts()

Create

# create an index from a Pandas DataFrame
import pandas as pd
data = {
    'apples': [3, 2, 0, 1], 
    'oranges': [0, 3, 7, 2]
}

purchases = pd.DataFrame(data) # Pandas DataFrame

df = ed.pandas_to_eland(purchases, "localhost:9200", "fruit", es_if_exists="replace",
                        es_refresh=True, # wait for the data to be indexed before returning
                       ) # create an index and return an Eland DataFrame

# create an index from a CSV file
df = ed.csv_to_eland('housing.csv', es_client='localhost', es_dest_index='housing', es_if_exists="replace", es_refresh=True)

Insert

# use Elasticsearch

Read

import eland as ed

# read Eland DataFrame
df = ed.DataFrame('http://localhost:9200', 'my-index') # eland.dataframe.DataFrame

# get size
df.size

# print a summary of the DataFrame
df.info()

# descriptive statistics
df.describe()

# get columns
df[['median_house_value', 'ocean_proximity']]

# convert Eland DataFrame to Pandas DataFrame
pd_df = ed.eland_to_pandas(df)
pd_df = df.to_pandas()

# get an Eland Series
s = df['age'] # eland.series.Series
pd_s = s.to_pandas() # pandas.core.series.Series

Update

# use Elasticsearch

Delete

# use Elasticsearch

df = ed.DataFrame('http://localhost:9200', 'news_headline')
df_query = df.es_query({"query":{"range":{"date":{"gte":"2017-05-28T00:00:00.000-04:00","lt":"2017-12-26T00:00:00.000-05:00"}}}})

# aggregation
df = ed.DataFrame('http://localhost:9200', 'housing')
df.aggregate(['max', 'min']) # aggregate over the whole DataFrame
df['households'].min() # aggregate over a single Series

# group by
df = ed.DataFrame('http://localhost:9200', 'housing')
df.groupby(['ocean_proximity']).min(['median_house_value'])

# count
df['ocean_proximity'].value_counts()

View Mapping

# use Elasticsearch

Create an index with Mapping

# by default, Eland derives the Elasticsearch field types from the dtypes of the Pandas DataFrame
pd_df = pd.read_csv('housing.csv') # pandas.core.frame.DataFrame
df = ed.pandas_to_eland(pd_df, "localhost:9200", "housing2", es_if_exists="replace", es_refresh=True)

# override data types
df = ed.pandas_to_eland(pd_df, "localhost:9200", "housing3", es_if_exists="replace", es_refresh=True, es_type_overrides={'ocean_proximity':'text'})

Eland Documentation

PyPI