Eland
Install
pip install eland # install latest version
pip install eland==7.14.0b1 # install 7.14.0b1
Basics
# Pandas DataFrame concept, Elasticsearch equivalent (as exposed by an Eland DataFrame)
DataFrame, Index
Row, Document
Column, Field
df.dtypes, Mapping Types
df.index, _id/@timestamp
import eland as ed
# define a reference to the index; no data is loaded into local memory
df = ed.DataFrame('http://localhost:9200', 'housing')
# the data is fetched and processed on the server; only the results are returned
df['ocean_proximity'].value_counts()
Create, Read, Update, and Delete (CRUD)
Create
# create an index from a Pandas DataFrame
import pandas as pd
data = {
'apples': [3, 2, 0, 1],
'oranges': [0, 3, 7, 2]
}
purchases = pd.DataFrame(data) # Pandas DataFrame
df = ed.pandas_to_eland(purchases, "localhost:9200", "fruit", es_if_exists="replace",
es_refresh=True, # wait for the data to be indexed before returning
) # create an index and return an Eland DataFrame
# create an index from a CSV file
df = ed.csv_to_eland('housing.csv', es_client='localhost', es_dest_index='housing', es_if_exists="replace", es_refresh=True)
Insert
# use the Elasticsearch API directly
Read
import eland as ed
# read Eland DataFrame
df = ed.DataFrame('http://localhost:9200', 'my-index') # eland.dataframe.DataFrame
# get size
df.size
# print a summary of the DataFrame
df.info()
# generate descriptive statistics
df.describe()
# get columns
df[['median_house_value', 'ocean_proximity']]
# convert Eland DataFrame to Pandas DataFrame
pd_df = ed.eland_to_pandas(df)
pd_df = df.to_pandas()
# get an Eland Series
s = df['age'] # eland.series.Series
pd_s = s.to_pandas() # pandas.core.series.Series
Update
# use the Elasticsearch API directly
Delete
# use the Elasticsearch API directly
Query
df = ed.DataFrame('http://localhost:9200', 'news_headline')
df_query = df.es_query({"query":{"range":{"date":{"gte":"2017-05-28T00:00:00.000-04:00","lt":"2017-12-26T00:00:00.000-05:00"}}}})
Aggregation
# aggregation
df = ed.DataFrame('http://localhost:9200', 'housing')
df.aggregate(['max', 'min']) # aggregate over the whole DataFrame
df['households'].min() # aggregation of a series
# group by
df = ed.DataFrame('http://localhost:9200', 'housing')
df.groupby(['ocean_proximity']).min(['median_house_value'])
# count
df['ocean_proximity'].value_counts()
Mapping
View Mapping
# use the Elasticsearch API directly
Create an index with Mapping
# by default, Eland derives the index's data types from the data types of the Pandas DataFrame
pd_df = pd.read_csv('housing.csv') # pandas.core.frame.DataFrame
df = ed.pandas_to_eland(pd_df, "localhost:9200", "housing2", es_if_exists="replace", es_refresh=True)
# override the default data types for specific columns
df = ed.pandas_to_eland(pd_df, "localhost:9200", "housing3", es_if_exists="replace", es_refresh=True, es_type_overrides={'ocean_proximity':'text'})
Reference