Eland
Install
pip install eland # install latest version
pip install eland==7.14.0b1 # install a specific version (here 7.14.0b1)
            
Basics
# Pandas DataFrame concept, Eland DataFrame (Elasticsearch) equivalent
DataFrame, Index
Row, Document
Column, Field
df.dtypes, Mapping Types
df.index, _id/@timestamp
            
import eland as ed

# define a pointer to the index; no data is loaded locally
df = ed.DataFrame('http://localhost:9200', 'housing')

# fetch and process the data on the server, then return the results
df['ocean_proximity'].value_counts()
            
Create, Read, Update, and Delete (CRUD)
  • Create
  • # create index from Pandas DataFrame
    import pandas as pd
    data = {
        'apples': [3, 2, 0, 1], 
        'oranges': [0, 3, 7, 2]
    }
    
    purchases = pd.DataFrame(data) # Pandas DataFrame
    
    df = ed.pandas_to_eland(purchases, "localhost:9200", "fruit", es_if_exists="replace",
                            es_refresh=True, # wait for the data to be indexed before returning
                           ) # create an index and return Eland DataFrame
    
    # create index from csv
    df = ed.csv_to_eland('housing.csv', es_client='localhost', es_dest_index='housing', es_if_exists="replace", es_refresh=True)
                
  • Insert
  • # use Elasticsearch
                
  • Read
  • import eland as ed
    
    # read Eland DataFrame
    df = ed.DataFrame('http://localhost:9200', 'my-index') # eland.dataframe.DataFrame
    
    # get size
    df.size
    
    # data info
    df.info()
    
    # data describe
    df.describe()
    
    # get columns
    df[['median_house_value', 'ocean_proximity']]
    
    # convert Eland DataFrame to Pandas DataFrame
    pd_df = ed.eland_to_pandas(df)
    pd_df = df.to_pandas()
    
    # get an Eland Series
    s = df['age'] # eland.series.Series
    pd_s = s.to_pandas() # pandas.core.series.Series
                
  • Update
  • # use Elasticsearch
                
  • Delete
  • # use Elasticsearch
                
    Query
    df = ed.DataFrame('http://localhost:9200', 'news_headline')
    df_query = df.es_query({"query":{"range":{"date":{"gte":"2017-05-28T00:00:00.000-04:00","lt":"2017-12-26T00:00:00.000-05:00"}}}})
                
    Aggregation
    # aggregation
    df = ed.DataFrame('http://localhost:9200', 'housing')
    df.aggregate(['max', 'min']) # aggregation of data table
    df['households'].min() # aggregation of a series
    
    # group by
    df = ed.DataFrame('http://localhost:9200', 'housing')
    df.groupby(['ocean_proximity']).min(['median_house_value'])
    
    # count
    df['ocean_proximity'].value_counts()
                
    Mapping
  • View Mapping
  • # use Elasticsearch
                
  • Create an index with Mapping
  • # by default, Eland derives the Elasticsearch field types from the Pandas DataFrame dtypes
    pd_df = pd.read_csv('housing.csv') # pandas.core.frame.DataFrame
    df = ed.pandas_to_eland(pd_df, "localhost:9200", "housing2", es_if_exists="replace", es_refresh=True)
    
    # override data types
    df = ed.pandas_to_eland(pd_df, "localhost:9200", "housing3", es_if_exists="replace", es_refresh=True, es_type_overrides={'ocean_proximity':'text'})
                
    Reference
  • Eland Documentation
  • PyPI