import pandas as pd
import numpy as np

# Load the weather observations and convert the 'Date' column to datetimes.
data = pd.read_csv('weatherAUS.csv')
data['Date'] = pd.to_datetime(data['Date'])

# Index the rows by a copy of the dates named 'Index'; 'Date' itself stays
# available as a regular column for the plots that pass x='Date'.
data = data.set_index(data['Date'].rename('Index'))


data.head()


# Line plot of MinTemp against Date for the first 1000 rows; the returned
# matplotlib Axes is kept so the axis labels can be customized.
ax = data.head(1000).plot.line(x='Date', y='MinTemp')
ax.set_xlabel('Date Label')
ax.set_ylabel('Temperature')

Text(0, 0.5, 'Temperature')


# DataFrame line plot: a single y column against the 'Date' column
data.head(1000).plot.line(x='Date', y='MinTemp')

<AxesSubplot: xlabel='Date'>


# DataFrame line plot: several y columns drawn on the same axes
data.iloc[:1000].plot(kind='line', x='Date', y=['MinTemp', 'MaxTemp'])

<AxesSubplot: xlabel='Date'>


# Series line plot: with no x given, the Series index is used for the x axis
data['MinTemp'].iloc[:1000].plot.line()

<AxesSubplot: xlabel='Index'>


# Several columns plotted against the DataFrame index
data[['MinTemp', 'MaxTemp']].head(1000).plot.line()

<AxesSubplot: xlabel='Index'>


# Bar chart from a Series: one bar per distinct 9am wind direction
data_counts = data['WindDir9am'].value_counts()  # Series of counts
data_counts.plot(kind='bar')

<AxesSubplot: >


# Bar chart from a DataFrame: turn the value counts into a two-column
# frame ('Type', 'Count') and name the columns to plot explicitly.
data_counts = (
    data['WindDir9am']
    .value_counts()
    .rename_axis('Type')
    .reset_index(name='Count')
)  # DataFrame

data_counts.plot(kind='bar', x='Type', y='Count', rot=90)

<AxesSubplot: xlabel='Type'>


# Histogram of a single Series, using 30 bins
data['MinTemp'].plot(kind='hist', bins=30)

<AxesSubplot: ylabel='Frequency'>


# Histograms of two columns on shared axes, drawn with alpha=0.5
data[['MinTemp', 'MaxTemp']].plot(kind='hist', bins=30, alpha=0.5)

<AxesSubplot: ylabel='Frequency'>


# Histogram of the same two columns with stacked=True
data[['MinTemp', 'MaxTemp']].plot(kind='hist', bins=30, stacked=True)

<AxesSubplot: ylabel='Frequency'>


# Grouped histograms: MinTemp of the first 6000 rows, split by 'Location'
data.head(6000).hist(column='MinTemp', by='Location')

array([<AxesSubplot: title={'center': 'Albury'}>,
       <AxesSubplot: title={'center': 'BadgerysCreek'}>], dtype=object)


# Box plot with outliers
#   IQR         = Q3 - Q1
#   lower bound = Q1 - 1.5 * IQR
#   upper bound = Q3 + 1.5 * IQR
# Points that fall outside the lower and upper bounds are labeled as outliers.

# One color per box-plot component.
color = {
    'boxes': 'DarkGreen',
    'whiskers': 'DarkOrange',
    'medians': 'DarkBlue',
    'caps': 'red',
}
data[['MinTemp', 'MaxTemp']].plot(kind='box', color=color, sym='b+')

<AxesSubplot: >


# Unstacked area plot of the first 100 rows, with the index as x
data[['Humidity9am', 'Humidity3pm']].head(100).plot(kind='area', stacked=False)

<AxesSubplot: xlabel='Index'>


# Unstacked area plot of the first 100 rows, with the 'Date' column as x
data.head(100).plot(kind='area', x='Date', y=['Humidity9am', 'Humidity3pm'], stacked=False)

<AxesSubplot: xlabel='Date'>


# Scatter plot of MinTemp against Date for the first 100 rows (rot=45)
data.head(100).plot(kind='scatter', x='Date', y='MinTemp', rot=45)

<AxesSubplot: xlabel='Date', ylabel='MinTemp'>


# Hexagonal-bin plot of MaxTemp against MinTemp for the first 100 rows
data.head(100).plot.hexbin(x='MinTemp', y='MaxTemp', gridsize=25)

<AxesSubplot: xlabel='MinTemp', ylabel='MaxTemp'>


# Pie chart from a Series: counts of each 9am wind direction
data['WindDir9am'].value_counts().plot(kind='pie', figsize=(6, 6))

<AxesSubplot: ylabel='WindDir9am'>


# Kernel density estimate of MinTemp
data['MinTemp'].plot(kind='kde')

<AxesSubplot: ylabel='Density'>


from pandas.plotting import andrews_curves

# Load Iris under its own name. Rebinding `data` here would replace the
# weather DataFrame, and the andrews_curves call below (like the later
# sections) selects weather columns such as 'MinTemp' and 'RainTomorrow',
# which would then raise KeyError.
iris = pd.read_csv('Iris.csv')
iris.head()


# Andrews curves of four weather measurements, using 'RainTomorrow' as the
# class column.
andrews_curves(data[['MinTemp', 'Rainfall', 'WindGustSpeed', 'Humidity9am', 'RainTomorrow']], 'RainTomorrow')
# Equivalent call on the Iris data set:
#andrews_curves(iris[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species']], 'Species')

<AxesSubplot: >


from pandas.plotting import parallel_coordinates

# Parallel-coordinates plot of four measurements, with 'RainTomorrow' as
# the class column.
parallel_coordinates(
    data.loc[:, ['MinTemp', 'Rainfall', 'WindGustSpeed', 'Humidity9am', 'RainTomorrow']],
    class_column='RainTomorrow',
)

<AxesSubplot: >


from pandas.plotting import lag_plot

# Lag plot of MinTemp with a lag of one row
lag_plot(data.loc[:, 'MinTemp'], lag=1)

<AxesSubplot: xlabel='y(t)', ylabel='y(t + 1)'>


# Manually calculate the autocorrelation coefficient of MinTemp for each
# lag value from 0 to 5999, then plot the coefficients against the lag.
correlations = [data['MinTemp'].autocorr(lag=lag) for lag in range(6000)]
s = pd.Series(correlations)
ax = s.plot()
ax.set_xlabel('Lag')

Text(0.5, 0, 'Lag')


# Use the built-in autocorrelation plot on the first 100 MinTemp values.
# Note: if there is any missing value, the autocorrelation plot is not drawn.
from pandas.plotting import autocorrelation_plot
ax = autocorrelation_plot(data['MinTemp'].iloc[:100])


from pandas.plotting import bootstrap_plot

# size:    number of data points in each sampling
# samples: number of times the sampling is repeated
bootstrap_plot(series=data['MinTemp'], samples=500, size=50)


from pandas.plotting import radviz

# RadViz plot of four measurements, with 'RainTomorrow' as the class column.
radviz(
    data.loc[:, ['MinTemp', 'Rainfall', 'WindGustSpeed', 'Humidity9am', 'RainTomorrow']],
    class_column='RainTomorrow',
)

<AxesSubplot: >

	Date	Location	MinTemp	MaxTemp	Rainfall	Evaporation	Sunshine	WindGustDir	WindGustSpeed	WindDir9am	...	Humidity9am	Humidity3pm	Pressure9am	Pressure3pm	Cloud9am	Cloud3pm	Temp9am	Temp3pm	RainToday	RainTomorrow
Index
2008-12-01	2008-12-01	Albury	13.4	22.9	0.6	NaN	NaN	W	44.0	W	...	71.0	22.0	1007.7	1007.1	8.0	NaN	16.9	21.8	No	No
2008-12-02	2008-12-02	Albury	7.4	25.1	0.0	NaN	NaN	WNW	44.0	NNW	...	44.0	25.0	1010.6	1007.8	NaN	NaN	17.2	24.3	No	No
2008-12-03	2008-12-03	Albury	12.9	25.7	0.0	NaN	NaN	WSW	46.0	W	...	38.0	30.0	1007.6	1008.7	NaN	2.0	21.0	23.2	No	No
2008-12-04	2008-12-04	Albury	9.2	28.0	0.0	NaN	NaN	NE	24.0	SE	...	45.0	16.0	1017.6	1012.8	NaN	NaN	18.1	26.5	No	No
2008-12-05	2008-12-05	Albury	17.5	32.3	1.0	NaN	NaN	W	41.0	ENE	...	82.0	33.0	1010.8	1006.0	7.0	8.0	17.8	29.7	No	No

	Id	SepalLengthCm	SepalWidthCm	PetalLengthCm	PetalWidthCm	Species
0	1	5.1	3.5	1.4	0.2	Iris-setosa
1	2	4.9	3.0	1.4	0.2	Iris-setosa
2	3	4.7	3.2	1.3	0.2	Iris-setosa
3	4	4.6	3.1	1.5	0.2	Iris-setosa
4	5	5.0	3.6	1.4	0.2	Iris-setosa

Pandas Plot¶

Load Data¶

Format¶

Line Plot¶

Bar Chart¶

Histogram Plot¶

Box Plot¶

Area Plot¶

Scatter Plot¶

Hexagonal Bin Plot¶

Pie Plot¶

Density Plot¶

Andrews Curves¶

Parallel Coordinates¶

Lag Plot¶

Autocorrelation Plot¶

Bootstrap Plot¶

RadViz¶

Reference¶