import plotly
# Call plotly's offline notebook initialiser once, before any figure is
# created or shown further down (e.g. the later fig.show() call).
plotly.offline.init_notebook_mode()


import pandas as pd

# Read the weather CSV, keep only the rows whose Location is 'Albury', and
# index them by Date while keeping Date as a regular column (drop=False).
data = (
    pd.read_csv('weatherAUS.csv', encoding='utf-8')
    .loc[lambda frame: frame['Location'] == 'Albury']
    .set_index('Date', drop=False)
)


# Preview the first rows of the filtered frame.
data.head()


import plotly.express as px
# Line chart of MinTemp against Date, restricted to the first 800 rows.
fig = px.line(
    data_frame=data.iloc[:800],
    x='Date',
    y='MinTemp',
)
fig.show()


from scipy import stats
# Pearson correlation between MinTemp and MaxTemp over the first 100 rows.
stats.pearsonr(
    data['MinTemp'].iloc[:100],
    data['MaxTemp'].iloc[:100],
)

PearsonRResult(statistic=0.653071047724115, pvalue=1.7892482507532763e-13)


import seaborn as sns

# Joint plot of MinTemp (x) against Humidity9am (y), with edgecolor set to red.
sns.jointplot(
    data=data,
    x='MinTemp',
    y='Humidity9am',
    edgecolor='red',
)

<seaborn.axisgrid.JointGrid at 0x132437290>


from dython.nominal import associations
# Pairwise associations between four numeric weather columns; the call
# returns a dict holding the correlation table ('corr') and the axes ('ax').
associations(
    data.loc[:, ['MinTemp', 'MaxTemp', 'WindGustSpeed', 'Humidity9am']]
)

{'corr':                 MinTemp   MaxTemp  WindGustSpeed  Humidity9am
 MinTemp        1.000000  0.793268       0.402512    -0.544578
 MaxTemp        0.793268  1.000000       0.269381    -0.697633
 WindGustSpeed  0.402512  0.269381       1.000000    -0.291787
 Humidity9am   -0.544578 -0.697633      -0.291787     1.000000,
 'ax': <Axes: >}


import numpy as np
from numpy import polyfit

def get_trend(X, degree=3):
    """Estimate the trend of a 1-D series with a polynomial fit.

    The values are regressed (via ``numpy.polyfit``) against their
    positional index ``0 .. len(X) - 1`` and the fitted polynomial is
    evaluated at those same positions.

    X (1d array), observed values
    degree (int), degree of the fitted polynomial

    Returns the fitted values, one per element of ``X``.
    """
    positions = np.arange(len(X))
    fitted = np.poly1d(polyfit(positions, X, degree))
    return fitted(positions)

def get_seasonality(X, periods=4, degree=3):
    """Estimate seasonality of a 1-D series with a polynomial fit on a folded index.

    Each positional index ``i`` is folded into a repeating cycle of length
    ``365 / periods`` (``i % (365 / periods)``). The values are then
    regressed (via ``numpy.polyfit``) against that folded index and the
    fitted polynomial is evaluated at the same folded positions.

    X (1d array), observed values (typically already de-trended)
    periods (int), number of cycles a 365-step year is split into
    degree (int), degree of the fitted polynomial

    Returns the fitted values, one per element of ``X``.
    """
    phase = np.mod(np.arange(len(X)), 365 / periods)
    seasonal_fit = np.poly1d(polyfit(phase, X, degree))
    return seasonal_fit(phase)


# Take MinTemp for rows 60..199 and keep only the finite (non-NaN) readings.
data_temp = data['MinTemp'].iloc[60:200]
idx = np.isfinite(data_temp)
data_temp = data_temp.loc[idx]

# Trend: polynomial fit over the raw values.
data_trend = get_trend(data_temp.values)

# Seasonality: fitted on what is left once the trend has been subtracted.
data_seasonality = get_seasonality(data_temp.sub(data_trend))

# Shape check; a bare expression is only displayed when it ends an
# interactive cell.
data_temp.shape, data_trend.shape, data_seasonality.shape

# Put the three components side by side and plot them on shared axes.
data_temp = pd.DataFrame(
    {
        'Temp': data_temp.values,
        'Trend': data_trend,
        'Seasonality': data_seasonality,
    }
)
data_temp.loc[:, ['Temp', 'Trend', 'Seasonality']].plot()

<Axes: >


from statsmodels.tsa.seasonal import STL

# MinTemp for the first 200 rows.
data_temp = data['MinTemp'].iloc[:200]

# Fit an STL decomposition with period 4. The result exposes res.trend,
# res.seasonal and res.resid; the residual is what remains of the series
# after the seasonal and trend components are subtracted.
res = STL(data_temp, period=4).fit()
plot = res.plot()


from matplotlib import pyplot
from pandas.plotting import lag_plot

# Lag-1 scatter, y(t) against y(t + 1), for the first 100 MinTemp values.
lag_plot(data['MinTemp'].iloc[:100], lag=1)

<Axes: xlabel='y(t)', ylabel='y(t + 1)'>


from pandas.plotting import autocorrelation_plot
# Autocorrelation plot for the first 100 MinTemp values; keep the axes handle.
ax = autocorrelation_plot(data['MinTemp'].iloc[:100])


from statsmodels.tsa import stattools
# Stationarity check: run statsmodels' adfuller test on the first 200
# MinTemp values.
stattools.adfuller(data['MinTemp'].iloc[:200])

(-0.2155323177456027,
 0.9366447039032281,
 9,
 190,
 {'1%': -3.4652439354133255,
  '5%': -2.8768752281673717,
  '10%': -2.574944653739612},
 919.2489508353337)

	Date	Location	MinTemp	MaxTemp	Rainfall	Evaporation	Sunshine	WindGustDir	WindGustSpeed	WindDir9am	...	Humidity9am	Humidity3pm	Pressure9am	Pressure3pm	Cloud9am	Cloud3pm	Temp9am	Temp3pm	RainToday	RainTomorrow
Date
2008-12-01	2008-12-01	Albury	13.4	22.9	0.6	NaN	NaN	W	44.0	W	...	71.0	22.0	1007.7	1007.1	8.0	NaN	16.9	21.8	No	No
2008-12-02	2008-12-02	Albury	7.4	25.1	0.0	NaN	NaN	WNW	44.0	NNW	...	44.0	25.0	1010.6	1007.8	NaN	NaN	17.2	24.3	No	No
2008-12-03	2008-12-03	Albury	12.9	25.7	0.0	NaN	NaN	WSW	46.0	W	...	38.0	30.0	1007.6	1008.7	NaN	2.0	21.0	23.2	No	No
2008-12-04	2008-12-04	Albury	9.2	28.0	0.0	NaN	NaN	NE	24.0	SE	...	45.0	16.0	1017.6	1012.8	NaN	NaN	18.1	26.5	No	No
2008-12-05	2008-12-05	Albury	17.5	32.3	1.0	NaN	NaN	W	41.0	ENE	...	82.0	33.0	1010.8	1006.0	7.0	8.0	17.8	29.7	No	No

Time-Series Analysis

Time-Series analysis (TSA)

Load Data

Correlation

Identify Trend and Seasonality

Lag Plot

Stationarity

Reference