DateTime
¶

In [ ]:
import pandas as pd

Load Data¶

In [95]:
df = pd.read_csv('aapl.csv', parse_dates=['Date'], index_col = "Date")
In [59]:
df.head()
Out[59]:
Open High Low Close Volume Adj Close
Date
2008-10-14 116.26 116.40 103.14 104.08 70749800 104.08
2008-10-13 104.55 110.53 101.02 110.26 54967000 110.26
2008-10-10 85.70 100.00 85.00 96.80 79260700 96.80
2008-10-09 93.35 95.80 86.60 88.74 57763700 88.74
2008-10-08 85.91 96.33 85.68 89.79 78847900 89.79
In [10]:
df.index # DatetimeIndex
Out[10]:
DatetimeIndex(['2008-10-14', '2008-10-13', '2008-10-10', '2008-10-09',
               '2008-10-08', '2008-10-07', '2008-10-06', '2008-10-03',
               '2008-10-02', '2008-10-01',
               ...
               '1984-09-20', '1984-09-19', '1984-09-18', '1984-09-17',
               '1984-09-14', '1984-09-13', '1984-09-12', '1984-09-11',
               '1984-09-10', '1984-09-07'],
              dtype='datetime64[ns]', name='Date', length=6081, freq=None)

Timestamp¶

In [135]:
ts = pd.Timestamp("2018-01-05")
ts
Out[135]:
Timestamp('2018-01-05 00:00:00')
In [136]:
ts.day_name()
Out[136]:
'Friday'
In [137]:
ts+pd.Timedelta("1 day") # add one day
Out[137]:
Timestamp('2018-01-06 00:00:00')
In [138]:
ts+pd.offsets.BDay() # add one business day
Out[138]:
Timestamp('2018-01-08 00:00:00')
In [139]:
# Null data time
pd.Timestamp(pd.NaT)
Out[139]:
NaT

Selection¶

In [92]:
df['2008-01'] # select one month data
# or df.loc['2008-01']
/var/folders/99/z39gcgsd2_z4nj1wpdp68jm00000gp/T/ipykernel_57501/3730440384.py:1: FutureWarning: Indexing a DataFrame with a datetimelike index using a single string to slice the rows, like `frame[string]`, is deprecated and will be removed in a future version. Use `frame.loc[string]` instead.
  df['2008-01'] # select one month data
Out[92]:
Open High Low Close Volume Adj Close
Date
2008-01-31 129.45 136.65 129.40 135.36 48059800 135.36
2008-01-30 131.37 135.45 130.00 132.18 44394700 132.18
2008-01-29 131.15 132.79 129.05 131.54 39285100 131.54
2008-01-28 128.16 133.20 126.45 130.01 52673000 130.01
2008-01-25 138.99 139.09 129.61 130.01 55526400 130.01
2008-01-24 139.99 140.70 132.01 135.60 71638100 135.60
2008-01-23 136.19 140.00 126.14 139.07 120463200 139.07
2008-01-22 148.06 159.98 146.00 155.64 86955500 155.64
2008-01-18 161.71 165.75 159.61 161.36 61583700 161.36
2008-01-17 161.51 165.36 158.42 160.89 62780700 160.89
2008-01-16 165.23 169.01 156.70 159.64 79065900 159.64
2008-01-15 177.72 179.22 164.66 169.04 83688500 169.04
2008-01-14 177.52 179.42 175.17 178.78 39301800 178.78
2008-01-11 176.00 177.85 170.00 172.69 44010200 172.69
2008-01-10 177.58 181.00 175.41 178.02 52963400 178.02
2008-01-09 171.30 179.50 168.30 179.40 64781500 179.40
2008-01-08 180.14 182.46 170.80 171.25 54422000 171.25
2008-01-07 181.25 183.60 170.23 177.64 74006900 177.64
2008-01-04 191.45 193.00 178.89 180.05 51994000 180.05
2008-01-03 195.41 197.39 192.69 194.93 30073800 194.93
2008-01-02 199.27 200.26 192.55 194.84 38542100 194.84
In [97]:
df.index
Out[97]:
DatetimeIndex(['2008-10-14', '2008-10-13', '2008-10-10', '2008-10-09',
               '2008-10-08', '2008-10-07', '2008-10-06', '2008-10-03',
               '2008-10-02', '2008-10-01',
               ...
               '1984-09-20', '1984-09-19', '1984-09-18', '1984-09-17',
               '1984-09-14', '1984-09-13', '1984-09-12', '1984-09-11',
               '1984-09-10', '1984-09-07'],
              dtype='datetime64[ns]', name='Date', length=6081, freq=None)
In [100]:
# String slicing
df['2008-01-02':'2008-01-03'] # inclusive
Out[100]:
Open High Low Close Volume Adj Close
Date
2008-01-03 195.41 197.39 192.69 194.93 30073800 194.93
2008-01-02 199.27 200.26 192.55 194.84 38542100 194.84
In [65]:
df.asfreq('Y', method='pad') # select data every year
Out[65]:
Open High Low Close Volume Adj Close
Date
1984-12-31 29.12 29.25 29.12 29.12 7453600 3.32
1985-12-31 22.25 22.37 22.00 22.00 3158400 2.51
1986-12-31 41.00 41.38 40.38 40.50 4742400 4.62
1987-12-31 42.50 43.00 41.88 42.00 4200000 9.63
1988-12-31 40.25 40.50 40.00 40.38 3578800 9.34
1989-12-31 35.25 37.50 35.00 37.25 6555600 8.70
1990-12-31 43.00 43.25 42.75 43.00 1593200 10.17
1991-12-31 57.38 58.00 56.00 56.38 4802000 13.46
1992-12-31 58.75 60.00 58.75 59.75 3302000 14.39
1993-12-31 29.75 30.25 29.25 29.25 5765200 7.13
1994-12-31 38.88 38.88 37.88 38.38 3726400 9.49
1995-12-31 32.25 32.25 31.75 32.13 4983200 8.03
1996-12-31 21.37 21.50 20.75 20.87 13719200 5.22
1997-12-31 13.13 13.63 12.94 13.13 14531200 3.28
1998-12-31 40.50 41.38 39.50 40.94 9716400 10.23
1999-12-31 100.94 102.87 99.50 102.81 5856400 25.70
2000-12-31 14.88 15.25 14.56 14.88 16161800 7.44
2001-12-31 22.51 22.66 21.83 21.90 4920800 10.95
2002-12-31 14.00 14.36 13.95 14.33 7168800 7.16
2003-12-31 21.35 21.53 21.18 21.37 6230400 10.69
2004-12-31 64.89 65.00 64.03 64.40 9949600 32.20
2005-12-31 72.38 74.75 72.25 74.75 28829800 74.75
2006-12-31 86.29 86.58 81.90 83.80 44225700 83.80
2007-12-31 199.50 200.50 197.75 198.08 19261900 198.08
In [105]:
df.index[0]
Out[105]:
Timestamp('2008-10-14 00:00:00')
In [107]:
df.head()
Out[107]:
Open High Low Close Volume Adj Close
Date
2008-10-14 116.26 116.40 103.14 104.08 70749800 104.08
2008-10-13 104.55 110.53 101.02 110.26 54967000 110.26
2008-10-10 85.70 100.00 85.00 96.80 79260700 96.80
2008-10-09 93.35 95.80 86.60 88.74 57763700 88.74
2008-10-08 85.91 96.33 85.68 89.79 78847900 89.79
In [122]:
# exact match, Series
series_minute = pd.Series(
    [1, 2, 3],
    pd.DatetimeIndex(
        ["2011-12-31 23:59:00", "2012-01-01 00:00:00", "2012-01-01 00:02:00"]
    ),
)

series_minute["2011-12-31 23:59:00"]
# or series_minute.loc["2011-12-31 23:59:00"]
Out[122]:
1
In [130]:
# exact match, DateFrame
df.loc['2008-10-14 00:00:00']
Out[130]:
Open              116.26
High              116.40
Low               103.14
Close             104.08
Volume       70749800.00
Adj Close         104.08
Name: 2008-10-14 00:00:00, dtype: float64
In [131]:
# fancy indexing
df.index[[1, 3, 5]]
Out[131]:
DatetimeIndex(['2008-10-13', '2008-10-09', '2008-10-07'], dtype='datetime64[ns]', name='Date', freq=None)

Resample¶

In [36]:
df.resample('M').mean() # monthly mean
Out[36]:
Open High Low Close Volume Adj Close
Date
1984-09-30 26.981250 27.333125 26.606250 26.738750 4.807300e+06 3.051875
1984-10-31 25.035652 25.313478 24.780435 24.806957 5.559409e+06 2.830435
1984-11-30 24.545238 24.782857 24.188095 24.236190 5.749562e+06 2.764762
1984-12-31 27.060000 27.378500 26.841000 26.947500 6.195360e+06 3.075000
1985-01-31 29.520000 29.855909 29.140000 29.253182 1.035382e+07 3.337727
... ... ... ... ... ... ...
2008-06-30 178.928571 181.549524 175.414286 178.485714 3.311754e+07 178.485714
2008-07-31 167.790455 170.846818 164.117273 167.748636 3.206130e+07 167.748636
2008-08-31 170.620476 173.298571 168.745238 171.069524 2.225062e+07 171.069524
2008-09-30 143.129524 146.230000 137.534286 141.243333 4.091339e+07 141.243333
2008-10-31 100.214000 104.699000 93.997000 98.326000 6.696761e+07 98.326000

290 rows × 6 columns

In [39]:
df['Close'].resample('M').mean().plot()
Out[39]:
<Axes: xlabel='Date'>

DatetimeIndex¶

In [84]:
# create DatetimeIndex from a list
pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"])
Out[84]:
DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq=None)
In [82]:
import numpy as np

dates = [
    pd.Timestamp("2012-05-01"),
    pd.Timestamp("2012-05-02"),
    pd.Timestamp("2012-05-03"),
]

s = pd.Series(np.random.randn(3), index = dates) # Timestamp to DatetimeIndex
s.index
Out[82]:
DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)
In [86]:
dates = [
    pd.Timestamp("2012-05-01"),
    pd.Timestamp("2012-05-02"),
    pd.Timestamp("2012-05-03"),
]

pd.Index(dates) # Timestamp to DatetimeIndex
Out[86]:
DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)
In [119]:
rng = pd.date_range('2001-01-01', '2002-01-01', freq = 'M') # Datetime Index
rng, rng.resolution
Out[119]:
(DatetimeIndex(['2001-01-31', '2001-02-28', '2001-03-31', '2001-04-30',
                '2001-05-31', '2001-06-30', '2001-07-31', '2001-08-31',
                '2001-09-30', '2001-10-31', '2001-11-30', '2001-12-31'],
               dtype='datetime64[ns]', freq='M'),
 'day')
In [88]:
rng = pd.date_range('2001-01-01', periods = 10, freq = 'M')
rng
Out[88]:
DatetimeIndex(['2001-01-31', '2001-02-28', '2001-03-31', '2001-04-30',
               '2001-05-31', '2001-06-30', '2001-07-31', '2001-08-31',
               '2001-09-30', '2001-10-31'],
              dtype='datetime64[ns]', freq='M')
In [89]:
# slicing
rng[:5]
Out[89]:
DatetimeIndex(['2001-01-31', '2001-02-28', '2001-03-31', '2001-04-30',
               '2001-05-31'],
              dtype='datetime64[ns]', freq='M')
In [76]:
# select by datetime index
df[df.index.isin(rng)]
Out[76]:
Open High Low Close Volume Adj Close
Date
2001-10-31 17.73 18.40 17.44 17.56 9776800 8.78
2001-08-31 17.73 18.60 17.65 18.55 7746600 9.27
2001-07-31 19.27 19.42 18.51 18.79 8393800 9.40
2001-05-31 19.80 20.24 19.49 19.95 15817600 9.98
2001-04-30 26.70 27.12 24.87 25.49 17670600 12.74
2001-02-28 19.37 19.44 18.12 18.25 18157600 9.12
2001-01-31 21.50 22.50 21.44 21.62 26106000 10.81

Series¶

In [69]:
s = pd.Series(range(len(rng)), index = rng)
s, type(s), s.index
Out[69]:
(2001-01-31     0
 2001-02-28     1
 2001-03-31     2
 2001-04-30     3
 2001-05-31     4
 2001-06-30     5
 2001-07-31     6
 2001-08-31     7
 2001-09-30     8
 2001-10-31     9
 2001-11-30    10
 2001-12-31    11
 Freq: M, dtype: int64,
 pandas.core.series.Series,
 DatetimeIndex(['2001-01-31', '2001-02-28', '2001-03-31', '2001-04-30',
                '2001-05-31', '2001-06-30', '2001-07-31', '2001-08-31',
                '2001-09-30', '2001-10-31', '2001-11-30', '2001-12-31'],
               dtype='datetime64[ns]', freq='M'))

Convert to Period¶

In [140]:
# convert DatetimeIndex to PeriodIndex
rng = pd.date_range('2001-01-01', periods = 10, freq = 'M')
rng.to_period()
Out[140]:
PeriodIndex(['2001-01', '2001-02', '2001-03', '2001-04', '2001-05', '2001-06',
             '2001-07', '2001-08', '2001-09', '2001-10'],
            dtype='period[M]')
In [142]:
# convert a Series with DatetimeIndex to a Series with PeriodIndex
s = pd.Series(range(len(rng)), index = rng)
s.to_period()
Out[142]:
PeriodIndex(['2001-01', '2001-02', '2001-03', '2001-04', '2001-05', '2001-06',
             '2001-07', '2001-08', '2001-09', '2001-10'],
            dtype='period[M]')

Reference¶

  • Offset aliases
  • Time series / date functionality
  • Pandas Time Series Analysis Part 1: DatetimeIndex and Resample
  • DateOffset objects