import pandas as pd
df = pd.read_csv('aapl.csv', parse_dates=['Date'], index_col = "Date")
df.head()
Open | High | Low | Close | Volume | Adj Close | |
---|---|---|---|---|---|---|
Date | ||||||
2008-10-14 | 116.26 | 116.40 | 103.14 | 104.08 | 70749800 | 104.08 |
2008-10-13 | 104.55 | 110.53 | 101.02 | 110.26 | 54967000 | 110.26 |
2008-10-10 | 85.70 | 100.00 | 85.00 | 96.80 | 79260700 | 96.80 |
2008-10-09 | 93.35 | 95.80 | 86.60 | 88.74 | 57763700 | 88.74 |
2008-10-08 | 85.91 | 96.33 | 85.68 | 89.79 | 78847900 | 89.79 |
df.index # DatetimeIndex
DatetimeIndex(['2008-10-14', '2008-10-13', '2008-10-10', '2008-10-09', '2008-10-08', '2008-10-07', '2008-10-06', '2008-10-03', '2008-10-02', '2008-10-01', ... '1984-09-20', '1984-09-19', '1984-09-18', '1984-09-17', '1984-09-14', '1984-09-13', '1984-09-12', '1984-09-11', '1984-09-10', '1984-09-07'], dtype='datetime64[ns]', name='Date', length=6081, freq=None)
ts = pd.Timestamp("2018-01-05")
ts
Timestamp('2018-01-05 00:00:00')
ts.day_name()
'Friday'
ts+pd.Timedelta("1 day") # add one day
Timestamp('2018-01-06 00:00:00')
ts+pd.offsets.BDay() # add one business day
Timestamp('2018-01-08 00:00:00')
# Null data time
pd.Timestamp(pd.NaT)
NaT
df['2008-01'] # select one month data
# or df.loc['2008-01']
/var/folders/99/z39gcgsd2_z4nj1wpdp68jm00000gp/T/ipykernel_57501/3730440384.py:1: FutureWarning: Indexing a DataFrame with a datetimelike index using a single string to slice the rows, like `frame[string]`, is deprecated and will be removed in a future version. Use `frame.loc[string]` instead. df['2008-01'] # select one month data
Open | High | Low | Close | Volume | Adj Close | |
---|---|---|---|---|---|---|
Date | ||||||
2008-01-31 | 129.45 | 136.65 | 129.40 | 135.36 | 48059800 | 135.36 |
2008-01-30 | 131.37 | 135.45 | 130.00 | 132.18 | 44394700 | 132.18 |
2008-01-29 | 131.15 | 132.79 | 129.05 | 131.54 | 39285100 | 131.54 |
2008-01-28 | 128.16 | 133.20 | 126.45 | 130.01 | 52673000 | 130.01 |
2008-01-25 | 138.99 | 139.09 | 129.61 | 130.01 | 55526400 | 130.01 |
2008-01-24 | 139.99 | 140.70 | 132.01 | 135.60 | 71638100 | 135.60 |
2008-01-23 | 136.19 | 140.00 | 126.14 | 139.07 | 120463200 | 139.07 |
2008-01-22 | 148.06 | 159.98 | 146.00 | 155.64 | 86955500 | 155.64 |
2008-01-18 | 161.71 | 165.75 | 159.61 | 161.36 | 61583700 | 161.36 |
2008-01-17 | 161.51 | 165.36 | 158.42 | 160.89 | 62780700 | 160.89 |
2008-01-16 | 165.23 | 169.01 | 156.70 | 159.64 | 79065900 | 159.64 |
2008-01-15 | 177.72 | 179.22 | 164.66 | 169.04 | 83688500 | 169.04 |
2008-01-14 | 177.52 | 179.42 | 175.17 | 178.78 | 39301800 | 178.78 |
2008-01-11 | 176.00 | 177.85 | 170.00 | 172.69 | 44010200 | 172.69 |
2008-01-10 | 177.58 | 181.00 | 175.41 | 178.02 | 52963400 | 178.02 |
2008-01-09 | 171.30 | 179.50 | 168.30 | 179.40 | 64781500 | 179.40 |
2008-01-08 | 180.14 | 182.46 | 170.80 | 171.25 | 54422000 | 171.25 |
2008-01-07 | 181.25 | 183.60 | 170.23 | 177.64 | 74006900 | 177.64 |
2008-01-04 | 191.45 | 193.00 | 178.89 | 180.05 | 51994000 | 180.05 |
2008-01-03 | 195.41 | 197.39 | 192.69 | 194.93 | 30073800 | 194.93 |
2008-01-02 | 199.27 | 200.26 | 192.55 | 194.84 | 38542100 | 194.84 |
df.index
DatetimeIndex(['2008-10-14', '2008-10-13', '2008-10-10', '2008-10-09', '2008-10-08', '2008-10-07', '2008-10-06', '2008-10-03', '2008-10-02', '2008-10-01', ... '1984-09-20', '1984-09-19', '1984-09-18', '1984-09-17', '1984-09-14', '1984-09-13', '1984-09-12', '1984-09-11', '1984-09-10', '1984-09-07'], dtype='datetime64[ns]', name='Date', length=6081, freq=None)
# String slicing
df['2008-01-02':'2008-01-03'] # inclusive
Open | High | Low | Close | Volume | Adj Close | |
---|---|---|---|---|---|---|
Date | ||||||
2008-01-03 | 195.41 | 197.39 | 192.69 | 194.93 | 30073800 | 194.93 |
2008-01-02 | 199.27 | 200.26 | 192.55 | 194.84 | 38542100 | 194.84 |
df.asfreq('Y', method='pad') # select data every year
Open | High | Low | Close | Volume | Adj Close | |
---|---|---|---|---|---|---|
Date | ||||||
1984-12-31 | 29.12 | 29.25 | 29.12 | 29.12 | 7453600 | 3.32 |
1985-12-31 | 22.25 | 22.37 | 22.00 | 22.00 | 3158400 | 2.51 |
1986-12-31 | 41.00 | 41.38 | 40.38 | 40.50 | 4742400 | 4.62 |
1987-12-31 | 42.50 | 43.00 | 41.88 | 42.00 | 4200000 | 9.63 |
1988-12-31 | 40.25 | 40.50 | 40.00 | 40.38 | 3578800 | 9.34 |
1989-12-31 | 35.25 | 37.50 | 35.00 | 37.25 | 6555600 | 8.70 |
1990-12-31 | 43.00 | 43.25 | 42.75 | 43.00 | 1593200 | 10.17 |
1991-12-31 | 57.38 | 58.00 | 56.00 | 56.38 | 4802000 | 13.46 |
1992-12-31 | 58.75 | 60.00 | 58.75 | 59.75 | 3302000 | 14.39 |
1993-12-31 | 29.75 | 30.25 | 29.25 | 29.25 | 5765200 | 7.13 |
1994-12-31 | 38.88 | 38.88 | 37.88 | 38.38 | 3726400 | 9.49 |
1995-12-31 | 32.25 | 32.25 | 31.75 | 32.13 | 4983200 | 8.03 |
1996-12-31 | 21.37 | 21.50 | 20.75 | 20.87 | 13719200 | 5.22 |
1997-12-31 | 13.13 | 13.63 | 12.94 | 13.13 | 14531200 | 3.28 |
1998-12-31 | 40.50 | 41.38 | 39.50 | 40.94 | 9716400 | 10.23 |
1999-12-31 | 100.94 | 102.87 | 99.50 | 102.81 | 5856400 | 25.70 |
2000-12-31 | 14.88 | 15.25 | 14.56 | 14.88 | 16161800 | 7.44 |
2001-12-31 | 22.51 | 22.66 | 21.83 | 21.90 | 4920800 | 10.95 |
2002-12-31 | 14.00 | 14.36 | 13.95 | 14.33 | 7168800 | 7.16 |
2003-12-31 | 21.35 | 21.53 | 21.18 | 21.37 | 6230400 | 10.69 |
2004-12-31 | 64.89 | 65.00 | 64.03 | 64.40 | 9949600 | 32.20 |
2005-12-31 | 72.38 | 74.75 | 72.25 | 74.75 | 28829800 | 74.75 |
2006-12-31 | 86.29 | 86.58 | 81.90 | 83.80 | 44225700 | 83.80 |
2007-12-31 | 199.50 | 200.50 | 197.75 | 198.08 | 19261900 | 198.08 |
df.index[0]
Timestamp('2008-10-14 00:00:00')
df.head()
Open | High | Low | Close | Volume | Adj Close | |
---|---|---|---|---|---|---|
Date | ||||||
2008-10-14 | 116.26 | 116.40 | 103.14 | 104.08 | 70749800 | 104.08 |
2008-10-13 | 104.55 | 110.53 | 101.02 | 110.26 | 54967000 | 110.26 |
2008-10-10 | 85.70 | 100.00 | 85.00 | 96.80 | 79260700 | 96.80 |
2008-10-09 | 93.35 | 95.80 | 86.60 | 88.74 | 57763700 | 88.74 |
2008-10-08 | 85.91 | 96.33 | 85.68 | 89.79 | 78847900 | 89.79 |
# exact match, Series
series_minute = pd.Series(
[1, 2, 3],
pd.DatetimeIndex(
["2011-12-31 23:59:00", "2012-01-01 00:00:00", "2012-01-01 00:02:00"]
),
)
series_minute["2011-12-31 23:59:00"]
# or series_minute.loc["2011-12-31 23:59:00"]
1
# exact match, DateFrame
df.loc['2008-10-14 00:00:00']
Open 116.26 High 116.40 Low 103.14 Close 104.08 Volume 70749800.00 Adj Close 104.08 Name: 2008-10-14 00:00:00, dtype: float64
# fancy indexing
df.index[[1, 3, 5]]
DatetimeIndex(['2008-10-13', '2008-10-09', '2008-10-07'], dtype='datetime64[ns]', name='Date', freq=None)
df.resample('M').mean() # monthly mean
Open | High | Low | Close | Volume | Adj Close | |
---|---|---|---|---|---|---|
Date | ||||||
1984-09-30 | 26.981250 | 27.333125 | 26.606250 | 26.738750 | 4.807300e+06 | 3.051875 |
1984-10-31 | 25.035652 | 25.313478 | 24.780435 | 24.806957 | 5.559409e+06 | 2.830435 |
1984-11-30 | 24.545238 | 24.782857 | 24.188095 | 24.236190 | 5.749562e+06 | 2.764762 |
1984-12-31 | 27.060000 | 27.378500 | 26.841000 | 26.947500 | 6.195360e+06 | 3.075000 |
1985-01-31 | 29.520000 | 29.855909 | 29.140000 | 29.253182 | 1.035382e+07 | 3.337727 |
... | ... | ... | ... | ... | ... | ... |
2008-06-30 | 178.928571 | 181.549524 | 175.414286 | 178.485714 | 3.311754e+07 | 178.485714 |
2008-07-31 | 167.790455 | 170.846818 | 164.117273 | 167.748636 | 3.206130e+07 | 167.748636 |
2008-08-31 | 170.620476 | 173.298571 | 168.745238 | 171.069524 | 2.225062e+07 | 171.069524 |
2008-09-30 | 143.129524 | 146.230000 | 137.534286 | 141.243333 | 4.091339e+07 | 141.243333 |
2008-10-31 | 100.214000 | 104.699000 | 93.997000 | 98.326000 | 6.696761e+07 | 98.326000 |
290 rows × 6 columns
df['Close'].resample('M').mean().plot()
<Axes: xlabel='Date'>
# create DatetimeIndex from a list
pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"])
DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq=None)
import numpy as np
dates = [
pd.Timestamp("2012-05-01"),
pd.Timestamp("2012-05-02"),
pd.Timestamp("2012-05-03"),
]
s = pd.Series(np.random.randn(3), index = dates) # Timestamp to DatetimeIndex
s.index
DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)
dates = [
pd.Timestamp("2012-05-01"),
pd.Timestamp("2012-05-02"),
pd.Timestamp("2012-05-03"),
]
pd.Index(dates) # Timestamp to DatetimeIndex
DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)
rng = pd.date_range('2001-01-01', '2002-01-01', freq = 'M') # Datetime Index
rng, rng.resolution
(DatetimeIndex(['2001-01-31', '2001-02-28', '2001-03-31', '2001-04-30', '2001-05-31', '2001-06-30', '2001-07-31', '2001-08-31', '2001-09-30', '2001-10-31', '2001-11-30', '2001-12-31'], dtype='datetime64[ns]', freq='M'), 'day')
rng = pd.date_range('2001-01-01', periods = 10, freq = 'M')
rng
DatetimeIndex(['2001-01-31', '2001-02-28', '2001-03-31', '2001-04-30', '2001-05-31', '2001-06-30', '2001-07-31', '2001-08-31', '2001-09-30', '2001-10-31'], dtype='datetime64[ns]', freq='M')
# slicing
rng[:5]
DatetimeIndex(['2001-01-31', '2001-02-28', '2001-03-31', '2001-04-30', '2001-05-31'], dtype='datetime64[ns]', freq='M')
# select by datetime index
df[df.index.isin(rng)]
Open | High | Low | Close | Volume | Adj Close | |
---|---|---|---|---|---|---|
Date | ||||||
2001-10-31 | 17.73 | 18.40 | 17.44 | 17.56 | 9776800 | 8.78 |
2001-08-31 | 17.73 | 18.60 | 17.65 | 18.55 | 7746600 | 9.27 |
2001-07-31 | 19.27 | 19.42 | 18.51 | 18.79 | 8393800 | 9.40 |
2001-05-31 | 19.80 | 20.24 | 19.49 | 19.95 | 15817600 | 9.98 |
2001-04-30 | 26.70 | 27.12 | 24.87 | 25.49 | 17670600 | 12.74 |
2001-02-28 | 19.37 | 19.44 | 18.12 | 18.25 | 18157600 | 9.12 |
2001-01-31 | 21.50 | 22.50 | 21.44 | 21.62 | 26106000 | 10.81 |
s = pd.Series(range(len(rng)), index = rng)
s, type(s), s.index
(2001-01-31 0 2001-02-28 1 2001-03-31 2 2001-04-30 3 2001-05-31 4 2001-06-30 5 2001-07-31 6 2001-08-31 7 2001-09-30 8 2001-10-31 9 2001-11-30 10 2001-12-31 11 Freq: M, dtype: int64, pandas.core.series.Series, DatetimeIndex(['2001-01-31', '2001-02-28', '2001-03-31', '2001-04-30', '2001-05-31', '2001-06-30', '2001-07-31', '2001-08-31', '2001-09-30', '2001-10-31', '2001-11-30', '2001-12-31'], dtype='datetime64[ns]', freq='M'))
# convert DatetimeIndex to PeriodIndex
rng = pd.date_range('2001-01-01', periods = 10, freq = 'M')
rng.to_period()
PeriodIndex(['2001-01', '2001-02', '2001-03', '2001-04', '2001-05', '2001-06', '2001-07', '2001-08', '2001-09', '2001-10'], dtype='period[M]')
# convert a Series with DatetimeIndex to a Series with PeriodIndex
s = pd.Series(range(len(rng)), index = rng)
s.to_period()
PeriodIndex(['2001-01', '2001-02', '2001-03', '2001-04', '2001-05', '2001-06', '2001-07', '2001-08', '2001-09', '2001-10'], dtype='period[M]')