Date times: A specific date and time with timezone support.
Time deltas: An absolute time duration.
Time spans: A span of time defined by a point in time and its associated frequency.
Date offsets: A relative time duration that respects calendar arithmetic.
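A minimal sketch (not part of the original notebook) with one object of each kind; the values are made up:
import pandas as pd
ts = pd.Timestamp("2023-01-01 12:00")   # date time
td = pd.Timedelta(hours=36)             # time delta (absolute duration)
per = pd.Period("2023-01", freq="M")    # time span (the whole month of January 2023)
off = pd.offsets.MonthEnd()             # date offset (calendar-aware)
ts + td   # Timestamp('2023-01-03 00:00:00')
ts + off  # Timestamp('2023-01-31 12:00:00'), rolled forward to the month end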
import pandas as pd
import numpy as np
import datetime
pdi = pd.to_datetime(
["2023-01-01", "1/1/2018", np.datetime64("2018-01-01"), datetime.datetime(2018, 1, 1)]) # DatetimeIndex
pdi[0] # Timestamp
Timestamp('2023-01-01 00:00:00')
# create a timestamp with a specific format
# https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
pd.to_datetime("2010/11/12", format="%Y/%m/%d")
Timestamp('2010-11-12 00:00:00')
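Malformed strings raise by default; a small sketch (the errors argument is standard pandas, the inputs here are made up) showing how to coerce them to NaT instead:
pd.to_datetime(["2010/11/12", "not a date"], format="%Y/%m/%d", errors="coerce")
# DatetimeIndex(['2010-11-12', 'NaT'], dtype='datetime64[ns]', freq=None)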
# create timestamps from multiple columns
df = pd.DataFrame(
{"year": [2015, 2016], "month": [2, 3], "day": [4, 5], "hour": [2, 3]}) # DateFrame
pd.to_datetime(df) # Series
pd.to_datetime(df[["year", "month", "day"]]) # Series
0   2015-02-04
1   2016-03-05
dtype: datetime64[ns]
# convert epoch to Timestamp
pd.to_datetime(
[1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit="s")
DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05', '2012-10-10 18:15:05', '2012-10-11 18:15:05', '2012-10-12 18:15:05'], dtype='datetime64[ns]', freq=None)
# convert Timestamp to epoch
stamps = pd.date_range("2012-10-08 18:15:05", periods=4, freq="D")
(stamps - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s")
Int64Index([1349720105, 1349806505, 1349892905, 1349979305], dtype='int64')
# convert int to Timestamp
# by default, origin is 1970-01-01 00:00:00
pd.to_datetime([1, 2, 3], unit="D", origin=pd.Timestamp("1960-01-01"))
DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], dtype='datetime64[ns]', freq=None)
# create a sequence of timestamps without a time zone
dti = pd.date_range("2023-04-20", periods=3, freq="H") # DatetimeIndex
dti
DatetimeIndex(['2023-04-20 00:00:00', '2023-04-20 01:00:00', '2023-04-20 02:00:00'], dtype='datetime64[ns]', freq='H')
pd.date_range(start = '2001-04', end = '2002-04') # 366 days
DatetimeIndex(['2001-04-01', '2001-04-02', '2001-04-03', '2001-04-04', '2001-04-05', '2001-04-06', '2001-04-07', '2001-04-08', '2001-04-09', '2001-04-10', ... '2002-03-23', '2002-03-24', '2002-03-25', '2002-03-26', '2002-03-27', '2002-03-28', '2002-03-29', '2002-03-30', '2002-03-31', '2002-04-01'], dtype='datetime64[ns]', length=366, freq='D')
# custom business days: restrict to the given weekdays and skip holidays
pd.bdate_range(start = '2001-01', end = '2002-01', freq="C", weekmask="Mon Wed Fri", holidays=['2001-01-01', datetime.datetime(2011, 3, 14)]) # 156 days
DatetimeIndex(['2001-01-03', '2001-01-05', '2001-01-08', '2001-01-10', '2001-01-12', '2001-01-15', '2001-01-17', '2001-01-19', '2001-01-22', '2001-01-24', ... '2001-12-10', '2001-12-12', '2001-12-14', '2001-12-17', '2001-12-19', '2001-12-21', '2001-12-24', '2001-12-26', '2001-12-28', '2001-12-31'], dtype='datetime64[ns]', length=156, freq='C')
pd.bdate_range(start='2001-04', periods=20, freq='D')
DatetimeIndex(['2001-04-01', '2001-04-02', '2001-04-03', '2001-04-04', '2001-04-05', '2001-04-06', '2001-04-07', '2001-04-08', '2001-04-09', '2001-04-10', '2001-04-11', '2001-04-12', '2001-04-13', '2001-04-14', '2001-04-15', '2001-04-16', '2001-04-17', '2001-04-18', '2001-04-19', '2001-04-20'], dtype='datetime64[ns]', freq='D')
pd.bdate_range(start = '2001-04', end = '2002-04') # 261 days
DatetimeIndex(['2001-04-02', '2001-04-03', '2001-04-04', '2001-04-05', '2001-04-06', '2001-04-09', '2001-04-10', '2001-04-11', '2001-04-12', '2001-04-13', ... '2002-03-19', '2002-03-20', '2002-03-21', '2002-03-22', '2002-03-25', '2002-03-26', '2002-03-27', '2002-03-28', '2002-03-29', '2002-04-01'], dtype='datetime64[ns]', length=261, freq='B')
dti.tz_localize('US/Eastern')
DatetimeIndex(['2023-04-20 00:00:00-04:00', '2023-04-20 01:00:00-04:00', '2023-04-20 02:00:00-04:00'], dtype='datetime64[ns, US/Eastern]', freq=None)
# options for the `ambiguous` argument:
# 'raise' (default): raise a pytz.AmbiguousTimeError
# 'infer': attempt to determine the correct offset based on the monotonicity of the timestamps
# 'NaT': replace ambiguous times with NaT
# bool (or bool array): True marks a daylight saving time (DST) time, False a non-DST time
dti.tz_localize('US/Eastern', ambiguous="infer")
DatetimeIndex(['2023-04-20 00:00:00-04:00', '2023-04-20 01:00:00-04:00', '2023-04-20 02:00:00-04:00'], dtype='datetime64[ns, US/Eastern]', freq=None)
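The DatetimeIndex above contains no ambiguous times, so the result is unchanged. A sketch with a genuinely ambiguous wall-clock time (the 2023 US DST fall-back; the timestamps are made up):
dup = pd.DatetimeIndex(["2023-11-05 01:30:00", "2023-11-05 01:30:00"])
dup.tz_localize("US/Eastern", ambiguous=[True, False])  # first as DST (-04:00), second as standard time (-05:00)
dup.tz_localize("US/Eastern", ambiguous="NaT")          # both replaced with NaT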
dti = dti.tz_localize('CET') # specify a time zone
dti
DatetimeIndex(['2023-04-20 00:00:00+02:00', '2023-04-20 01:00:00+02:00', '2023-04-20 02:00:00+02:00'], dtype='datetime64[ns, CET]', freq=None)
dti = dti.tz_convert('US/Eastern') # convert time zone
dti
DatetimeIndex(['2023-04-19 18:00:00-04:00', '2023-04-19 19:00:00-04:00', '2023-04-19 20:00:00-04:00'], dtype='datetime64[ns, US/Eastern]', freq=None)
df = pd.read_csv('aapl.csv', parse_dates=['Date'], index_col = "Date")
df.head()
Date | Open | High | Low | Close | Volume | Adj Close
---|---|---|---|---|---|---
2008-10-14 | 116.26 | 116.40 | 103.14 | 104.08 | 70749800 | 104.08 |
2008-10-13 | 104.55 | 110.53 | 101.02 | 110.26 | 54967000 | 110.26 |
2008-10-10 | 85.70 | 100.00 | 85.00 | 96.80 | 79260700 | 96.80 |
2008-10-09 | 93.35 | 95.80 | 86.60 | 88.74 | 57763700 | 88.74 |
2008-10-08 | 85.91 | 96.33 | 85.68 | 89.79 | 78847900 | 89.79 |
# downsample to monthly means
df.resample('M').mean()
Date | Open | High | Low | Close | Volume | Adj Close
---|---|---|---|---|---|---
1984-09-30 | 26.981250 | 27.333125 | 26.606250 | 26.738750 | 4.807300e+06 | 3.051875 |
1984-10-31 | 25.035652 | 25.313478 | 24.780435 | 24.806957 | 5.559409e+06 | 2.830435 |
1984-11-30 | 24.545238 | 24.782857 | 24.188095 | 24.236190 | 5.749562e+06 | 2.764762 |
1984-12-31 | 27.060000 | 27.378500 | 26.841000 | 26.947500 | 6.195360e+06 | 3.075000 |
1985-01-31 | 29.520000 | 29.855909 | 29.140000 | 29.253182 | 1.035382e+07 | 3.337727 |
... | ... | ... | ... | ... | ... | ... |
2008-06-30 | 178.928571 | 181.549524 | 175.414286 | 178.485714 | 3.311754e+07 | 178.485714 |
2008-07-31 | 167.790455 | 170.846818 | 164.117273 | 167.748636 | 3.206130e+07 | 167.748636 |
2008-08-31 | 170.620476 | 173.298571 | 168.745238 | 171.069524 | 2.225062e+07 | 171.069524 |
2008-09-30 | 143.129524 | 146.230000 | 137.534286 | 141.243333 | 4.091339e+07 | 141.243333 |
2008-10-31 | 100.214000 | 104.699000 | 93.997000 | 98.326000 | 6.696761e+07 | 98.326000 |
290 rows × 6 columns
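mean() is not the only reduction; other aggregations work the same way on the resampler. A sketch against the same df (the column/function choices are illustrative):
df.resample('M').agg({'Close': ['min', 'max'], 'Volume': 'sum'})
df['Close'].resample('A').ohlc()  # yearly open/high/low/close of the closing price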
# upsample to hourly frequency and forward fill
df.resample('H').ffill()
Date | Open | High | Low | Close | Volume | Adj Close
---|---|---|---|---|---|---
1984-09-07 00:00:00 | 26.50 | 26.87 | 26.25 | 26.50 | 2981600 | 3.02 |
1984-09-07 01:00:00 | 26.50 | 26.87 | 26.25 | 26.50 | 2981600 | 3.02 |
1984-09-07 02:00:00 | 26.50 | 26.87 | 26.25 | 26.50 | 2981600 | 3.02 |
1984-09-07 03:00:00 | 26.50 | 26.87 | 26.25 | 26.50 | 2981600 | 3.02 |
1984-09-07 04:00:00 | 26.50 | 26.87 | 26.25 | 26.50 | 2981600 | 3.02 |
... | ... | ... | ... | ... | ... | ... |
2008-10-13 20:00:00 | 104.55 | 110.53 | 101.02 | 110.26 | 54967000 | 110.26 |
2008-10-13 21:00:00 | 104.55 | 110.53 | 101.02 | 110.26 | 54967000 | 110.26 |
2008-10-13 22:00:00 | 104.55 | 110.53 | 101.02 | 110.26 | 54967000 | 110.26 |
2008-10-13 23:00:00 | 104.55 | 110.53 | 101.02 | 110.26 | 54967000 | 110.26 |
2008-10-14 00:00:00 | 116.26 | 116.40 | 103.14 | 104.08 | 70749800 | 104.08 |
211273 rows × 6 columns
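Forward filling is only one strategy; a small sketch (a toy series, not the AAPL data) comparing fill options when upsampling:
s = pd.Series([1.0, 4.0], index=pd.to_datetime(["2023-01-01", "2023-01-03"]))
s.resample("D").ffill()        # 1.0, 1.0, 4.0  (repeat the previous value)
s.resample("D").bfill()        # 1.0, 4.0, 4.0  (take the next value)
s.resample("D").interpolate()  # 1.0, 2.5, 4.0  (linear interpolation)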
# iterate over the groups produced by resampling
group = df.resample('M') # DatetimeIndexResampler
for element in group:
print(element[1]) # DataFrame
break
             Open   High    Low  Close   Volume  Adj Close
Date
1984-09-07  26.50  26.87  26.25  26.50  2981600       3.02
1984-09-10  26.50  26.62  25.87  26.37  2346400       3.01
1984-09-11  26.62  27.37  26.62  26.87  5444000       3.07
1984-09-12  26.87  27.00  26.12  26.12  4773600       2.98
1984-09-13  27.50  27.62  27.50  27.50  7429600       3.14
1984-09-14  27.62  28.50  27.62  27.87  8826400       3.18
1984-09-17  28.62  29.00  28.62  28.62  6886400       3.27
1984-09-18  28.62  28.87  27.62  27.62  3495200       3.15
1984-09-19  27.62  27.87  27.00  27.00  3816000       3.08
1984-09-20  27.12  27.37  27.12  27.12  2387200       3.09
1984-09-21  27.12  27.87  26.50  26.87  3591200       3.07
1984-09-24  26.87  27.00  26.62  26.62  2833600       3.04
1984-09-25  26.50  26.50  26.12  26.12  5977600       2.98
1984-09-26  26.12  27.25  25.75  25.75  3987200       2.94
1984-09-27  25.75  25.87  25.75  25.75  3796000       2.94
1984-09-28  25.75  25.75  24.62  25.12  8344800       2.87
# origin options for resample:
# 'epoch': origin is 1970-01-01
# 'start': origin is the first value of the timeseries
# 'start_day': origin is the first day at midnight of the timeseries
# 'end': origin is the last value of the timeseries
# 'end_day': origin is the ceiling midnight of the last day
start, end = "2000-10-01 23:30:00", "2000-10-02 00:30:00"
rng = pd.date_range(start, end, freq="7min")
ts = pd.Series(np.arange(len(rng)) * 3, index=rng)
display(ts)
group = ts.resample("20min", origin="start_day")
for element in group:
print(element[1])
2000-10-01 23:30:00     0
2000-10-01 23:37:00     3
2000-10-01 23:44:00     6
2000-10-01 23:51:00     9
2000-10-01 23:58:00    12
2000-10-02 00:05:00    15
2000-10-02 00:12:00    18
2000-10-02 00:19:00    21
2000-10-02 00:26:00    24
Freq: 7T, dtype: int64
2000-10-01 23:30:00    0
2000-10-01 23:37:00    3
Freq: 7T, dtype: int64
2000-10-01 23:44:00     6
2000-10-01 23:51:00     9
2000-10-01 23:58:00    12
Freq: 7T, dtype: int64
2000-10-02 00:05:00    15
2000-10-02 00:12:00    18
2000-10-02 00:19:00    21
Freq: 7T, dtype: int64
2000-10-02 00:26:00    24
Freq: 7T, dtype: int64
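A quick sketch contrasting the origins (reusing ts from above, with an odd 17-minute bin size so the difference in bin edges is visible):
ts.resample("17min", origin="start_day").sum()  # bins anchored at 2000-10-01 00:00:00
ts.resample("17min", origin="epoch").sum()      # bins anchored at 1970-01-01 00:00:00
ts.resample("17min", origin="start").sum()      # bins anchored at the first timestamp, 2000-10-01 23:30:00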
df.truncate(before="2001-11", after="2001-12")
Date | Open | High | Low | Close | Volume | Adj Close
---|---|---|---|---|---|---
2001-11-30 | 20.47 | 21.44 | 20.25 | 21.30 | 10854000 | 10.65 |
2001-11-29 | 20.60 | 20.70 | 20.19 | 20.42 | 7241600 | 10.21 |
2001-11-28 | 20.85 | 21.21 | 20.41 | 20.53 | 8950400 | 10.27 |
2001-11-27 | 21.20 | 21.52 | 20.50 | 21.00 | 9591200 | 10.50 |
2001-11-26 | 19.94 | 21.55 | 19.88 | 21.37 | 16453200 | 10.69 |
2001-11-23 | 19.71 | 19.95 | 19.57 | 19.84 | 2143000 | 9.92 |
2001-11-21 | 19.61 | 19.80 | 19.26 | 19.68 | 7199400 | 9.84 |
2001-11-20 | 19.82 | 20.20 | 19.50 | 19.53 | 9878000 | 9.77 |
2001-11-19 | 19.00 | 20.05 | 18.96 | 20.00 | 11878200 | 10.00 |
2001-11-16 | 19.27 | 19.29 | 18.40 | 18.97 | 8238000 | 9.48 |
2001-11-15 | 19.45 | 19.90 | 19.23 | 19.45 | 7608200 | 9.73 |
2001-11-14 | 19.59 | 19.90 | 19.15 | 19.61 | 7898200 | 9.81 |
2001-11-13 | 19.08 | 19.39 | 18.71 | 19.37 | 8024000 | 9.69 |
2001-11-12 | 18.66 | 19.17 | 17.96 | 18.75 | 7196400 | 9.38 |
2001-11-09 | 18.60 | 19.25 | 18.55 | 18.71 | 4796200 | 9.35 |
2001-11-08 | 19.63 | 19.89 | 18.57 | 18.71 | 12219400 | 9.35 |
2001-11-07 | 19.46 | 20.13 | 19.33 | 19.59 | 13678200 | 9.80 |
2001-11-06 | 18.96 | 19.62 | 18.53 | 19.57 | 11286400 | 9.78 |
2001-11-05 | 18.84 | 19.25 | 18.61 | 19.07 | 8421200 | 9.53 |
2001-11-02 | 18.52 | 18.86 | 18.16 | 18.57 | 7043000 | 9.28 |
2001-11-01 | 17.65 | 18.78 | 17.25 | 18.59 | 11178400 | 9.30 |
rng = pd.date_range("2023-04-20", periods=3, freq="H")
# shift forward by one period (lag): the first value becomes NaN
pd.Series(range(len(rng)), index = rng).shift(periods = 1)
2023-04-20 00:00:00    NaN
2023-04-20 01:00:00    0.0
2023-04-20 02:00:00    1.0
Freq: H, dtype: float64
# shift backward by one period (lead): the last value becomes NaN
pd.Series(range(len(rng)), index = rng).shift(periods = -1)
2023-04-20 00:00:00    1.0
2023-04-20 01:00:00    2.0
2023-04-20 02:00:00    NaN
Freq: H, dtype: float64
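Shifting is commonly used to compare a series with its own past values; a small sketch (made-up numbers):
s = pd.Series([10, 12, 15], index=pd.date_range("2023-04-20", periods=3, freq="D"))
s - s.shift(1)      # absolute change versus the previous day: NaN, 2, 3
s / s.shift(1) - 1  # relative change, equivalent to s.pct_change()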
rng = pd.date_range("2023-04-20", periods=3, freq="H")
s = pd.Series(range(len(rng)), index = rng)
s.asfreq('Min')
2023-04-20 00:00:00    0.0
2023-04-20 00:01:00    NaN
2023-04-20 00:02:00    NaN
2023-04-20 00:03:00    NaN
2023-04-20 00:04:00    NaN
                      ...
2023-04-20 01:56:00    NaN
2023-04-20 01:57:00    NaN
2023-04-20 01:58:00    NaN
2023-04-20 01:59:00    NaN
2023-04-20 02:00:00    2.0
Freq: T, Length: 121, dtype: float64
ts = pd.Timestamp("2016-10-30 00:00:00", tz="Europe/Helsinki")
ts + pd.Timedelta(days=1)
Timestamp('2016-10-30 23:00:00+0200', tz='Europe/Helsinki')
ts = pd.Timestamp("2016-10-30 00:00:00", tz="Europe/Helsinki")
ts + pd.DateOffset(days=1)
Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki')
friday = pd.Timestamp("2018-01-05") # Friday
friday + 2 * pd.offsets.BDay()
Timestamp('2018-01-09 00:00:00')
ts = pd.Timestamp("2018-01-06 00:00:00") # Saturday
offset = pd.offsets.BusinessHour(start="09:00")
ts + offset
Timestamp('2018-01-08 10:00:00')
# reset time to midnight
(ts + offset).normalize()
Timestamp('2018-01-08 00:00:00')
# apply an offset elementwise to a DatetimeIndex
rng = pd.date_range("2012-01-01", "2012-01-03")
rng + pd.DateOffset(months=2)
DatetimeIndex(['2012-03-01', '2012-03-02', '2012-03-03'], dtype='datetime64[ns]', freq=None)
# business day
from pandas.tseries.holiday import USFederalHolidayCalendar
dt = datetime.datetime(2014, 1, 17) # Friday before MLK Day
bday_us = pd.offsets.CustomBusinessDay(calendar=USFederalHolidayCalendar())
dt + bday_us # skips the MLK Day holiday -> Tuesday
Timestamp('2014-01-21 00:00:00')
# business hour
bh = pd.offsets.BusinessHour()
pd.Timestamp("2014-08-01 10:00") + bh # Timestamp('2014-08-01 11:00:00')
pd.Timestamp("2014-08-01 16:30") + bh # Timestamp('2014-08-04 09:30:00')
Timestamp('2014-08-04 09:30:00')
# anchored offset semantics
pd.Timestamp("2014-01-02") + pd.offsets.MonthBegin(n=1)
Timestamp('2014-02-01 00:00:00')
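Anchored offsets can also snap an existing timestamp to their anchor without doing arithmetic, via rollforward and rollback (a short sketch):
month_end = pd.offsets.MonthEnd()
month_end.rollforward(pd.Timestamp("2014-01-02"))  # Timestamp('2014-01-31 00:00:00')
month_end.rollback(pd.Timestamp("2014-01-02"))     # Timestamp('2013-12-31 00:00:00')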
rng = pd.date_range("3/6/2012 00:00", periods=15, freq="D") # by default, there no time zone
rng = rng.tz_localize('CET') # assign a time zone
rng
DatetimeIndex(['2012-03-06 00:00:00+01:00', '2012-03-07 00:00:00+01:00', '2012-03-08 00:00:00+01:00', '2012-03-09 00:00:00+01:00', '2012-03-10 00:00:00+01:00', '2012-03-11 00:00:00+01:00', '2012-03-12 00:00:00+01:00', '2012-03-13 00:00:00+01:00', '2012-03-14 00:00:00+01:00', '2012-03-15 00:00:00+01:00', '2012-03-16 00:00:00+01:00', '2012-03-17 00:00:00+01:00', '2012-03-18 00:00:00+01:00', '2012-03-19 00:00:00+01:00', '2012-03-20 00:00:00+01:00'], dtype='datetime64[ns, CET]', freq=None)
# pytz
import pytz
pytz.all_timezones
rng = rng.tz_convert('Zulu') # convert time zone
rng
DatetimeIndex(['2012-03-05 23:00:00+00:00', '2012-03-06 23:00:00+00:00', '2012-03-07 23:00:00+00:00', '2012-03-08 23:00:00+00:00', '2012-03-09 23:00:00+00:00', '2012-03-10 23:00:00+00:00', '2012-03-11 23:00:00+00:00', '2012-03-12 23:00:00+00:00', '2012-03-13 23:00:00+00:00', '2012-03-14 23:00:00+00:00', '2012-03-15 23:00:00+00:00', '2012-03-16 23:00:00+00:00', '2012-03-17 23:00:00+00:00', '2012-03-18 23:00:00+00:00', '2012-03-19 23:00:00+00:00'], dtype='datetime64[ns, Zulu]', freq=None)
# dateutil
from dateutil.zoneinfo import get_zonefile_instance
get_zonefile_instance().zones
rng = rng.tz_convert('dateutil/US/Eastern') # convert time zone
rng
DatetimeIndex(['2012-03-05 18:00:00-05:00', '2012-03-06 18:00:00-05:00', '2012-03-07 18:00:00-05:00', '2012-03-08 18:00:00-05:00', '2012-03-09 18:00:00-05:00', '2012-03-10 18:00:00-05:00', '2012-03-11 19:00:00-04:00', '2012-03-12 19:00:00-04:00', '2012-03-13 19:00:00-04:00', '2012-03-14 19:00:00-04:00', '2012-03-15 19:00:00-04:00', '2012-03-16 19:00:00-04:00', '2012-03-17 19:00:00-04:00', '2012-03-18 19:00:00-04:00', '2012-03-19 19:00:00-04:00'], dtype='datetime64[ns, tzfile('/usr/share/zoneinfo/US/Eastern')]', freq=None)
# datetime
rng = rng.tz_convert(datetime.timezone.utc) # convert time zone
rng
DatetimeIndex(['2012-03-05 23:00:00+00:00', '2012-03-06 23:00:00+00:00', '2012-03-07 23:00:00+00:00', '2012-03-08 23:00:00+00:00', '2012-03-09 23:00:00+00:00', '2012-03-10 23:00:00+00:00', '2012-03-11 23:00:00+00:00', '2012-03-12 23:00:00+00:00', '2012-03-13 23:00:00+00:00', '2012-03-14 23:00:00+00:00', '2012-03-15 23:00:00+00:00', '2012-03-16 23:00:00+00:00', '2012-03-17 23:00:00+00:00', '2012-03-18 23:00:00+00:00', '2012-03-19 23:00:00+00:00'], dtype='datetime64[ns, UTC]', freq=None)
dti = pd.date_range(start="2015-03-29 02:30:00", periods=3, freq="H")
dti.tz_localize('Europe/Warsaw')
---------------------------------------------------------------------------
NonExistentTimeError                      Traceback (most recent call last)
Cell In[314], line 2
      1 dti = pd.date_range(start="2015-03-29 02:30:00", periods=3, freq="H")
----> 2 dti.tz_localize('Europe/Warsaw')
...
NonExistentTimeError: 2015-03-29 02:30:00
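Clocks in Europe/Warsaw jump from 02:00 to 03:00 on 2015-03-29, so 02:30 never exists on a wall clock. The nonexistent argument controls what happens instead of raising (a sketch):
dti.tz_localize('Europe/Warsaw', nonexistent='shift_forward')    # move to the first valid time, 03:00
dti.tz_localize('Europe/Warsaw', nonexistent='NaT')              # replace the nonexistent time with NaT
dti.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H')) # shift by a fixed timedelta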