diff --git a/ch10.ipynb b/ch10.ipynb index fdda16547..3a882fcdd 100644 --- a/ch10.ipynb +++ b/ch10.ipynb @@ -1,2108 +1,2242 @@ { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Time series" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from __future__ import division\n", + "from pandas import Series, DataFrame\n", + "import pandas as pd\n", + "from numpy.random import randn\n", + "import numpy as np\n", + "pd.options.display.max_rows = 12\n", + "np.set_printoptions(precision=4, suppress=True)\n", + "import matplotlib.pyplot as plt\n", + "plt.rc('figure', figsize=(12, 4))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Date and Time Data Types and Tools" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "now = datetime.now()\n", + "now" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "now.year, now.month, now.day" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)\n", + "delta" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "delta.days" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "delta.seconds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from datetime import timedelta\n", + "start = datetime(2011, 1, 7)\n", + "start + timedelta(12)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "start - 2 * timedelta(12)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Converting between string and datetime" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "stamp = datetime(2011, 1, 3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "str(stamp)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "stamp.strftime('%Y-%m-%d')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "value = '2011-01-03'\n", + "datetime.strptime(value, '%Y-%m-%d')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "datestrs = ['7/6/2011', '8/6/2011']\n", + "[datetime.strptime(x, '%m/%d/%Y') for x in datestrs]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from dateutil.parser import parse\n", + "parse('2011-01-03')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "parse('Jan 31, 1997 10:45 PM')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "parse('6/12/2011', dayfirst=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "datestrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pd.to_datetime(datestrs)\n", + "# note: output changed (no '00:00:00' anymore)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "idx = pd.to_datetime(datestrs + [None])\n", + "idx" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "idx[2]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pd.isnull(idx)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Time Series Basics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "dates = [datetime(2011, 1, 2), datetime(2011, 1, 5), datetime(2011, 1, 7),\n", + " datetime(2011, 1, 8), datetime(2011, 1, 10), datetime(2011, 1, 12)]\n", + "ts = Series(np.random.randn(6), index=dates)\n", + "ts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "type(ts)\n", + "# note: output changed to \"pandas.core.series.Series\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts + ts[::2]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.index.dtype\n", + "# note: output changed from dtype('datetime64[ns]') to dtype(' to Timestamp('2011-01-02 00:00:00')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Indexing, selection, subsetting" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "stamp = ts.index[2]\n", + "ts[stamp]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts['1/10/2011']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts['20110110']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "longer_ts = Series(np.random.randn(1000),\n", + " index=pd.date_range('1/1/2000', periods=1000))\n", + "longer_ts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "longer_ts['2001']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "longer_ts['2001-05']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts[datetime(2011, 1, 7):]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts['1/6/2011':'1/11/2011']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.truncate(after='1/9/2011')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')\n", + "long_df = DataFrame(np.random.randn(100, 4),\n", + " index=dates,\n", + " columns=['Colorado', 'Texas', 'New York', 'Ohio'])\n", + "long_df.ix['5-2001']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Time series with duplicate indices" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "dates = pd.DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/2/2000',\n", + " '1/3/2000'])\n", + "dup_ts = Series(np.arange(5), index=dates)\n", + "dup_ts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "dup_ts.index.is_unique" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "dup_ts['1/3/2000'] # not duplicated" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "dup_ts['1/2/2000'] # duplicated" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "grouped = dup_ts.groupby(level=0)\n", + "grouped.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "grouped.count()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Date ranges, Frequencies, and Shifting" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.resample('D')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generating date ranges" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "index = pd.date_range('4/1/2012', '6/1/2012')\n", + "index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pd.date_range(start='4/1/2012', periods=20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pd.date_range(end='6/1/2012', periods=20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pd.date_range('1/1/2000', '12/1/2000', freq='BM')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pd.date_range('5/2/2012 12:56:31', periods=5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pd.date_range('5/2/2012 12:56:31', periods=5, normalize=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Frequencies and Date Offsets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from pandas.tseries.offsets import Hour, Minute\n", + "hour = Hour()\n", + "hour" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "four_hours = Hour(4)\n", + "four_hours" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "Hour(2) + Minute(30)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pd.date_range('1/1/2000', periods=10, freq='1h30min')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Week of month dates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rng = pd.date_range('1/1/2012', '9/1/2012', freq='WOM-3FRI')\n", + "list(rng)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Shifting (leading and lagging) data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts = Series(np.random.randn(4),\n", + " index=pd.date_range('1/1/2000', periods=4, freq='M'))\n", + "ts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.shift(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.shift(-2)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "ts / ts.shift(1) - 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.shift(2, freq='M')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.shift(3, freq='D')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.shift(1, freq='3D')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.shift(1, freq='90T')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Shifting dates with offsets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from pandas.tseries.offsets import Day, MonthEnd\n", + "now = datetime(2011, 11, 17)\n", + "now + 3 * Day()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "now + MonthEnd()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "now + MonthEnd(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "offset = MonthEnd()\n", + "offset.rollforward(now)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "offset.rollback(now)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts = Series(np.random.randn(20),\n", + " index=pd.date_range('1/15/2000', periods=20, freq='4d'))\n", + "ts.groupby(offset.rollforward).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.resample('M', how='mean')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Time Zone Handling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import pytz\n", + "pytz.common_timezones[-5:]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tz = pytz.timezone('US/Eastern')\n", + "tz" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Localization and Conversion" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rng = pd.date_range('3/9/2012 9:30', periods=6, freq='D')\n", + "ts = Series(np.random.randn(len(rng)), index=rng)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(ts.index.tz)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts_utc = ts.tz_localize('UTC')\n", + "ts_utc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts_utc.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts_utc.tz_convert('US/Eastern')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts_eastern = ts.tz_localize('US/Eastern')\n", + "ts_eastern.tz_convert('UTC')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts_eastern.tz_convert('Europe/Berlin')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.index.tz_localize('Asia/Shanghai')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Operations with time zone-aware Timestamp objects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "stamp = pd.Timestamp('2011-03-12 04:00')\n", + "stamp_utc = stamp.tz_localize('utc')\n", + "stamp_utc.tz_convert('US/Eastern')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')\n", + "stamp_moscow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "stamp_utc.value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "stamp_utc.tz_convert('US/Eastern').value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# 30 minutes before DST transition\n", + "from pandas.tseries.offsets import Hour\n", + "stamp = pd.Timestamp('2012-03-12 01:30', tz='US/Eastern')\n", + "stamp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "stamp + Hour()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# 90 minutes before DST transition\n", + "stamp = pd.Timestamp('2012-11-04 00:30', tz='US/Eastern')\n", + "stamp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "stamp + 2 * Hour()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Operations between different time zones" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rng = pd.date_range('3/7/2012 9:30', periods=10, freq='B')\n", + "ts = Series(np.random.randn(len(rng)), index=rng)\n", + "ts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts1 = ts[:7].tz_localize('Europe/London')\n", + "ts2 = ts1[2:].tz_convert('Europe/Moscow')\n", + "result = ts1 + ts2\n", + "result.index" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Periods and Period Arithmetic" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p = pd.Period(2007, freq='A-DEC')\n", + "p" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p + 5" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p - 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pd.Period('2014', freq='A-DEC') - p" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rng = pd.period_range('1/1/2000', '6/30/2000', freq='M')\n", + "rng" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "Series(np.random.randn(6), index=rng)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "values = ['2001Q3', '2002Q2', '2003Q1']\n", + "index = pd.PeriodIndex(values, freq='Q-DEC')\n", + "index" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Period Frequency Conversion" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p = pd.Period('2007', freq='A-DEC')\n", + "p.asfreq('M', how='start')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p.asfreq('M', how='end')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p = pd.Period('2007', freq='A-JUN')\n", + "p.asfreq('M', 'start')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p.asfreq('M', 'end')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p = pd.Period('Aug-2007', 'M')\n", + "p.asfreq('A-JUN')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rng = pd.period_range('2006', '2009', freq='A-DEC')\n", + "ts = Series(np.random.randn(len(rng)), index=rng)\n", + "ts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.asfreq('M', how='start')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.asfreq('B', how='end')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Quarterly period frequencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p = pd.Period('2012Q4', freq='Q-JAN')\n", + "p" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p.asfreq('D', 'start')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p.asfreq('D', 'end')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60\n", + "p4pm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p4pm.to_timestamp()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')\n", + "ts = Series(np.arange(len(rng)), index=rng)\n", + "ts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "new_rng = (rng.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60\n", + "ts.index = new_rng.to_timestamp()\n", + "ts" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Converting Timestamps to Periods (and back)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rng = pd.date_range('1/1/2000', periods=3, freq='M')\n", + "ts = Series(randn(3), index=rng)\n", + "pts = ts.to_period()\n", + "ts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rng = pd.date_range('1/29/2000', periods=6, freq='D')\n", + "ts2 = Series(randn(6), index=rng)\n", + "ts2.to_period('M')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pts = ts.to_period()\n", + "pts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pts.to_timestamp(how='end')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Creating a PeriodIndex from arrays" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "data = pd.read_csv('ch08/macrodata.csv')\n", + "data.year" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "data.quarter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "index = pd.PeriodIndex(year=data.year, quarter=data.quarter, freq='Q-DEC')\n", + "index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "data.index = index\n", + "data.infl" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Resampling and Frequency Conversion" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rng = pd.date_range('1/1/2000', periods=100, freq='D')\n", + "ts = Series(randn(len(rng)), index=rng)\n", + "ts.resample('M', how='mean')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.resample('M', how='mean', kind='period')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Downsampling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rng = pd.date_range('1/1/2000', periods=12, freq='T')\n", + "ts = Series(np.arange(12), index=rng)\n", + "ts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.resample('5min', how='sum')\n", + "# note: output changed (as the default changed from closed='right', label='right' to closed='left', label='left'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.resample('5min', how='sum', closed='left')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.resample('5min', how='sum', closed='left', label='left')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.resample('5min', how='sum', loffset='-1s')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Open-High-Low-Close (OHLC) resampling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.resample('5min', how='ohlc')\n", + "# note: output changed because of changed defaults" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Resampling with GroupBy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rng = pd.date_range('1/1/2000', periods=100, freq='D')\n", + "ts = Series(np.arange(100), index=rng)\n", + "ts.groupby(lambda x: x.month).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.groupby(lambda x: x.weekday).mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Upsampling and interpolation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "frame = DataFrame(np.random.randn(2, 4),\n", + " index=pd.date_range('1/1/2000', periods=2, freq='W-WED'),\n", + " columns=['Colorado', 'Texas', 'New York', 'Ohio'])\n", + "frame" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df_daily = frame.resample('D')\n", + "df_daily" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "frame.resample('D', fill_method='ffill')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "frame.resample('D', fill_method='ffill', limit=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "frame.resample('W-THU', fill_method='ffill')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Resampling with periods" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "frame = DataFrame(np.random.randn(24, 4),\n", + " index=pd.period_range('1-2000', '12-2001', freq='M'),\n", + " columns=['Colorado', 'Texas', 'New York', 'Ohio'])\n", + "frame[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "annual_frame = frame.resample('A-DEC', how='mean')\n", + "annual_frame" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Q-DEC: Quarterly, year ending in December\n", + "annual_frame.resample('Q-DEC', fill_method='ffill')\n", + "# note: output changed, default value changed from convention='end' to convention='start' + 'start' changed to span-like\n", + "# also the following cells" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "annual_frame.resample('Q-DEC', fill_method='ffill', convention='start')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "annual_frame.resample('Q-MAR', fill_method='ffill')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Time series plotting" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "close_px_all = pd.read_csv('ch09/stock_px.csv', parse_dates=True, index_col=0)\n", + "close_px = close_px_all[['AAPL', 'MSFT', 'XOM']]\n", + "close_px = close_px.resample('B', fill_method='ffill')\n", + "close_px.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "close_px['AAPL'].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "close_px.ix['2009'].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "close_px['AAPL'].ix['01-2011':'03-2011'].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "appl_q = close_px['AAPL'].resample('Q-DEC', fill_method='ffill')\n", + "appl_q.ix['2009':].plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Moving window functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "close_px = close_px.asfreq('B').fillna(method='ffill')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "close_px.AAPL.plot()\n", + "pd.rolling_mean(close_px.AAPL, 250).plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "appl_std250 = pd.rolling_std(close_px.AAPL, 250, min_periods=10)\n", + "appl_std250[5:12]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "appl_std250.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Define expanding mean in terms of rolling_mean\n", + "expanding_mean = lambda x: rolling_mean(x, len(x), min_periods=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pd.rolling_mean(close_px, 60).plot(logy=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.close('all')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Exponentially-weighted functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, sharey=True,\n", + " figsize=(12, 7))\n", + "\n", + "aapl_px = close_px.AAPL['2005':'2009']\n", + "\n", + "ma60 = pd.rolling_mean(aapl_px, 60, min_periods=50)\n", + "ewma60 = pd.ewma(aapl_px, span=60)\n", + "\n", + "aapl_px.plot(style='k-', ax=axes[0])\n", + "ma60.plot(style='k--', ax=axes[0])\n", + "aapl_px.plot(style='k-', ax=axes[1])\n", + "ewma60.plot(style='k--', ax=axes[1])\n", + "axes[0].set_title('Simple MA')\n", + "axes[1].set_title('Exponentially-weighted MA')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Binary moving window functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "close_px\n", + "spx_px = close_px_all['SPX']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "spx_rets = spx_px / spx_px.shift(1) - 1\n", + "returns = close_px.pct_change()\n", + "corr = pd.rolling_corr(returns.AAPL, spx_rets, 125, min_periods=100)\n", + "corr.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "corr = pd.rolling_corr(returns, spx_rets, 125, min_periods=100)\n", + "corr.plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### User-defined moving window functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from scipy.stats import percentileofscore\n", + "score_at_2percent = lambda x: percentileofscore(x, 0.02)\n", + "result = pd.rolling_apply(returns.AAPL, 250, score_at_2percent)\n", + "result.plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Performance and Memory Usage Notes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rng = pd.date_range('1/1/2000', periods=10000000, freq='10ms')\n", + "ts = Series(np.random.randn(len(rng)), index=rng)\n", + "ts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.resample('15min', how='ohlc').info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "%timeit ts.resample('15min', how='ohlc')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rng = pd.date_range('1/1/2000', periods=10000000, freq='1s')\n", + "ts = Series(np.random.randn(len(rng)), index=rng)\n", + "%timeit ts.resample('15s', how='ohlc')" + ] + } + ], "metadata": { - "name": "", - "signature": "sha256:af14713612af6df3af27b7546e7130b724231374a9fb48b739f50bf0b51b776c" - }, - "nbformat": 3, - "nbformat_minor": 0, - "worksheets": [ - { - "cells": [ - { - "cell_type": "heading", - "level": 1, - "metadata": {}, - "source": [ - "Time series" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "from __future__ import division\n", - "from pandas import Series, DataFrame\n", - "import pandas as pd\n", - "from numpy.random import randn\n", - "import numpy as np\n", - "pd.options.display.max_rows = 12\n", - "np.set_printoptions(precision=4, suppress=True)\n", - "import matplotlib.pyplot as plt\n", - "plt.rc('figure', figsize=(12, 4))" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "%cd ../book_scripts" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "%matplotlib inline" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "Date and Time Data Types and Tools" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "from datetime import datetime\n", - "now = datetime.now()\n", - "now" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "now.year, now.month, now.day" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)\n", - "delta" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "delta.days" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "delta.seconds" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "from datetime import timedelta\n", - "start = datetime(2011, 1, 7)\n", - "start + timedelta(12)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "start - 2 * timedelta(12)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Converting between string and datetime" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "stamp = datetime(2011, 1, 3)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "str(stamp)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "stamp.strftime('%Y-%m-%d')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "value = '2011-01-03'\n", - "datetime.strptime(value, '%Y-%m-%d')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "datestrs = ['7/6/2011', '8/6/2011']\n", - "[datetime.strptime(x, '%m/%d/%Y') for x in datestrs]" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "from dateutil.parser import parse\n", - "parse('2011-01-03')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "parse('Jan 31, 1997 10:45 PM')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "parse('6/12/2011', dayfirst=True)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "datestrs" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pd.to_datetime(datestrs)\n", - "# note: output changed (no '00:00:00' anymore)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "idx = pd.to_datetime(datestrs + [None])\n", - "idx" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "idx[2]" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pd.isnull(idx)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "Time Series Basics" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "from datetime import datetime\n", - "dates = [datetime(2011, 1, 2), datetime(2011, 1, 5), datetime(2011, 1, 7),\n", - " datetime(2011, 1, 8), datetime(2011, 1, 10), datetime(2011, 1, 12)]\n", - "ts = Series(np.random.randn(6), index=dates)\n", - "ts" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "type(ts)\n", - "# note: output changed to \"pandas.core.series.Series\"" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.index" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts + ts[::2]" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.index.dtype\n", - "# note: output changed from dtype('datetime64[ns]') to dtype(' to Timestamp('2011-01-02 00:00:00')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Indexing, selection, subsetting" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "stamp = ts.index[2]\n", - "ts[stamp]" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts['1/10/2011']" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts['20110110']" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "longer_ts = Series(np.random.randn(1000),\n", - " index=pd.date_range('1/1/2000', periods=1000))\n", - "longer_ts" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "longer_ts['2001']" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "longer_ts['2001-05']" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts[datetime(2011, 1, 7):]" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts['1/6/2011':'1/11/2011']" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.truncate(after='1/9/2011')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')\n", - "long_df = DataFrame(np.random.randn(100, 4),\n", - " index=dates,\n", - " columns=['Colorado', 'Texas', 'New York', 'Ohio'])\n", - "long_df.ix['5-2001']" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Time series with duplicate indices" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "dates = pd.DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/2/2000',\n", - " '1/3/2000'])\n", - "dup_ts = Series(np.arange(5), index=dates)\n", - "dup_ts" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "dup_ts.index.is_unique" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "dup_ts['1/3/2000'] # not duplicated" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "dup_ts['1/2/2000'] # duplicated" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "grouped = dup_ts.groupby(level=0)\n", - "grouped.mean()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "grouped.count()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "Date ranges, Frequencies, and Shifting" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.resample('D')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Generating date ranges" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "index = pd.date_range('4/1/2012', '6/1/2012')\n", - "index" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pd.date_range(start='4/1/2012', periods=20)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pd.date_range(end='6/1/2012', periods=20)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pd.date_range('1/1/2000', '12/1/2000', freq='BM')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pd.date_range('5/2/2012 12:56:31', periods=5)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pd.date_range('5/2/2012 12:56:31', periods=5, normalize=True)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Frequencies and Date Offsets" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "from pandas.tseries.offsets import Hour, Minute\n", - "hour = Hour()\n", - "hour" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "four_hours = Hour(4)\n", - "four_hours" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "Hour(2) + Minute(30)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pd.date_range('1/1/2000', periods=10, freq='1h30min')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 4, - "metadata": {}, - "source": [ - "Week of month dates" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "rng = pd.date_range('1/1/2012', '9/1/2012', freq='WOM-3FRI')\n", - "list(rng)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Shifting (leading and lagging) data" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts = Series(np.random.randn(4),\n", - " index=pd.date_range('1/1/2000', periods=4, freq='M'))\n", - "ts" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.shift(2)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.shift(-2)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "raw", - "metadata": {}, - "source": [ - "ts / ts.shift(1) - 1" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.shift(2, freq='M')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.shift(3, freq='D')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.shift(1, freq='3D')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.shift(1, freq='90T')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 4, - "metadata": {}, - "source": [ - "Shifting dates with offsets" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "from pandas.tseries.offsets import Day, MonthEnd\n", - "now = datetime(2011, 11, 17)\n", - "now + 3 * Day()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "now + MonthEnd()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "now + MonthEnd(2)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "offset = MonthEnd()\n", - "offset.rollforward(now)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "offset.rollback(now)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts = Series(np.random.randn(20),\n", - " index=pd.date_range('1/15/2000', periods=20, freq='4d'))\n", - "ts.groupby(offset.rollforward).mean()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.resample('M', how='mean')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "Time Zone Handling" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import pytz\n", - "pytz.common_timezones[-5:]" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "tz = pytz.timezone('US/Eastern')\n", - "tz" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Localization and Conversion" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "rng = pd.date_range('3/9/2012 9:30', periods=6, freq='D')\n", - "ts = Series(np.random.randn(len(rng)), index=rng)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "print(ts.index.tz)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts_utc = ts.tz_localize('UTC')\n", - "ts_utc" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts_utc.index" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts_utc.tz_convert('US/Eastern')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts_eastern = ts.tz_localize('US/Eastern')\n", - "ts_eastern.tz_convert('UTC')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts_eastern.tz_convert('Europe/Berlin')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.index.tz_localize('Asia/Shanghai')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Operations with time zone-aware Timestamp objects" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "stamp = pd.Timestamp('2011-03-12 04:00')\n", - "stamp_utc = stamp.tz_localize('utc')\n", - "stamp_utc.tz_convert('US/Eastern')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')\n", - "stamp_moscow" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "stamp_utc.value" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "stamp_utc.tz_convert('US/Eastern').value" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "# 30 minutes before DST transition\n", - "from pandas.tseries.offsets import Hour\n", - "stamp = pd.Timestamp('2012-03-12 01:30', tz='US/Eastern')\n", - "stamp" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "stamp + Hour()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "# 90 minutes before DST transition\n", - "stamp = pd.Timestamp('2012-11-04 00:30', tz='US/Eastern')\n", - "stamp" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "stamp + 2 * Hour()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Operations between different time zones" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "rng = pd.date_range('3/7/2012 9:30', periods=10, freq='B')\n", - "ts = Series(np.random.randn(len(rng)), index=rng)\n", - "ts" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts1 = ts[:7].tz_localize('Europe/London')\n", - "ts2 = ts1[2:].tz_convert('Europe/Moscow')\n", - "result = ts1 + ts2\n", - "result.index" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "Periods and Period Arithmetic" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "p = pd.Period(2007, freq='A-DEC')\n", - "p" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "p + 5" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "p - 2" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pd.Period('2014', freq='A-DEC') - p" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "rng = pd.period_range('1/1/2000', '6/30/2000', freq='M')\n", - "rng" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "Series(np.random.randn(6), index=rng)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "values = ['2001Q3', '2002Q2', '2003Q1']\n", - "index = pd.PeriodIndex(values, freq='Q-DEC')\n", - "index" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Period Frequency Conversion" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "p = pd.Period('2007', freq='A-DEC')\n", - "p.asfreq('M', how='start')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "p.asfreq('M', how='end')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "p = pd.Period('2007', freq='A-JUN')\n", - "p.asfreq('M', 'start')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "p.asfreq('M', 'end')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "p = pd.Period('Aug-2007', 'M')\n", - "p.asfreq('A-JUN')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "rng = pd.period_range('2006', '2009', freq='A-DEC')\n", - "ts = Series(np.random.randn(len(rng)), index=rng)\n", - "ts" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.asfreq('M', how='start')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.asfreq('B', how='end')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Quarterly period frequencies" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "p = pd.Period('2012Q4', freq='Q-JAN')\n", - "p" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "p.asfreq('D', 'start')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "p.asfreq('D', 'end')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60\n", - "p4pm" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "p4pm.to_timestamp()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')\n", - "ts = Series(np.arange(len(rng)), index=rng)\n", - "ts" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "new_rng = (rng.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60\n", - "ts.index = new_rng.to_timestamp()\n", - "ts" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Converting Timestamps to Periods (and back)" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "rng = pd.date_range('1/1/2000', periods=3, freq='M')\n", - "ts = Series(randn(3), index=rng)\n", - "pts = ts.to_period()\n", - "ts" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pts" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "rng = pd.date_range('1/29/2000', periods=6, freq='D')\n", - "ts2 = Series(randn(6), index=rng)\n", - "ts2.to_period('M')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pts = ts.to_period()\n", - "pts" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pts.to_timestamp(how='end')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Creating a PeriodIndex from arrays" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "data = pd.read_csv('ch08/macrodata.csv')\n", - "data.year" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "data.quarter" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "index = pd.PeriodIndex(year=data.year, quarter=data.quarter, freq='Q-DEC')\n", - "index" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "data.index = index\n", - "data.infl" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "Resampling and Frequency Conversion" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "rng = pd.date_range('1/1/2000', periods=100, freq='D')\n", - "ts = Series(randn(len(rng)), index=rng)\n", - "ts.resample('M', how='mean')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.resample('M', how='mean', kind='period')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Downsampling" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "rng = pd.date_range('1/1/2000', periods=12, freq='T')\n", - "ts = Series(np.arange(12), index=rng)\n", - "ts" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.resample('5min', how='sum')\n", - "# note: output changed (as the default changed from closed='right', label='right' to closed='left', label='left'" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.resample('5min', how='sum', closed='left')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.resample('5min', how='sum', closed='left', label='left')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.resample('5min', how='sum', loffset='-1s')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 4, - "metadata": {}, - "source": [ - "Open-High-Low-Close (OHLC) resampling" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.resample('5min', how='ohlc')\n", - "# note: output changed because of changed defaults" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 4, - "metadata": {}, - "source": [ - "Resampling with GroupBy" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "rng = pd.date_range('1/1/2000', periods=100, freq='D')\n", - "ts = Series(np.arange(100), index=rng)\n", - "ts.groupby(lambda x: x.month).mean()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.groupby(lambda x: x.weekday).mean()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Upsampling and interpolation" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "frame = DataFrame(np.random.randn(2, 4),\n", - " index=pd.date_range('1/1/2000', periods=2, freq='W-WED'),\n", - " columns=['Colorado', 'Texas', 'New York', 'Ohio'])\n", - "frame" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "df_daily = frame.resample('D')\n", - "df_daily" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "frame.resample('D', fill_method='ffill')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "frame.resample('D', fill_method='ffill', limit=2)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "frame.resample('W-THU', fill_method='ffill')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Resampling with periods" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "frame = DataFrame(np.random.randn(24, 4),\n", - " index=pd.period_range('1-2000', '12-2001', freq='M'),\n", - " columns=['Colorado', 'Texas', 'New York', 'Ohio'])\n", - "frame[:5]" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "annual_frame = frame.resample('A-DEC', how='mean')\n", - "annual_frame" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "# Q-DEC: Quarterly, year ending in December\n", - "annual_frame.resample('Q-DEC', fill_method='ffill')\n", - "# note: output changed, default value changed from convention='end' to convention='start' + 'start' changed to span-like\n", - "# also the following cells" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "annual_frame.resample('Q-DEC', fill_method='ffill', convention='start')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "annual_frame.resample('Q-MAR', fill_method='ffill')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "Time series plotting" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "close_px_all = pd.read_csv('ch09/stock_px.csv', parse_dates=True, index_col=0)\n", - "close_px = close_px_all[['AAPL', 'MSFT', 'XOM']]\n", - "close_px = close_px.resample('B', fill_method='ffill')\n", - "close_px.info()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "close_px['AAPL'].plot()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "close_px.ix['2009'].plot()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "close_px['AAPL'].ix['01-2011':'03-2011'].plot()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "appl_q = close_px['AAPL'].resample('Q-DEC', fill_method='ffill')\n", - "appl_q.ix['2009':].plot()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "Moving window functions" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "close_px = close_px.asfreq('B').fillna(method='ffill')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "close_px.AAPL.plot()\n", - "pd.rolling_mean(close_px.AAPL, 250).plot()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "plt.figure()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "appl_std250 = pd.rolling_std(close_px.AAPL, 250, min_periods=10)\n", - "appl_std250[5:12]" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "appl_std250.plot()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "# Define expanding mean in terms of rolling_mean\n", - "expanding_mean = lambda x: rolling_mean(x, len(x), min_periods=1)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pd.rolling_mean(close_px, 60).plot(logy=True)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "plt.close('all')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Exponentially-weighted functions" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, sharey=True,\n", - " figsize=(12, 7))\n", - "\n", - "aapl_px = close_px.AAPL['2005':'2009']\n", - "\n", - "ma60 = pd.rolling_mean(aapl_px, 60, min_periods=50)\n", - "ewma60 = pd.ewma(aapl_px, span=60)\n", - "\n", - "aapl_px.plot(style='k-', ax=axes[0])\n", - "ma60.plot(style='k--', ax=axes[0])\n", - "aapl_px.plot(style='k-', ax=axes[1])\n", - "ewma60.plot(style='k--', ax=axes[1])\n", - "axes[0].set_title('Simple MA')\n", - "axes[1].set_title('Exponentially-weighted MA')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Binary moving window functions" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "close_px\n", - "spx_px = close_px_all['SPX']" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "spx_rets = spx_px / spx_px.shift(1) - 1\n", - "returns = close_px.pct_change()\n", - "corr = pd.rolling_corr(returns.AAPL, spx_rets, 125, min_periods=100)\n", - "corr.plot()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "corr = pd.rolling_corr(returns, spx_rets, 125, min_periods=100)\n", - "corr.plot()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "User-defined moving window functions" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "from scipy.stats import percentileofscore\n", - "score_at_2percent = lambda x: percentileofscore(x, 0.02)\n", - "result = pd.rolling_apply(returns.AAPL, 250, score_at_2percent)\n", - "result.plot()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "Performance and Memory Usage Notes" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "rng = pd.date_range('1/1/2000', periods=10000000, freq='10ms')\n", - "ts = Series(np.random.randn(len(rng)), index=rng)\n", - "ts" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.resample('15min', how='ohlc').info()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "%timeit ts.resample('15min', how='ohlc')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "rng = pd.date_range('1/1/2000', periods=10000000, freq='1s')\n", - "ts = Series(np.random.randn(len(rng)), index=rng)\n", - "%timeit ts.resample('15s', how='ohlc')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - } - ], - "metadata": {} + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.10" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 0 +}