Skip to content

Commit 388c44c

Browse files
committed
Second Edition - Alpha
1 parent e951ab0 commit 388c44c

File tree

331 files changed

+200239
-424460
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

331 files changed

+200239
-424460
lines changed

01_machine_learning_for_trading/README.md

+76-24
Large diffs are not rendered by default.

02_market_and_fundamental_data/01_NASDAQ_TotalView-ITCH_Order_Book/01_parse_itch_order_flow_messages.ipynb

+50-28
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,16 @@
7070
"## Imports"
7171
]
7272
},
73+
{
74+
"cell_type": "code",
75+
"execution_count": null,
76+
"metadata": {},
77+
"outputs": [],
78+
"source": [
79+
"import warnings\n",
80+
"warnings.filterwarnings('ignore')"
81+
]
82+
},
7383
{
7484
"cell_type": "code",
7585
"execution_count": 1,
@@ -113,6 +123,28 @@
113123
"sns.set_style('whitegrid')"
114124
]
115125
},
126+
{
127+
"cell_type": "code",
128+
"execution_count": null,
129+
"metadata": {
130+
"collapsed": false,
131+
"jupyter": {
132+
"outputs_hidden": false
133+
},
134+
"pycharm": {
135+
"name": "#%%\n"
136+
}
137+
},
138+
"outputs": [],
139+
"source": [
140+
"def format_time(t):\n",
141+
" \"\"\"Return a formatted time string 'HH:MM:SS\n",
142+
" based on a numeric time() value\"\"\"\n",
143+
" m, s = divmod(t, 60)\n",
144+
" h, m = divmod(m, 60)\n",
145+
" return f'{h:0>2.0f}:{m:0>2.0f}:{s:0>5.2f}'"
146+
]
147+
},
116148
{
117149
"cell_type": "markdown",
118150
"metadata": {},
@@ -273,8 +305,7 @@
273305
"ExecuteTime": {
274306
"end_time": "2018-12-25T19:06:08.577453Z",
275307
"start_time": "2018-12-25T19:06:08.570117Z"
276-
},
277-
"scrolled": false
308+
}
278309
},
279310
"outputs": [
280311
{
@@ -1267,8 +1298,7 @@
12671298
"ExecuteTime": {
12681299
"end_time": "2018-12-25T17:59:34.870288Z",
12691300
"start_time": "2018-12-25T17:29:45.640518Z"
1270-
},
1271-
"scrolled": false
1301+
}
12721302
},
12731303
"outputs": [
12741304
{
@@ -1327,27 +1357,25 @@
13271357
" \n",
13281358
" # deal with system events\n",
13291359
" if message_type == 'S':\n",
1330-
" timestamp = int.from_bytes(message.timestamp, byteorder='big')\n",
1360+
" seconds = int.from_bytes(message.timestamp, byteorder='big') * 1e-9\n",
13311361
" print('\\n', event_codes.get(message.event_code.decode('ascii'), 'Error'))\n",
1332-
" print('\\t{0}\\t{1:,.0f}'.format(timedelta(seconds=timestamp * 1e-9),\n",
1333-
" message_count))\n",
1362+
" print(f'\\t{format_time(seconds)}\\t{message_count:12,.0f}')\n",
13341363
" if message.event_code.decode('ascii') == 'C':\n",
13351364
" store_messages(messages)\n",
13361365
" break\n",
13371366
" message_count += 1\n",
1338-
" \n",
1339-
" if message_count % 2.5e7 == 0: \n",
1340-
" t = timedelta(seconds=int.from_bytes(message.timestamp, byteorder='big') * 1e-9)\n",
1341-
" d = timedelta(seconds=time() - start)\n",
1342-
" print('\\t{t}\\t{message_count:12,.0f}\\t{d}')\n",
1367+
"\n",
1368+
" if message_count % 2.5e7 == 0:\n",
1369+
" seconds = int.from_bytes(message.timestamp, byteorder='big') * 1e-9\n",
1370+
" d = format_time(time() - start)\n",
1371+
" print(f'\\t{format_time(seconds)}\\t{message_count:12,.0f}\\t{d}')\n",
13431372
" res = store_messages(messages)\n",
13441373
" if res == 1:\n",
13451374
" print(pd.Series(dict(message_type_counter)).sort_values())\n",
13461375
" break\n",
13471376
" messages.clear()\n",
1348-
" \n",
1349-
" \n",
1350-
"print(timedelta(seconds=time() - start))"
1377+
"\n",
1378+
"print('Duration:', format_time(time() - start))"
13511379
]
13521380
},
13531381
{
@@ -1557,8 +1585,7 @@
15571585
"ExecuteTime": {
15581586
"end_time": "2020-03-27T15:19:06.905421Z",
15591587
"start_time": "2020-03-27T15:19:05.149409Z"
1560-
},
1561-
"scrolled": false
1588+
}
15621589
},
15631590
"outputs": [
15641591
{
@@ -1576,22 +1603,17 @@
15761603
"with pd.HDFStore(itch_store) as store:\n",
15771604
" stocks = store['R'].loc[:, ['stock_locate', 'stock']]\n",
15781605
" trades = store['P'].append(store['Q'].rename(columns={'cross_price': 'price'}), sort=False).merge(stocks)\n",
1606+
"\n",
15791607
"trades['value'] = trades.shares.mul(trades.price)\n",
15801608
"trades['value_share'] = trades.value.div(trades.value.sum())\n",
1609+
"\n",
15811610
"trade_summary = trades.groupby('stock').value_share.sum().sort_values(ascending=False)\n",
15821611
"trade_summary.iloc[:50].plot.bar(figsize=(14, 6), color='darkblue', title='Share of Traded Value')\n",
1612+
"\n",
15831613
"plt.gca().yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))\n",
1584-
"plt.tight_layout()\n",
15851614
"sns.despine()\n",
1586-
"# plt.savefig('figures/share_of_trade_vol', dpi=300)"
1615+
"plt.tight_layout()"
15871616
]
1588-
},
1589-
{
1590-
"cell_type": "code",
1591-
"execution_count": null,
1592-
"metadata": {},
1593-
"outputs": [],
1594-
"source": []
15951617
}
15961618
],
15971619
"metadata": {
@@ -1632,5 +1654,5 @@
16321654
}
16331655
},
16341656
"nbformat": 4,
1635-
"nbformat_minor": 2
1636-
}
1657+
"nbformat_minor": 4
1658+
}

02_market_and_fundamental_data/01_NASDAQ_TotalView-ITCH_Order_Book/02_normalize_tick_data.py

-153
This file was deleted.

02_market_and_fundamental_data/01_NASDAQ_TotalView-ITCH_Order_Book/02_rebuild_nasdaq_order_book.ipynb

+2-3
Original file line numberDiff line numberDiff line change
@@ -1073,8 +1073,7 @@
10731073
"\n",
10741074
"plt.legend(handles=[red_patch, blue_patch])\n",
10751075
"sns.despine()\n",
1076-
"fig.tight_layout()\n",
1077-
"fig.savefig('figures/orderbook', dpi=300)"
1076+
"fig.tight_layout()"
10781077
]
10791078
}
10801079
],
@@ -1117,4 +1116,4 @@
11171116
},
11181117
"nbformat": 4,
11191118
"nbformat_minor": 2
1120-
}
1119+
}

02_market_and_fundamental_data/01_NASDAQ_TotalView-ITCH_Order_Book/03_normalize_tick_data.ipynb

+3-5
Original file line numberDiff line numberDiff line change
@@ -302,8 +302,7 @@
302302
"tick_bars.price.plot(figsize=(10, 5), \n",
303303
" title='Tick Bars | {} | {}'.format(stock, pd.to_datetime(date).date()), lw=1)\n",
304304
"plt.xlabel('')\n",
305-
"plt.tight_layout()\n",
306-
"plt.savefig('figures/tick_bars', dpi=300);"
305+
"plt.tight_layout();"
307306
]
308307
},
309308
{
@@ -386,8 +385,7 @@
386385
" fig.autofmt_xdate()\n",
387386
" fig.suptitle(suptitle)\n",
388387
" fig.tight_layout()\n",
389-
" plt.subplots_adjust(top=0.9)\n",
390-
" fig.savefig(f'figures/{fname}', dpi=300);"
388+
" plt.subplots_adjust(top=0.9);"
391389
]
392390
},
393391
{
@@ -420,7 +418,7 @@
420418
" ohlc = agg_trades.price.ohlc()\n",
421419
" vol = agg_trades.shares.sum().to_frame('vol')\n",
422420
" txn = agg_trades.shares.size().to_frame('txn')\n",
423-
" return pd.concat([ohlc, vwap, vol, txn], axis=1) "
421+
" return pd.concat([ohlc, vwap, vol, txn], axis=1)"
424422
]
425423
},
426424
{

02_market_and_fundamental_data/01_NASDAQ_TotalView-ITCH_Order_Book/README.md

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
## 01 Working with Market Data: NASDAQ_TotalView-ITCH Order Book
1+
## Working with Market Data: NASDAQ_TotalView-ITCH Order Book
22

33
While FIX has a dominant market share, exchanges also offer native protocols. Nasdaq offers the TotalView-ITCH direct data-feed protocol, which allows subscribers to track individual orders for equity instruments from placement to execution or cancellation.
44

@@ -42,7 +42,7 @@ For each message, the specification lays out the components and their respective
4242
| Price | 32 | 4 | Price (4) | The display price of the new order. Refer to Data Types for field processing notes. |
4343
| Attribution | 36 | 4 | Alpha | Nasdaq Market participant identifier associated with the entered order |
4444

45-
The notebook [01_build_itch_order_book](01_build_itch_order_book.ipynb) contains the code to
45+
The notebooks [01_parse_itch_order_flow_messages](01_parse_itch_order_flow_messages.ipynb), [02_rebuild_nasdaq_order_book](02_rebuild_nasdaq_order_book.ipynb) and [03_normalize_tick_data](03_normalize_tick_data.ipynb) contain the code to
4646
- download NASDAQ Total View sample tick data,
4747
- parse the messages from the binary source data
4848
- reconstruct the order book for a given stock
@@ -54,8 +54,9 @@ The code has been updated to use the latest NASDAQ sample file dated March 27, 2
5454
Warning: the tick data is around 12GB in size and some processing steps can take several hours on a 4-core i7 CPU with 32GB RAM.
5555

5656
### Regularizing tick data
57+
5758
The trade data is indexed by nanoseconds and is very noisy. The bid-ask bounce, for instance, causes the price to oscillate between the bid and ask prices when trade initiation alternates between buy and sell market orders. To improve the signal-to-noise ratio and the statistical properties of the data, we need to resample and regularize the tick data by aggregating the trading activity.
5859

5960
We typically collect the open (first), low, high, and closing (last) price for the aggregated period, alongside the volume-weighted average price (VWAP), the number of shares traded, and the timestamp associated with the data.
6061

61-
The notebook [02_normalize_tick_data](02_normalize_tick_data.ipynb) illustrates how to normalize noisy tick using time and volume bars that use different aggregation methods.
62+
The notebook [03_normalize_tick_data](03_normalize_tick_data.ipynb) illustrates how to normalize noisy tick data using time and volume bars that use different aggregation methods.

0 commit comments

Comments
 (0)