Skip to content

Commit 8c14c4c

Browse files
author
s153398
committed
division by 0
1 parent 46b835b commit 8c14c4c

2 files changed

+44
-64
lines changed

.ipynb_checkpoints/Project 2 - Web Traffic Analysis_v2-checkpoint.ipynb

+22-32
Original file line numberDiff line numberDiff line change
@@ -174,20 +174,10 @@
174174
},
175175
{
176176
"cell_type": "code",
177-
"execution_count": 127,
177+
"execution_count": 4,
178178
"metadata": {},
179-
"outputs": [
180-
{
181-
"name": "stdout",
182-
"output_type": "stream",
183-
"text": [
184-
"CPU times: user 14.4 s, sys: 12 ms, total: 14.4 s\n",
185-
"Wall time: 14.4 s\n"
186-
]
187-
}
188-
],
179+
"outputs": [],
189180
"source": [
190-
"%%time\n",
191181
"# Initialize constants\n",
192182
"BUCKET_SIZE = 32 # Number of hash functions in Flajolet-Martin algorithm\n",
193183
"STREAM_SIZE = 100000 # Number of elements to go through\n",
@@ -268,13 +258,13 @@
268258
},
269259
{
270260
"cell_type": "code",
271-
"execution_count": 132,
261+
"execution_count": 5,
272262
"metadata": {},
273263
"outputs": [],
274264
"source": [
275265
"def harmonic_mean(alist):\n",
276266
" n = len(alist)\n",
277-
" return n/np.sum([1/val for val in alist])\n",
267+
" return n/np.sum([1/val for val in alist if val != 0])\n",
278268
"\n",
279269
"def get_count(domain):\n",
280270
" last_20 = round(len(domains[domain])*0.2)\n",
@@ -285,7 +275,7 @@
285275
},
286276
{
287277
"cell_type": "code",
288-
"execution_count": 133,
278+
"execution_count": 6,
289279
"metadata": {},
290280
"outputs": [
291281
{
@@ -317,17 +307,17 @@
317307
"name": "stdout",
318308
"output_type": "stream",
319309
"text": [
320-
"total has 88205 unique IPs\n",
321-
"python.org has 33868 unique IPs\n",
322-
"wikipedia.org has 35004 unique IPs\n",
323-
"pandas.pydata.org has 7669 unique IPs\n",
324-
"dtu.dk has 1680 unique IPs\n",
325-
"google.com has 1680 unique IPs\n",
326-
"databricks.com has 840 unique IPs\n",
327-
"github.com has 868 unique IPs\n",
328-
"spark.apache.org has 302 unique IPs\n",
329-
"datarobot.com has 172 unique IPs\n",
330-
"scala-lang.org has 1 unique IPs\n"
310+
"total has 93892 unique IPs\n",
311+
"python.org has 34729 unique IPs\n",
312+
"wikipedia.org has 42035 unique IPs\n",
313+
"pandas.pydata.org has 8018 unique IPs\n",
314+
"dtu.dk has 1976 unique IPs\n",
315+
"google.com has 1875 unique IPs\n",
316+
"databricks.com has 908 unique IPs\n",
317+
"github.com has 949 unique IPs\n",
318+
"spark.apache.org has 323 unique IPs\n",
319+
"datarobot.com has 195 unique IPs\n",
320+
"scala-lang.org has 20 unique IPs\n"
331321
]
332322
}
333323
],
@@ -385,10 +375,10 @@
385375
"name": "stdout",
386376
"output_type": "stream",
387377
"text": [
388-
"The number of unique IPs visiting python.org is: 33868\n",
389-
"The number of unique IPs visiting wikipedia.org is: 35004\n",
390-
"The number of unique IPs visiting pandas.pydata.org is: 7669\n",
391-
"The number of unique IPs visiting github.com is: 868\n"
378+
"The number of unique IPs visiting python.org is: 34729\n",
379+
"The number of unique IPs visiting wikipedia.org is: 42035\n",
380+
"The number of unique IPs visiting pandas.pydata.org is: 8018\n",
381+
"The number of unique IPs visiting github.com is: 949\n"
392382
]
393383
}
394384
],
@@ -480,15 +470,15 @@
480470
},
481471
{
482472
"cell_type": "code",
483-
"execution_count": 139,
473+
"execution_count": 12,
484474
"metadata": {},
485475
"outputs": [
486476
{
487477
"name": "stdout",
488478
"output_type": "stream",
489479
"text": [
490480
"Error added with each element added to the M matrix: 2.7182818284590452e-05\n",
491-
"Probability of allowing a count estimate outside the above error: 4.5399929762484854e-05\n"
481+
"Probability of allowing a count estimate outside the error above: 4.5399929762484854e-05\n"
492482
]
493483
}
494484
],

Project 2 - Web Traffic Analysis_v2.ipynb

+22-32
Original file line numberDiff line numberDiff line change
@@ -174,20 +174,10 @@
174174
},
175175
{
176176
"cell_type": "code",
177-
"execution_count": 127,
177+
"execution_count": 4,
178178
"metadata": {},
179-
"outputs": [
180-
{
181-
"name": "stdout",
182-
"output_type": "stream",
183-
"text": [
184-
"CPU times: user 14.4 s, sys: 12 ms, total: 14.4 s\n",
185-
"Wall time: 14.4 s\n"
186-
]
187-
}
188-
],
179+
"outputs": [],
189180
"source": [
190-
"%%time\n",
191181
"# Initialize constants\n",
192182
"BUCKET_SIZE = 32 # Number of hash functions in Flajolet-Martin algorithm\n",
193183
"STREAM_SIZE = 100000 # Number of elements to go through\n",
@@ -268,13 +258,13 @@
268258
},
269259
{
270260
"cell_type": "code",
271-
"execution_count": 132,
261+
"execution_count": 5,
272262
"metadata": {},
273263
"outputs": [],
274264
"source": [
275265
"def harmonic_mean(alist):\n",
276266
" n = len(alist)\n",
277-
" return n/np.sum([1/val for val in alist])\n",
267+
" return n/np.sum([1/val for val in alist if val != 0])\n",
278268
"\n",
279269
"def get_count(domain):\n",
280270
" last_20 = round(len(domains[domain])*0.2)\n",
@@ -285,7 +275,7 @@
285275
},
286276
{
287277
"cell_type": "code",
288-
"execution_count": 133,
278+
"execution_count": 6,
289279
"metadata": {},
290280
"outputs": [
291281
{
@@ -317,17 +307,17 @@
317307
"name": "stdout",
318308
"output_type": "stream",
319309
"text": [
320-
"total has 88205 unique IPs\n",
321-
"python.org has 33868 unique IPs\n",
322-
"wikipedia.org has 35004 unique IPs\n",
323-
"pandas.pydata.org has 7669 unique IPs\n",
324-
"dtu.dk has 1680 unique IPs\n",
325-
"google.com has 1680 unique IPs\n",
326-
"databricks.com has 840 unique IPs\n",
327-
"github.com has 868 unique IPs\n",
328-
"spark.apache.org has 302 unique IPs\n",
329-
"datarobot.com has 172 unique IPs\n",
330-
"scala-lang.org has 1 unique IPs\n"
310+
"total has 93892 unique IPs\n",
311+
"python.org has 34729 unique IPs\n",
312+
"wikipedia.org has 42035 unique IPs\n",
313+
"pandas.pydata.org has 8018 unique IPs\n",
314+
"dtu.dk has 1976 unique IPs\n",
315+
"google.com has 1875 unique IPs\n",
316+
"databricks.com has 908 unique IPs\n",
317+
"github.com has 949 unique IPs\n",
318+
"spark.apache.org has 323 unique IPs\n",
319+
"datarobot.com has 195 unique IPs\n",
320+
"scala-lang.org has 20 unique IPs\n"
331321
]
332322
}
333323
],
@@ -385,10 +375,10 @@
385375
"name": "stdout",
386376
"output_type": "stream",
387377
"text": [
388-
"The number of unique IPs visiting python.org is: 33868\n",
389-
"The number of unique IPs visiting wikipedia.org is: 35004\n",
390-
"The number of unique IPs visiting pandas.pydata.org is: 7669\n",
391-
"The number of unique IPs visiting github.com is: 868\n"
378+
"The number of unique IPs visiting python.org is: 34729\n",
379+
"The number of unique IPs visiting wikipedia.org is: 42035\n",
380+
"The number of unique IPs visiting pandas.pydata.org is: 8018\n",
381+
"The number of unique IPs visiting github.com is: 949\n"
392382
]
393383
}
394384
],
@@ -480,15 +470,15 @@
480470
},
481471
{
482472
"cell_type": "code",
483-
"execution_count": 139,
473+
"execution_count": 12,
484474
"metadata": {},
485475
"outputs": [
486476
{
487477
"name": "stdout",
488478
"output_type": "stream",
489479
"text": [
490480
"Error added with each element added to the M matrix: 2.7182818284590452e-05\n",
491-
"Probability of allowing a count estimate outside the above error: 4.5399929762484854e-05\n"
481+
"Probability of allowing a count estimate outside the error above: 4.5399929762484854e-05\n"
492482
]
493483
}
494484
],

0 commit comments

Comments
 (0)