|
174 | 174 | },
|
175 | 175 | {
|
176 | 176 | "cell_type": "code",
|
177 |
| - "execution_count": 127, |
| 177 | + "execution_count": 4, |
178 | 178 | "metadata": {},
|
179 |
| - "outputs": [ |
180 |
| - { |
181 |
| - "name": "stdout", |
182 |
| - "output_type": "stream", |
183 |
| - "text": [ |
184 |
| - "CPU times: user 14.4 s, sys: 12 ms, total: 14.4 s\n", |
185 |
| - "Wall time: 14.4 s\n" |
186 |
| - ] |
187 |
| - } |
188 |
| - ], |
| 179 | + "outputs": [], |
189 | 180 | "source": [
|
190 |
| - "%%time\n", |
191 | 181 | "# Initialize constants\n",
|
192 | 182 | "BUCKET_SIZE = 32 # Number of hash functions in Flajolet-Martin algorithm\n",
|
193 | 183 | "STREAM_SIZE = 100000 # Number of elements to go through\n",
|
|
268 | 258 | },
|
269 | 259 | {
|
270 | 260 | "cell_type": "code",
|
271 |
| - "execution_count": 132, |
| 261 | + "execution_count": 5, |
272 | 262 | "metadata": {},
|
273 | 263 | "outputs": [],
|
274 | 264 | "source": [
|
275 | 265 | "def harmonic_mean(alist):\n",
|
276 | 266 | " n = len(alist)\n",
|
277 |
| - " return n/np.sum([1/val for val in alist])\n", |
| 267 | + " return n/np.sum([1/val for val in alist if val != 0])\n", |
278 | 268 | "\n",
|
279 | 269 | "def get_count(domain):\n",
|
280 | 270 | " last_20 = round(len(domains[domain])*0.2)\n",
|
|
285 | 275 | },
|
286 | 276 | {
|
287 | 277 | "cell_type": "code",
|
288 |
| - "execution_count": 133, |
| 278 | + "execution_count": 6, |
289 | 279 | "metadata": {},
|
290 | 280 | "outputs": [
|
291 | 281 | {
|
|
317 | 307 | "name": "stdout",
|
318 | 308 | "output_type": "stream",
|
319 | 309 | "text": [
|
320 |
| - "total has 88205 unique IPs\n", |
321 |
| - "python.org has 33868 unique IPs\n", |
322 |
| - "wikipedia.org has 35004 unique IPs\n", |
323 |
| - "pandas.pydata.org has 7669 unique IPs\n", |
324 |
| - "dtu.dk has 1680 unique IPs\n", |
325 |
| - "google.com has 1680 unique IPs\n", |
326 |
| - "databricks.com has 840 unique IPs\n", |
327 |
| - "github.com has 868 unique IPs\n", |
328 |
| - "spark.apache.org has 302 unique IPs\n", |
329 |
| - "datarobot.com has 172 unique IPs\n", |
330 |
| - "scala-lang.org has 1 unique IPs\n" |
| 310 | + "total has 93892 unique IPs\n", |
| 311 | + "python.org has 34729 unique IPs\n", |
| 312 | + "wikipedia.org has 42035 unique IPs\n", |
| 313 | + "pandas.pydata.org has 8018 unique IPs\n", |
| 314 | + "dtu.dk has 1976 unique IPs\n", |
| 315 | + "google.com has 1875 unique IPs\n", |
| 316 | + "databricks.com has 908 unique IPs\n", |
| 317 | + "github.com has 949 unique IPs\n", |
| 318 | + "spark.apache.org has 323 unique IPs\n", |
| 319 | + "datarobot.com has 195 unique IPs\n", |
| 320 | + "scala-lang.org has 20 unique IPs\n" |
331 | 321 | ]
|
332 | 322 | }
|
333 | 323 | ],
|
|
385 | 375 | "name": "stdout",
|
386 | 376 | "output_type": "stream",
|
387 | 377 | "text": [
|
388 |
| - "The number of unique IPs visiting python.org is: 33868\n", |
389 |
| - "The number of unique IPs visiting wikipedia.org is: 35004\n", |
390 |
| - "The number of unique IPs visiting pandas.pydata.org is: 7669\n", |
391 |
| - "The number of unique IPs visiting github.com is: 868\n" |
| 378 | + "The number of unique IPs visiting python.org is: 34729\n", |
| 379 | + "The number of unique IPs visiting wikipedia.org is: 42035\n", |
| 380 | + "The number of unique IPs visiting pandas.pydata.org is: 8018\n", |
| 381 | + "The number of unique IPs visiting github.com is: 949\n" |
392 | 382 | ]
|
393 | 383 | }
|
394 | 384 | ],
|
|
480 | 470 | },
|
481 | 471 | {
|
482 | 472 | "cell_type": "code",
|
483 |
| - "execution_count": 139, |
| 473 | + "execution_count": 12, |
484 | 474 | "metadata": {},
|
485 | 475 | "outputs": [
|
486 | 476 | {
|
487 | 477 | "name": "stdout",
|
488 | 478 | "output_type": "stream",
|
489 | 479 | "text": [
|
490 | 480 | "Error added with each element added to the M matrix: 2.7182818284590452e-05\n",
|
491 |
| - "Probability of allowing a count estimate outside the above error: 4.5399929762484854e-05\n" |
| 481 | + "Probability of allowing a count estimate outside the error above: 4.5399929762484854e-05\n" |
492 | 482 | ]
|
493 | 483 | }
|
494 | 484 | ],
|
|
0 commit comments