-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathnearest_neighbor_aaai.py
58 lines (44 loc) · 1.68 KB
/
nearest_neighbor_aaai.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import (absolute_import, division, print_function,
unicode_literals)
"""
NearestNeighbor service example.
=================================
This example shows how to use nearest neighbor service.
We use AAAI 2014 Accepted Papers Data Set.
Lichman, M. (2013).
UCI Machine Learning Repository [http://archive.ics.uci.edu/ml].
Irvine, CA: University of California, School of Information and
Computer Science.
Please download csv file from the url and rename it "aaai.csv"
https://archive.ics.uci.edu/ml/datasets/AAAI+2014+Accepted+Papers
"""
from jubakit.nearest_neighbor import NearestNeighbor, Schema, Dataset, Config
from jubakit.loader.csv import CSVLoader
# Load a CSV file.
loader = CSVLoader('aaai.csv')
# Define a Schema that defines types for each columns of the CSV file.
# In this example, we use "abstract" and "topics" to calculate neighbor scores.
schema = Schema({
'title': Schema.ID,
'abstract': Schema.STRING,
'keyword': Schema.STRING
}, Schema.IGNORE)
print('Schema:', schema)
# Create a Dataset.
dataset = Dataset(loader, schema)
# Create a nearest neighbor configuration.
config = Config()
print(config)
# Launch a nearest neighbor service.
service = NearestNeighbor.run(config)
# add data to nearest neighbor model.
mapper = {}
for (idx, row_id, result) in service.set_row(dataset):
mapper[row_id] = idx
# search k-neighbor data.
for (idx, row_id, results) in service.neighbor_row_from_id(dataset, size=5):
print('\n', idx, row_id)
for r in results[1:]: # first data is the same as query data.
print('\tscore: {0:.3f}\ttitle: {1}'.format(r.score, r.id))