-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDetect duplicate authors.js
108 lines (94 loc) · 2.95 KB
/
Detect duplicate authors.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
{
"translatorID": "08dfa05d-3b55-47d5-8a05-c43cb6a1ae97",
"label": "Detect duplicate authors across papers",
"description": "Detect potentially misspelled but same authors across papers",
"creator": "Adam Horvath",
"target": "txt",
"minVersion": "4.0.27",
"maxVersion": "",
"configOptions": {
"getCollections": false
},
"translatorType": 2,
"browserSupport": "gcsv",
"priority": 100,
"inRepository": false,
"lastUpdated": "2018-12-21 15:58:41"
}
/**
 * Compute the Levenshtein edit distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one string into the other. Comparison is case-sensitive and works
 * on UTF-16 code units (via charAt).
 *
 * Only one row of the dynamic-programming table is kept, sized by the shorter
 * string, so memory use is O(min(a.length, b.length)).
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} The edit distance between a and b.
 */
function levenshtein(a, b) {
  // Distance to/from an empty string is simply the other string's length.
  if (a.length === 0) {
    return b.length;
  }
  if (b.length === 0) {
    return a.length;
  }

  // Lay the shorter string along the row so the row stays as small as possible.
  const [shorter, longer] = a.length > b.length ? [b, a] : [a, b];
  const width = shorter.length;

  // distances[c] holds the distance between the processed prefix of `longer`
  // and the first c characters of `shorter`. Initially that prefix is empty.
  const distances = Array.from({ length: width + 1 }, (_, c) => c);

  for (let r = 1; r <= longer.length; r++) {
    // `diagonal` is the previous row's value one column to the left.
    let diagonal = distances[0];
    distances[0] = r;

    for (let c = 1; c <= width; c++) {
      // Remember the previous row's value before it is overwritten.
      const above = distances[c];

      if (longer.charAt(r - 1) === shorter.charAt(c - 1)) {
        // Matching characters cost nothing extra.
        distances[c] = diagonal;
      } else {
        // Cheapest of substitution, insertion and deletion, plus one edit.
        distances[c] = Math.min(diagonal, distances[c - 1], above) + 1;
      }

      diagonal = above;
    }
  }

  return distances[width];
}
/**
 * Zotero export entry point: writes groups of creators that look like the
 * same person spelled differently across the exported items.
 *
 * Every creator that has both a first and a last name is collected together
 * with the title of the item it came from. Two entries are treated as
 * probable duplicates when their first names start with the same letter
 * (case-insensitive, ignoring surrounding whitespace) and either
 *   - the last names are identical but the first names differ, or
 *   - the last names are exactly one edit apart (Levenshtein distance 1).
 *
 * Each group is emitted through Zotero.write() as one
 * "lastName, firstName: title" line per entry, followed by an empty line.
 * An entry that was already reported as a duplicate of an earlier entry is
 * not added to any later group.
 */
function doExport() {
  const entries = [];

  let item;
  while ((item = Zotero.nextItem())) {
    if (!item.creators) {
      continue;
    }
    item.creators.forEach((creator) => {
      if (!creator.firstName || !creator.lastName) {
        return;
      }
      // A whitespace-only first name has no initial to compare. Indexing the
      // empty trimmed string would yield undefined and make the comparison
      // below throw, so such creators are skipped here.
      const trimmedFirstName = creator.firstName.trim();
      if (trimmedFirstName.length === 0) {
        return;
      }
      entries.push({
        author: {
          firstName: creator.firstName,
          lastName: creator.lastName,
        },
        // Computed once per entry instead of once per compared pair.
        initial: trimmedFirstName[0].toLowerCase(),
        title: item.title,
      });
    });
  }

  // Entries already listed as a duplicate of an earlier entry.
  const reported = new Set();

  for (let i = 0; i < entries.length; i++) {
    const current = entries[i];
    const group = [current];

    for (let j = i + 1; j < entries.length; j++) {
      const candidate = entries[j];
      if (reported.has(candidate)) {
        continue;
      }
      // Different first initials: assume these are different people.
      if (candidate.initial !== current.initial) {
        continue;
      }

      const distance = levenshtein(current.author.lastName, candidate.author.lastName);
      const sameFirstName = current.author.firstName === candidate.author.firstName;

      // Identical first and last names are the same spelling, not a variant,
      // so distance 0 only counts when the first names differ.
      if (distance === 1 || (distance === 0 && !sameFirstName)) {
        group.push(candidate);
        reported.add(candidate);
      }
    }

    if (group.length > 1) {
      group.forEach((entry) => {
        Zotero.write(`${entry.author.lastName}, ${entry.author.firstName}: ${entry.title}\n`);
      });
      Zotero.write('\n');
    }
  }
}