-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathgenerate_references.py
156 lines (129 loc) · 5.46 KB
/
generate_references.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# generate_references.py
# Requires pyzotero (pip install pyzotero); the script also runs the `manubot` command-line tool.
from pyzotero import zotero
import subprocess
import argparse
import os
import logging
import shutil
import tempfile
# Logging configuration for the whole script: messages at INFO level and
# above are emitted as "[<timestamp>-<LEVEL>] <message>".
LOG_LEVEL = logging.INFO
FORMAT = '[%(asctime)s-%(levelname)s] %(message)s'
logging.basicConfig(encoding='utf-8', level=LOG_LEVEL, format=FORMAT)
def create_markdown_file_from_headerfile(tempfile: str, outfile: str, headerfile: str):
    """Write ``outfile`` as the filtered header file followed by the references.

    Every line of ``headerfile`` is copied to ``outfile`` except lines that
    start with ``##AllReferencesHeader``. Two newlines are written after the
    header, then the contents of ``tempfile`` are appended unchanged.

    All three files are read/written as UTF-8 so that non-ASCII characters in
    the header or in the reference list do not depend on the platform's
    default locale encoding.

    Args:
        tempfile: Path of the file holding the reference list to append.
            (The name shadows the ``tempfile`` module inside this function;
            it is kept so existing keyword callers continue to work.)
        outfile: Path of the Markdown file to create or overwrite.
        headerfile: Path of the file whose contents become the header.
    """
    with open(outfile, 'w', encoding='utf-8') as outf:
        with open(headerfile, 'r', encoding='utf-8') as headerf:
            outf.writelines(
                line for line in headerf
                if not line.startswith("##AllReferencesHeader")
            )
        # Blank separation between the header and the reference list.
        outf.write("\n\n")
        with open(tempfile, 'r', encoding='utf-8') as tempf:
            outf.writelines(tempf)
def create_markdown_file_with_header(tempfile: str, outfile: str, header: str):
    """Write ``outfile`` as a header string followed by the references.

    ``header`` is written first, followed by a single newline, and then the
    contents of ``tempfile`` are appended unchanged.

    Both files are handled as UTF-8 so that non-ASCII characters in the
    header text or in the reference list do not depend on the platform's
    default locale encoding.

    Args:
        tempfile: Path of the file holding the reference list to append.
            (The name shadows the ``tempfile`` module inside this function;
            it is kept so existing keyword callers continue to work.)
        outfile: Path of the Markdown file to create or overwrite.
        header: Text placed at the top of the output file.
    """
    with open(outfile, 'w', encoding='utf-8') as outf:
        outf.write(header)
        outf.write("\n")
        with open(tempfile, 'r', encoding='utf-8') as tempf:
            outf.writelines(tempf)
def get_manubot_command(outfile: str, string_of_idenfiers: str):
    """Build the ``manubot cite`` command line as a single string.

    Args:
        outfile: Path passed to manubot via ``--output=``. When falsy
            (``None`` or ``""``) the ``--output`` option is omitted.
        string_of_idenfiers: Space-separated citation identifiers appended
            to the end of the command. The parameter name is misspelled in
            the original signature and is kept as-is so existing keyword
            callers do not break.

    Returns:
        The command string, e.g.
        ``"manubot cite --format=markdown --output=refs.md doi:10.1/x"``.
    """
    command = "manubot cite --format=markdown "
    if outfile:
        command += f"--output={outfile} "
    # Use the argument that was passed in; previously the body read a
    # differently-spelled name and therefore depended on a global variable.
    return command + string_of_idenfiers
def get_doi(item):
    """Return a ``doi:``-prefixed identifier for ``item``, or ``None``.

    Reads ``item["data"]["DOI"]``. If the field holds several comma-separated
    values, only the first one is used. Surrounding whitespace is removed so
    the result can be safely placed in a whitespace-delimited identifier list.

    Args:
        item: Mapping with a ``"data"`` mapping that may contain ``"DOI"``.

    Returns:
        ``"doi:<value>"``, or ``None`` when the field is missing, empty,
        whitespace-only, or its first comma-separated entry is blank.
    """
    raw = item["data"].get("DOI")
    if not raw or not raw.strip():
        return None
    first = raw.split(",")[0].strip()
    if not first:
        return None
    return "doi:" + first
def get_isbn(item):
    """Return an ``isbn:``-prefixed identifier for ``item``, or ``None``.

    Reads ``item["data"]["ISBN"]`` and removes surrounding whitespace so the
    result can be safely placed in a whitespace-delimited identifier list.

    Args:
        item: Mapping with a ``"data"`` mapping that may contain ``"ISBN"``.

    Returns:
        ``"isbn:<value>"``, or ``None`` when the field is missing, empty, or
        whitespace-only.
    """
    raw = item["data"].get("ISBN")
    isbn = raw.strip() if raw else ""
    if not isbn:
        return None
    return "isbn:" + isbn
def get_url(item):
    """Return a ``url:``-prefixed identifier for ``item``, or ``None``.

    Reads ``item["data"]["url"]`` and removes surrounding whitespace so the
    result can be safely placed in a whitespace-delimited identifier list.

    Args:
        item: Mapping with a ``"data"`` mapping that may contain ``"url"``.

    Returns:
        ``"url:<value>"``, or ``None`` when the field is missing, empty, or
        whitespace-only.
    """
    raw = item["data"].get("url")
    url = raw.strip() if raw else ""
    if not url:
        return None
    return "url:" + url
if __name__ == "__main__":
    # Script entry point: collect the identifiers (DOI, ISBN or URL) of all
    # top-level Zotero items carrying a given tag, run `manubot cite` on them
    # to render a Markdown reference list, and optionally prepend a header.
    ap = argparse.ArgumentParser()
    ap.add_argument("-t", "--tag", help="Zotero library tag", required=True)
    ap.add_argument("--lib-id", help="Zotero ID (user or group) from https://www.zotero.org/settings/keys", required=True)
    ap.add_argument("--api-key", help="Zotero API key from https://www.zotero.org/settings/keys/new", required=True)
    ap.add_argument("--lib-type", help="Library type associated with the Zotero ID ('user' or 'group')", required=True)
    ap.add_argument("--outfile", help="Filename for Markdown output", required=False)
    ap.add_argument("--headerfile", help="Header file for Markdown output", required=False)
    ap.add_argument("--header", help="Header text for Markdown output", required=False)
    args = vars(ap.parse_args())

    # Connect to Zotero library and filter by tag
    zot = zotero.Zotero(args["lib_id"], args["lib_type"], args["api_key"])
    if args["tag"]:
        zot.add_parameters(tag=args["tag"], sort="date")
    else:
        # --tag is required, so this is only reached for an empty tag value.
        raise NotImplementedError

    # must use zot.everything() to gather references otherwise results are
    # capped at 100 items
    items = zot.everything(zot.top())

    # Gather Reference Identifiers, preferring DOI, then ISBN, then URL.
    identifiers = []
    err_items = []
    for item in items:
        identifier = get_doi(item) or get_isbn(item) or get_url(item)
        if not identifier:
            err_items.append(item)
            continue
        identifiers.append(identifier)

    # Zotero Library Statistics Logging
    identifier_ct = len(identifiers)
    unique_identifier_ct = len(set(identifiers))
    logging.info("Total refs with tag \"%s\" = %d", args["tag"], len(items))
    logging.info(" Error refs count = %d", len(err_items))
    logging.info(" Total refs minus errors count = %d", identifier_ct)
    logging.info(" Included refs count = %d", unique_identifier_ct)
    logging.info(" Duplicate count = %d", identifier_ct - unique_identifier_ct)
    for item in err_items:
        # .get() so an item without a "title" field cannot abort the report.
        logging.info("Error Ref (no DOI, ISBN, or URL): %s", item["data"].get("title", "<untitled>"))

    if identifier_ct == 0:
        logging.error("No references collected, exiting now")
        # Non-zero status so callers can tell that nothing was generated.
        raise SystemExit(1)

    wants_header = bool(args["headerfile"] or args["header"])
    tempdir = None
    manubot_ofile = None
    try:
        # Set the Output file if applicable. When a header has to be
        # prepended, manubot writes to a temporary file that is merged into
        # the real output file afterwards; otherwise it writes the output
        # file directly. Without --outfile no --output option is passed.
        if args["outfile"]:
            if wants_header:
                tempdir = tempfile.mkdtemp()
                manubot_ofile = os.path.join(tempdir, "temp_02_allreferences.md")
            else:
                manubot_ofile = args["outfile"]

        # Run Manubot on the Reference Identifiers. The command is passed as
        # an argument list (no shell) so characters such as '&', ';' or '?'
        # inside URLs, or spaces in the output path, are never interpreted
        # by a shell.
        command = ["manubot", "cite", "--format=markdown"]
        if manubot_ofile:
            command.append(f"--output={manubot_ofile}")
        command.extend(identifiers)
        returncode = subprocess.call(command)
        if returncode != 0:
            logging.error("manubot exited with status %d", returncode)
            raise SystemExit(returncode)

        # if a headerfile is specified, create a markdown file of references
        # beginning with the text from the header file
        if args["outfile"] and args["headerfile"]:
            create_markdown_file_from_headerfile(manubot_ofile, args["outfile"], args["headerfile"])
        # if a header string is specified, create a markdown file of
        # references beginning with the text from the header string
        elif args["outfile"] and args["header"]:
            create_markdown_file_with_header(manubot_ofile, args["outfile"], args["header"])
    finally:
        # Clean up temporary directory, even when a step above failed.
        if tempdir and os.path.isdir(tempdir):
            shutil.rmtree(tempdir)