Skip to content

Commit

Permalink
New release
Browse files Browse the repository at this point in the history
  • Loading branch information
dhondta committed Feb 24, 2024
1 parent 7801c5e commit 4f1b382
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 49 deletions.
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,15 @@ dependencies = [
"fonttools>=4.43.0", # SNYK-PYTHON-FONTTOOLS-6133203
"matplotlib",
"pyzotero",
"tinyscript>=1.28.6",
"tinyscript>=1.30.6",
"xlsxwriter",
]
dynamic = ["version"]

[project.optional-dependencies]
gpt = [
"chromadb",
"fastapi>=0.109.1", # SNYK-PYTHON-FASTAPI-6228055
"gpt4all",
"langchain>=0.0.225",
"llama-cpp-python",
Expand Down
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ numpy>=1.22.2 # SNYK-PYTHON-NUMPY-2321964
pyzotero
requests
sympy>=1.12 # SNYK-PYTHON-SYMPY-6084333
tinyscript>=1.28.6
tinyscript>=1.30.6
tqdm
xlsxwriter
pillow>=10.0.1 # fixed with tinyscript>=1.28.6 ; asciistuff>=1.2.6
fastapi>=0.109.1 # not directly required, pinned by Snyk to avoid a vulnerability
pillow>=10.2.0 # fixed with tinyscript>=1.30.6 ; asciistuff>=1.3.0
fastapi>=0.109.1 # SNYK-PYTHON-FASTAPI-6228055
2 changes: 1 addition & 1 deletion src/zotero/VERSION.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.6.6
1.6.7
74 changes: 45 additions & 29 deletions src/zotero/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from tinyscript import *
from tinyscript.helpers.text import _indent
from tinyscript.report import *
from warnings import filterwarnings

filterwarnings("ignore", "The input looks more like a filename than markup")


__all__ = ["ZoteroCLI",
Expand Down Expand Up @@ -194,6 +197,32 @@ def _creds(self):
CREDS_FILE.ask("API ID: ", "API key: ")
CREDS_FILE.save()

def _expand_limit(self, limit, sort=None, desc=False, age=True):
    """ Expand the 'limit' parameter according to the following format: (([order])[field]:)[limit]
         - order: "<" for increasing, ">" for decreasing
         - field: target field ; "rank*" selects the strict rank, that is "rank" with the age flag disabled
         - limit: numerical value for limiting records (strictly positive integer)

    :param limit: limit expression (string, or anything whose str() matches the format, e.g. an int) ;
                   None means "no limit" and leaves every other value untouched
    :param sort:  field returned when the expression does not name one (or names an empty one)
    :param desc:  order flag returned when the expression holds no order marker (True means decreasing)
    :param age:   flag returned as is, unless the field is "rank*", in which case False is returned
    :return:      tuple (limit, field, descending, age) ; limit is an int, or None when no limit was given
    :raise ValueError: if the numerical part is not a strictly positive integer or if the expression holds
                       more than one ":"
    """
    lfield, lfdesc = sort, desc
    if limit is None:
        return limit, lfield, lfdesc, age
    # work on the string form so that an already-converted integer does not crash on the string methods
    spec, sep, number = str(limit).rpartition(":")
    if sep:
        spec = spec.strip()
        # handle [<>] as sort orders ; startswith() is safe on an empty field, unlike spec[0]
        if spec.startswith(("<", ">")):
            lfdesc = spec[0] == ">"
            spec = spec[1:].strip()
        # handle "rank*" as using the strict rank, that is, with no damping factor related to the item's age
        if spec == "rank*":
            age = False
            spec = "rank"
        # an empty field (e.g. ":5" or "<:5") falls back to the sort field
        lfield = spec or sort
    number = number.strip()
    # isdecimal() only accepts characters that int() can convert, so int() cannot fail below ;
    #  a remaining ":" in the field part means the expression had too many separators
    if ":" in spec or not number.isdecimal() or int(number) <= 0:
        raise ValueError("Bad limit number ; should be a positive integer")
    return int(number), lfield, lfdesc, age


def _filter(self, fields=None, filters=None, force=False):
""" Apply one or more filters to the items. """
# validate and make filters
Expand Down Expand Up @@ -229,7 +258,7 @@ def _filter(self, fields=None, filters=None, force=False):
# filter format: (negate, lambda, lambda's second arg)
elif field == "tags":
if regex not in self._valid_tags and regex not in ["-", "<empty>"]:
logger.debug("Should be one of:\n- " + \
logger.warning(f"Got tag '{regex}' ; should be one of:\n- " + \
"\n- ".join(sorted(self._valid_tags, key=ZoteroCLI.sort)))
raise ValueError("Tag '%s' does not exist" % regex)
filt = (not_, lambda i, r: r in ["-", "<empty>"] and i['data']['tags'] in ["", []] or \
Expand All @@ -246,8 +275,9 @@ def _filter(self, fields=None, filters=None, force=False):
# validate fields
afields = (fields or []) + list(_filters.keys())
for f in afields:
if f not in self._valid_fields:
logger.debug("Should be one of:\n- " + "\n- ".join(sorted(self._valid_fields, key=ZoteroCLI.sort)))
if f not in self._valid_fields and regex != "-":
logger.warning(f"Got field name '{f}' ; should be one of:\n- " + \
"\n- ".join(sorted(self._valid_fields, key=ZoteroCLI.sort)))
raise ValueError("Bad field name '%s'" % f)
# now yield items, applying the filters and only selecting the given fields
for i in self.items:
Expand Down Expand Up @@ -335,7 +365,7 @@ def _filter(self, fields=None, filters=None, force=False):
d[f] = ""
for n in self.notes:
if n['data']['parentItem'] == i['key']:
t = bs4.BeautifulSoup(n['data']['note']).text
t = bs4.BeautifulSoup(n['data']['note'], "html.parser").text
try:
f, c = t.split(":", 1)
except:
Expand Down Expand Up @@ -408,35 +438,21 @@ def _items(self, fields=None, filters=None, sort=None, desc=False, limit=None, f
fields.insert(fields.index(f), "rank")
fields.remove(f)
# extract the limit field
lfield, lfdesc = sort, desc
if limit is not None:
try:
lfield, limit = limit.split(":")
lfield = lfield.strip()
# handle [<>] as sort orders
if lfield[0] in "<>":
lfdesc = lfield[0] == ">"
lfield = lfield[1:]
# handle "rank*" as using the strict rank, that is, with no damping factor relatd to the item's age
if lfield == "rank*":
age = False
lfield = "rank"
except ValueError:
pass
if not str(limit).isdigit() or int(limit) <= 0:
raise ValueError("Bad limit number ; sould be a positive integer")
limit = int(limit)
limit, lfield, lfdesc, age = self._expand_limit(limit, sort, desc, age)
# select relevant items, including all the fields required for further computations
ffields = fields[:]
if sort not in ffields:
ffields.append(sort)
if "rank" in fields or lfield == "rank" or sort == "rank" or \
"rank" in [f.split(":")[0].lstrip("~") for f in filters or []]:
for f in ["rank", "citations", "references", "year", "zscc"]:
for f in ["rank", "title", "citations", "references", "year", "zscc"]:
if f not in ffields:
ffields.append(f)
if lfield not in ffields:
ffields.append(lfield)
logger.debug(f"Selected fields: {'|'.join(ffields)}")
if len(filters):
logger.debug(f"Filtering entries ({filters})...")
items = {i['key']: i for i in \
self._filter(ffields, [f for f in filters if not re.match(r"\~?rank\:", f)], force)}
if len(items) == 0:
Expand Down Expand Up @@ -473,7 +489,7 @@ def _items(self, fields=None, filters=None, sort=None, desc=False, limit=None, f
self.ranks[k1] += self.ranks.get(k2, 0.) / r
# check for convergence
if tuple(self.ranks.values()) == prev:
logger.debug("Ranking algorithm converged after %d iterations" % n)
logger.debug(f"Ranking algorithm converged after {n} iterations")
break
prev = tuple(self.ranks.values())
# apply the damping factor at the very end
Expand All @@ -493,7 +509,7 @@ def _items(self, fields=None, filters=None, sort=None, desc=False, limit=None, f
self.ranks = {k: v / max_rank if max_rank else 0. for k, v in self.ranks.items()}
for k, r in sorted(self.ranks.items(), key=lambda x: -x[1]):
k_d = items[k]['data']
logger.debug("%.05f - %s (%s)" % (r, k_d['title'], k_d['date']))
logger.debug(f"{r:.05f} - {k_d['title']} ({k_d['date']})")
# reapply filters, including for fields that were just computed
items = {i['key']: i for i in self._filter(ffields, filters, force)}
for k, i in items.items():
Expand All @@ -514,15 +530,15 @@ def _items(self, fields=None, filters=None, sort=None, desc=False, limit=None, f
select_items = list(items.values())
if lfdesc:
select_items = select_items[::-1]
logger.debug("Limiting to %d items (sorted based on %s in %s order)..." % \
(limit, lfield or sort, ["ascending", "descending"][lfdesc]))
logger.debug(f"Limiting to {limit} items (sorted based on {lfield or sort} in "
f"{['ascending', 'descending'][lfdesc]} order)...")
items = {i['key']: i for i in select_items[:limit]}
# ensure that the rank field is set for every item
if "rank" in ffields:
for i in items.values():
i['data']['rank'] = self.ranks.get(i['key'], .0)
# format the selected items as table data
logger.debug("Sorting items based on %s..." % sort)
logger.debug(f"Sorting items based on {sort}...")
for i in sorted(items.values(), key=lambda i: ZoteroCLI.sort(i['data'].get(sort, "-"), sort)):
row = [self._format_value(i['data'].get(f), f) if i['data'].get(f) else "-" for f in fields]
if len(row) > 1 and all(x in ".-" for x in row[1:]): # row[0] is the item's key ; shall never be "." or "-"
Expand Down Expand Up @@ -689,7 +705,7 @@ def plot(self, name, filters=None):
for y, t in sorted(data.items(), key=lambda x: x[0]):
print(["%d:" % y, "####:"][y == 1900], ", ".join(t))
else:
logger.debug("Should be one of:\n- " + "\n- ".join(sorted(CHARTS)))
logger.debug(f"Got chart name '{name}' ; should be one of:\n- " + "\n- ".join(sorted(CHARTS)))
logger.error("Bad chart")
raise ValueError

Expand Down
32 changes: 17 additions & 15 deletions src/zotero/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,47 +64,49 @@ def main():
parser.add_argument("-r", "--reset", action="store_true", help="remove cached collections and items")
# commands: count | export | list | plot | reset | show | view
sparsers = parser.add_subparsers(dest="command", help="command to be executed")
kw1, kw2 = {}, {}
if __GPT:
cask = sparsers.add_parser("ask", help="ask questions to your Zotero documents")
kw1, kw2 = {'category': "main"}, {'category': "GPT"}
cask = sparsers.add_parser("ask", help="ask questions to your Zotero documents", category="GPT")
cask.add_argument("name", default=MODEL_DEFAULT_NAME, choices=MODELS, nargs="?", help="model name")
cask.add_argument("-c", "--show-content", action="store_true", help="show content from source documents")
cask.add_argument("-m", "--mute-stream", action="store_true", help="disable streaming StdOut callback for LLMs")
cask.add_argument("-s", "--show-source", action="store_true", help="show source documents")
ccount = sparsers.add_parser("count", help="count items")
ccount = sparsers.add_parser("count", help="count items", category="read")
_set_arg(ccount, "filter", "filter to be applied while counting")
_set_arg(ccount, "query")
cexpt = sparsers.add_parser("export", help="export items to a file")
cexpt = sparsers.add_parser("export", help="export items to a file", category="manage")
cexpt.add_argument("field", nargs="+", help="field to be shown")
cexpt.add_argument("-l", "--line-format", help="line's format string for outputting as a list")
cexpt.add_argument("-o", "--output-format", default="xlsx", help="output format",
choices=["csv", "html", "json", "md", "pdf", "rst", "xml", "xlsx", "yaml"])
_set_args(cexpt, "filter", "limit", "query", "sort")
if __GPT:
cingest = sparsers.add_parser("ingest", help="ingest Zotero documents")
cinst = sparsers.add_parser("install", help="install a GPT model")
cingest = sparsers.add_parser("ingest", help="ingest Zotero documents", category="GPT")
cinst = sparsers.add_parser("install", help="install a GPT model", category="GPT")
cinst.add_argument("name", default=MODEL_DEFAULT_NAME, choices=MODEL_NAMES, nargs="?", help="model name")
cinst.add_argument("-d", "--download", action="store_true", help="download the input model")
clist = sparsers.add_parser("list", help="list distinct values for the given field")
clist = sparsers.add_parser("list", help="list distinct values for the given field", category="read")
clist.add_argument("field", help="field whose distinct values are to be listed")
_set_args(clist, "filter", "query")
_set_args(clist, "filter")
clist.add_argument("-l", "--limit", type=ts.pos_int, help="limit the number of displayed records")
clist.add_argument("--desc", action="store_true", help="sort results in descending order")
cmark = sparsers.add_parser("mark", help="mark items with a marker")
cmark = sparsers.add_parser("mark", help="mark items with a marker", category="manage")
cmark.add_argument("marker", choices=[x for p in MARKERS for x in p[:2]], help="marker to be set",
note="possible values:\n - {}".format("\n - ".join("%s: %s" % (p[0], p[2]) for p in MARKERS)))
_set_args(cmark, "filter", "limit", "query", "sort")
cplot = sparsers.add_parser("plot", help="plot various information using Matplotlib")
cplot = sparsers.add_parser("plot", help="plot various information using Matplotlib", category="read")
cplot.add_argument("chart", choices=CHARTS, help="chart to be plotted")
_set_args(cplot, "filter", "query")
creset = sparsers.add_parser("reset", help="reset cached collections and items")
creset = sparsers.add_parser("reset", help="reset cached collections and items", category="manage")
creset.add_argument("-r", "--reset-items", action="store_true", help="reset items only")
if __GPT:
cselect = sparsers.add_parser("select", help="select a GPT model")
cselect = sparsers.add_parser("select", help="select a GPT model", category="GPT")
cselect.add_argument("name", default=MODEL_DEFAULT_NAME, choices=MODELS, nargs="?", help="model name")
cshow = sparsers.add_parser("show", help="show a list of items")
cshow = sparsers.add_parser("show", help="show a list of items", category="read")
cshow.add_argument("field", nargs="*", help="field to be shown")
_set_args(cshow, "filter", "limit", "query", "sort")
cview = sparsers.add_parser("view", help="view a single item")
cview = sparsers.add_parser("view", help="view a single item", category="read")
cview.add_argument("name", help="field name for selection")
cview.add_argument("value", help="field value to be selected")
cview.add_argument("field", nargs="+", help="field to be shown")
Expand All @@ -114,9 +116,9 @@ def main():
if hasattr(args, "field") and args.field == ["-"]:
args.field = QUERIES[args.query].get('fields', ["title"])
args.filter.extend(QUERIES[args.query].get('filter', []))
if args.limit is None:
if getattr(args, "limit", None) is None:
args.limit = QUERIES[args.query].get('limit')
if args.sort is None:
if getattr(args, "sort", None) is None:
args.sort = QUERIES[args.query].get('sort')
if hasattr(args, "sort"):
args.desc = False
Expand Down

0 comments on commit 4f1b382

Please sign in to comment.