New release

dhondta · Feb 24, 2024 · 4f1b382 · 4f1b382
1 parent 7801c5e
commit 4f1b382
Show file tree

Hide file tree

Showing 5 changed files with 68 additions and 49 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -36,14 +36,15 @@ dependencies = [
   "fonttools>=4.43.0",  # SNYK-PYTHON-FONTTOOLS-6133203
   "matplotlib",
   "pyzotero",
-  "tinyscript>=1.28.6",
+  "tinyscript>=1.30.6",
   "xlsxwriter",
 ]
 dynamic = ["version"]
 
 [project.optional-dependencies]
 gpt = [
   "chromadb",
+  "fastapi>=0.109.1",  # SNYK-PYTHON-FASTAPI-6228055
   "gpt4all",
   "langchain>=0.0.225",
   "llama-cpp-python",

diff --git a/requirements.txt b/requirements.txt
@@ -8,8 +8,8 @@ numpy>=1.22.2  # SNYK-PYTHON-NUMPY-2321964
 pyzotero
 requests
 sympy>=1.12  # SNYK-PYTHON-SYMPY-6084333
-tinyscript>=1.28.6
+tinyscript>=1.30.6
 tqdm
 xlsxwriter
-pillow>=10.0.1 # fixed with tinyscript>=1.28.6 ; asciistuff>=1.2.6
-fastapi>=0.109.1 # not directly required, pinned by Snyk to avoid a vulnerability
+pillow>=10.2.0  # fixed with tinyscript>=1.30.6 ; asciistuff>=1.3.0
+fastapi>=0.109.1  # SNYK-PYTHON-FASTAPI-6228055
diff --git a/src/zotero/VERSION.txt b/src/zotero/VERSION.txt
@@ -1 +1 @@
-1.6.6
+1.6.7
diff --git a/src/zotero/__init__.py b/src/zotero/__init__.py
@@ -8,6 +8,9 @@
 from tinyscript import *
 from tinyscript.helpers.text import _indent
 from tinyscript.report import *
+from warnings import filterwarnings
+
+filterwarnings("ignore", "The input looks more like a filename than markup")
 
 
 __all__ = ["ZoteroCLI",
@@ -194,6 +197,32 @@ def _creds(self):
             CREDS_FILE.ask("API ID: ", "API key: ")
             CREDS_FILE.save()
 
+    def _expand_limit(self, limit, sort=None, desc=False, age=True):
+        """ Expand the 'limit' parameter according to the following format: (([order])[field]:)[limit]
+             - order: "<" for increasing, ">" for decreasing
+             - field: target field ; "rank*" selects "rank" and switches the 'age' flag off
+             - limit: numerical value for limiting records
+
+            :param limit: limit expression (string or integer) ; None means that no limit is to be applied
+            :param sort:  default field, kept when the expression names none
+            :param desc:  default order, kept when the expression holds no [<>] prefix
+            :param age:   default 'age' flag, only changed (to False) by the "rank*" field
+            :return:      (limit, field, descending, age) with limit as a positive integer or None
+            :raise ValueError: if the numerical part is not a positive integer """
+        lfield, lfdesc = sort, desc
+        if limit is None:
+            return limit, lfield, lfdesc, age
+        # go through str() so that an integer (not only a string) is accepted as a limit
+        parts = str(limit).split(":")
+        # only the exact "[field]:[limit]" shape is expanded ; anything else is left to the validation below
+        if len(parts) == 2:
+            lfield, limit = parts[0].strip(), parts[1]
+            # handle [<>] as sort orders ; startswith() does not fail on an empty field (e.g. with ":10")
+            if lfield.startswith(("<", ">")):
+                lfdesc = lfield[0] == ">"
+                lfield = lfield[1:]
+            # handle "rank*" as using the strict rank, that is, with no damping factor related to the item's age
+            if lfield == "rank*":
+                age = False
+                lfield = "rank"
+            # no field given (e.g. ":10" or ">:10") ; fall back on the default one
+            lfield = lfield or sort
+        if not str(limit).isdigit() or int(limit) <= 0:
+            raise ValueError("Bad limit number ; sould be a positive integer")
+        return int(limit), lfield, lfdesc, age
+
+
     def _filter(self, fields=None, filters=None, force=False):
         """ Apply one or more filters to the items. """
         # validate and make filters
@@ -229,7 +258,7 @@ def _filter(self, fields=None, filters=None, force=False):
             # filter format: (negate, lambda, lambda's second arg)
             elif field == "tags":
                 if regex not in self._valid_tags and regex not in ["-", "<empty>"]:
-                    logger.debug("Should be one of:\n- " + \
+                    logger.warning(f"Got tag '{regex}' ; should be one of:\n- " + \
                                  "\n- ".join(sorted(self._valid_tags, key=ZoteroCLI.sort)))
                     raise ValueError("Tag '%s' does not exist" % regex)
                 filt = (not_, lambda i, r: r in ["-", "<empty>"] and i['data']['tags'] in ["", []] or \
@@ -246,8 +275,9 @@ def _filter(self, fields=None, filters=None, force=False):
         # validate fields
         afields = (fields or []) + list(_filters.keys())
         for f in afields:
-            if f not in self._valid_fields:
-                logger.debug("Should be one of:\n- " + "\n- ".join(sorted(self._valid_fields, key=ZoteroCLI.sort)))
+            if f not in self._valid_fields and regex != "-":
+                logger.warning(f"Got field name '{f}' ; should be one of:\n- " + \
+                               "\n- ".join(sorted(self._valid_fields, key=ZoteroCLI.sort)))
                 raise ValueError("Bad field name '%s'" % f)
         # now yield items, applying the filters and only selecting the given fields
         for i in self.items:
@@ -335,7 +365,7 @@ def _filter(self, fields=None, filters=None, force=False):
                     d[f] = ""
                 for n in self.notes:
                     if n['data']['parentItem'] == i['key']:
-                        t = bs4.BeautifulSoup(n['data']['note']).text
+                        t = bs4.BeautifulSoup(n['data']['note'], "html.parser").text
                         try:
                             f, c = t.split(":", 1)
                         except:
@@ -408,35 +438,21 @@ def _items(self, fields=None, filters=None, sort=None, desc=False, limit=None, f
             fields.insert(fields.index(f), "rank")
             fields.remove(f)
         # extract the limit field
-        lfield, lfdesc = sort, desc
-        if limit is not None:
-            try:
-                lfield, limit = limit.split(":")
-                lfield = lfield.strip()
-                # handle [<>] as sort orders
-                if lfield[0] in "<>":
-                    lfdesc = lfield[0] == ">"
-                    lfield = lfield[1:]
-                # handle "rank*" as using the strict rank, that is, with no damping factor relatd to the item's age
-                if lfield == "rank*":
-                    age = False
-                    lfield = "rank"
-            except ValueError:
-                pass
-            if not str(limit).isdigit() or int(limit) <= 0:
-                raise ValueError("Bad limit number ; sould be a positive integer")
-            limit = int(limit)
+        limit, lfield, lfdesc, age = self._expand_limit(limit, sort, desc, age)
         # select relevant items, including all the fields required for further computations
         ffields = fields[:]
         if sort not in ffields:
             ffields.append(sort)
         if "rank" in fields or lfield == "rank" or sort == "rank" or \
            "rank" in [f.split(":")[0].lstrip("~") for f in filters or []]:
-            for f in ["rank", "citations", "references", "year", "zscc"]:
+            for f in ["rank", "title", "citations", "references", "year", "zscc"]:
                 if f not in ffields:
                     ffields.append(f)
         if lfield not in ffields:
             ffields.append(lfield)
+        logger.debug(f"Selected fields: {'|'.join(ffields)}")
+        if len(filters):
+            logger.debug(f"Filtering entries ({filters})...")
         items = {i['key']: i for i in \
                  self._filter(ffields, [f for f in filters if not re.match(r"\~?rank\:", f)], force)}
         if len(items) == 0:
@@ -473,7 +489,7 @@ def _items(self, fields=None, filters=None, sort=None, desc=False, limit=None, f
                                     self.ranks[k1] += self.ranks.get(k2, 0.) / r
                 # check for convergence
                 if tuple(self.ranks.values()) == prev:
-                    logger.debug("Ranking algorithm converged after %d iterations" % n)
+                    logger.debug(f"Ranking algorithm converged after {n} iterations")
                     break
                 prev = tuple(self.ranks.values())
             # apply the damping factor at the very end
@@ -493,7 +509,7 @@ def _items(self, fields=None, filters=None, sort=None, desc=False, limit=None, f
             self.ranks = {k: v / max_rank if max_rank else 0. for k, v in self.ranks.items()}
             for k, r in sorted(self.ranks.items(), key=lambda x: -x[1]):
                 k_d = items[k]['data']
-                logger.debug("%.05f - %s (%s)" % (r, k_d['title'], k_d['date']))
+                logger.debug(f"{r:.05f} - {k_d['title']} ({k_d['date']})")
             # reapply filters, including for fields that were just computed
             items = {i['key']: i for i in self._filter(ffields, filters, force)}
             for k, i in items.items():
@@ -514,15 +530,15 @@ def _items(self, fields=None, filters=None, sort=None, desc=False, limit=None, f
                 select_items = list(items.values())
             if lfdesc:
                 select_items = select_items[::-1]
-            logger.debug("Limiting to %d items (sorted based on %s in %s order)..." % \
-                         (limit, lfield or sort, ["ascending", "descending"][lfdesc]))
+            logger.debug(f"Limiting to {limit} items (sorted based on {lfield or sort} in "
+                         f"{['ascending', 'descending'][lfdesc]} order)...")
             items = {i['key']: i for i in select_items[:limit]}
         # ensure that the rank field is set for every item
         if "rank" in ffields:
             for i in items.values():
                 i['data']['rank'] = self.ranks.get(i['key'], .0)
         # format the selected items as table data
-        logger.debug("Sorting items based on %s..." % sort)
+        logger.debug(f"Sorting items based on {sort}...")
         for i in sorted(items.values(), key=lambda i: ZoteroCLI.sort(i['data'].get(sort, "-"), sort)):
             row = [self._format_value(i['data'].get(f), f) if i['data'].get(f) else "-" for f in fields]
             if len(row) > 1 and all(x in ".-" for x in row[1:]):  # row[0] is the item's key ; shall never be "." or "-"
@@ -689,7 +705,7 @@ def plot(self, name, filters=None):
             for y, t in sorted(data.items(), key=lambda x: x[0]):
                 print(["%d:" % y, "####:"][y == 1900], ", ".join(t))
         else:
-            logger.debug("Should be one of:\n- " + "\n- ".join(sorted(CHARTS)))
+            logger.debug(f"Got chart name '{name}' ; should be one of:\n- " + "\n- ".join(sorted(CHARTS)))
             logger.error("Bad chart")
             raise ValueError
 

diff --git a/src/zotero/__main__.py b/src/zotero/__main__.py
@@ -64,47 +64,49 @@ def main():
     parser.add_argument("-r", "--reset", action="store_true", help="remove cached collections and items")
     # commands: count | export | list | plot | reset | show | view
     sparsers = parser.add_subparsers(dest="command", help="command to be executed")
+    kw1, kw2 = {}, {}
     if __GPT:
-        cask = sparsers.add_parser("ask", help="ask questions to your Zotero documents")
+        kw1, kw2 = {'category': "main"}, {'category': "GPT"}
+        cask = sparsers.add_parser("ask", help="ask questions to your Zotero documents", category="GPT")
         cask.add_argument("name", default=MODEL_DEFAULT_NAME, choices=MODELS, nargs="?", help="model name")
         cask.add_argument("-c", "--show-content", action="store_true", help="show content from source documents")
         cask.add_argument("-m", "--mute-stream", action="store_true", help="disable streaming StdOut callback for LLMs")
         cask.add_argument("-s", "--show-source", action="store_true", help="show source documents")
-    ccount = sparsers.add_parser("count", help="count items")
+    ccount = sparsers.add_parser("count", help="count items", category="read")
     _set_arg(ccount, "filter", "filter to be applied while counting")
     _set_arg(ccount, "query")
-    cexpt = sparsers.add_parser("export", help="export items to a file")
+    cexpt = sparsers.add_parser("export", help="export items to a file", category="manage")
     cexpt.add_argument("field", nargs="+", help="field to be shown")
     cexpt.add_argument("-l", "--line-format", help="line's format string for outputting as a list")
     cexpt.add_argument("-o", "--output-format", default="xlsx", help="output format",
                        choices=["csv", "html", "json", "md", "pdf", "rst", "xml", "xlsx", "yaml"])
     _set_args(cexpt, "filter", "limit", "query", "sort")
     if __GPT:
-        cingest = sparsers.add_parser("ingest", help="ingest Zotero documents")
-        cinst = sparsers.add_parser("install", help="install a GPT model")
+        cingest = sparsers.add_parser("ingest", help="ingest Zotero documents", category="GPT")
+        cinst = sparsers.add_parser("install", help="install a GPT model", category="GPT")
         cinst.add_argument("name", default=MODEL_DEFAULT_NAME, choices=MODEL_NAMES, nargs="?", help="model name")
         cinst.add_argument("-d", "--download", action="store_true", help="download the input model")
-    clist = sparsers.add_parser("list", help="list distinct values for the given field")
+    clist = sparsers.add_parser("list", help="list distinct values for the given field", category="read")
     clist.add_argument("field", help="field whose distinct values are to be listed")
-    _set_args(clist, "filter", "query")
+    _set_args(clist, "filter")
     clist.add_argument("-l", "--limit", type=ts.pos_int, help="limit the number of displayed records")
     clist.add_argument("--desc", action="store_true", help="sort results in descending order")
-    cmark = sparsers.add_parser("mark", help="mark items with a marker")
+    cmark = sparsers.add_parser("mark", help="mark items with a marker", category="manage")
     cmark.add_argument("marker", choices=[x for p in MARKERS for x in p[:2]], help="marker to be set",
                        note="possible values:\n - {}".format("\n - ".join("%s: %s" % (p[0], p[2]) for p in MARKERS)))
     _set_args(cmark, "filter", "limit", "query", "sort")
-    cplot = sparsers.add_parser("plot", help="plot various information using Matplotlib")
+    cplot = sparsers.add_parser("plot", help="plot various information using Matplotlib", category="read")
     cplot.add_argument("chart", choices=CHARTS, help="chart to be plotted")
     _set_args(cplot, "filter", "query")
-    creset = sparsers.add_parser("reset", help="reset cached collections and items")
+    creset = sparsers.add_parser("reset", help="reset cached collections and items", category="manage")
     creset.add_argument("-r", "--reset-items", action="store_true", help="reset items only")
     if __GPT:
-        cselect = sparsers.add_parser("select", help="select a GPT model")
+        cselect = sparsers.add_parser("select", help="select a GPT model", category="GPT")
         cselect.add_argument("name", default=MODEL_DEFAULT_NAME, choices=MODELS, nargs="?", help="model name")
-    cshow = sparsers.add_parser("show", help="show a list of items")
+    cshow = sparsers.add_parser("show", help="show a list of items", category="read")
     cshow.add_argument("field", nargs="*", help="field to be shown")
     _set_args(cshow, "filter", "limit", "query", "sort")
-    cview = sparsers.add_parser("view", help="view a single item")
+    cview = sparsers.add_parser("view", help="view a single item", category="read")
     cview.add_argument("name", help="field name for selection")
     cview.add_argument("value", help="field value to be selected")
     cview.add_argument("field", nargs="+", help="field to be shown")
@@ -114,9 +116,9 @@ def main():
         if hasattr(args, "field") and args.field == ["-"]:
             args.field = QUERIES[args.query].get('fields', ["title"])
         args.filter.extend(QUERIES[args.query].get('filter', []))
-        if args.limit is None:
+        if getattr(args, "limit", None) is None:
             args.limit = QUERIES[args.query].get('limit')
-        if args.sort is None:
+        if getattr(args, "sort", None) is None:
             args.sort = QUERIES[args.query].get('sort')
     if hasattr(args, "sort"):
         args.desc = False