Merge pull request #273 from DerwenAI/build_update

build updates
DerwenAI · Feb 21, 2024 · 4973b0d · 4973b0d
2 parents 70ea807 + 49e7303
commit 4973b0d
Show file tree

Hide file tree

Showing 17 changed files with 301 additions and 199 deletions.
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,11 +1,17 @@
+include CITATION
+include LICENSE
+include README.md
+include pyproject.toml
+include requirements.txt
+include setup.py
+include tests/*.py
+prune .ipynb_checkpoints
 # added by check-manifest
 include *.md
 include *.py
 include *.txt
 include *.yaml
 include *.yml
-include CITATION
-include LICENSE
 include pylintrc
 recursive-include bin *.py
 recursive-include bin *.sh

diff --git a/README.md b/README.md
@@ -132,7 +132,7 @@ Source code for **PyTextRank** plus its logo, documentation, and examples
 have an [MIT license](https://spdx.org/licenses/MIT.html) which is
 succinct and simplifies use in commercial applications.
 
-All materials herein are Copyright &copy; 2016-2023 Derwen, Inc.
+All materials herein are Copyright &copy; 2016-2024 Derwen, Inc.
 
 
 ## Attribution

diff --git a/SECURITY.md b/SECURITY.md
@@ -0,0 +1,14 @@
+# Security Policy
+
+## Supported Versions
+
+Versions which are currently being supported with security updates:
+
+| Version | Supported          |
+| ------- | ------------------ |
+| > 0.2   | :white_check_mark: |
+
+## Reporting a Vulnerability
+
+To report a vulnerability, please create a new [*issue*](https://github.com/DerwenAI/pytextrank/issues).
+We will be notified immediately, and will attempt to respond on the reported issue promptly.
diff --git a/bin/nb_md.sh b/bin/nb_md.sh
@@ -1,4 +1,4 @@
-#!/bin/bash -e
+#!/bin/bash -e -x
 
 for notebook_path in examples/*.ipynb; do
     [ -e "$notebook_path" ] || continue
@@ -8,6 +8,6 @@ for notebook_path in examples/*.ipynb; do
 
     cp $notebook_path docs/$notebook
     jupyter nbconvert docs/$notebook --to markdown
-    python bin/vis_doc.py docs/"$stem".md
+    python3 bin/vis_doc.py docs/"$stem".md
     rm docs/$notebook
-done
+done
diff --git a/bin/preview.py b/bin/preview.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-from flask import Flask, redirect, send_from_directory, url_for # pylint: disable=E0401
+from flask import Flask, redirect, send_from_directory, url_for  # pylint: disable=E0401
 from pathlib import PurePosixPath
 import os
 

diff --git a/bin/push_pypi.sh b/bin/push_pypi.sh
@@ -1,8 +1,10 @@
-#!/bin/bash -e
+#!/bin/bash -e -x
 
-## debugging the uploaded README:
-# pandoc README.md --from markdown --to rst -s -o README.rst
+rm -rf dist build pytextrank.egg-info
+python3 -m build
+twine check dist/*
 
-rm -rf dist
-python setup.py sdist bdist_wheel
-twine upload --verbose dist/*
+# this assumes the use of `~/.pypirc`
+# https://packaging.python.org/en/latest/specifications/pypirc/
+
+twine upload ./dist/* --verbose
diff --git a/docs/ack.md b/docs/ack.md
@@ -47,8 +47,7 @@ Computer Science (advisor: [Douglas Lenat](https://en.wikipedia.org/wiki/Douglas
 with additional work in Design and Linguistics.
 His business experience includes: 
 Director, VP, and CTO positions leading data teams and machine learning projects;
-former CTO/Board member at two publicly-traded tech firms on NASDAQ OTC:BB;
-and an equity partner at [Amplify Partners](https://derwen.ai/s/hcxhybks9nbh).
+former CTO/Board member at two publicly-traded tech firms on NASDAQ OTC:BB.
 Cited in 2015 as one of the 
 [Top 30 People in Big Data and Analytics](http://www.kdnuggets.com/2015/02/top-30-people-big-data-analytics.html)
 by Innovation Enterprise.
@@ -67,16 +66,15 @@ by Innovation Enterprise.
     [Cascading](https://www.cascading.org/)
   * consultant to enterprise organizations for [data strategy](../glossary/#data-strategy);
     advisor to several AI start-ups, including
-    [Recognai](https://derwen.ai/s/hk4g),
+    [Argilla](https://derwen.ai/s/mz2xj9bsz2jx),
     [KUNGFU.AI](https://derwen.ai/s/rwg8prbgqp36),
-    [Primer](https://derwen.ai/s/tm9jxzcm67hc)
+    [DataSpartan](https://derwen.ai/s/hxsfttck3dkx)
 
 As an author/speaker/instructor, Paco has taught many people (+9000) 
 in industry across a range of topics –
 [*data science*](../glossary/#data-science),
 [*natural language*](../glossary/#natural-language),
 [*cloud computing*](../glossary/#cloud-computing),
-[*reinforcement learning*](../glossary/#reinforcement-learning),
 [*computable content*](../glossary/#computable-content),
 etc. –
 and through guest lectures at 
@@ -115,9 +113,9 @@ Source code for **pytextrank** plus its logo, documentation, and examples
 have an [MIT license](https://spdx.org/licenses/MIT.html) which is
 succinct and simplifies use in commercial applications.
 
-All materials herein are Copyright &copy; 2016-2023 Derwen, Inc.
+All materials herein are Copyright &copy; 2016-2024 Derwen, Inc.
 
-[![logo for Derwen, Inc.](https://derwen.ai/static/block_logo.png)](https://derwen.ai/)
+[![logo for Derwen, Inc.](https://derwen.ai/static/design/block_logo.png)](https://derwen.ai/)
 
 
 ## Production Use Cases

diff --git a/docs/index.md b/docs/index.md
@@ -78,8 +78,6 @@ complementary, hybrid AI solutions.
 Links for other open source community resources:
 
   * [Issue Tracker](https://github.com/DerwenAI/pytextrank/issues)
-  * [Project Board](https://github.com/DerwenAI/pytextrank/projects/1)
-  * [Milestones](https://github.com/DerwenAI/pytextrank/milestones)
   * [spaCy uniVerse](https://spacy.io/universe/project/spacy-pytextrank)
 
 Other good ways to help troubleshoot issues:
@@ -94,7 +92,7 @@ get help about **pytextrank** and related
 topics.
 
   * [community Slack](https://knowledgegraphconf.slack.com/ssb/redirect) – specifically on the `#ask` channel
-  * [*Graph-Based Data Science*](https://www.linkedin.com/groups/6725785/) group on LinkedIn – join to receive related updates, news, conference coupons, etc.
+  * [*Graph Data Science*](https://www.linkedin.com/groups/6725785/) group on LinkedIn – join to receive related updates, news, conference coupons, etc.
 
 For related course materials and training, please check for calendar
 updates in the article

diff --git a/docs/ref.md b/docs/ref.md
@@ -1,4 +1,7 @@
 # Reference: `pytextrank` package
+Package definitions for the `pytextrank` library.
+
+
 ## [`BaseTextRankFactory` class](#BaseTextRankFactory)
 
 A factory class that provides the document with its instance of
@@ -118,7 +121,7 @@ list of ranked phrases, in descending order
 
 ---
 #### [`get_personalization` method](#pytextrank.BaseTextRank.get_personalization)
-[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L376)
+[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L385)
 
 ```python
 get_personalization()
@@ -136,7 +139,7 @@ Defaults to a no-op for the base *TextRank* algorithm.
 
 ---
 #### [`get_unit_vector` method](#pytextrank.BaseTextRank.get_unit_vector)
-[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L638)
+[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L649)
 
 ```python
 get_unit_vector(limit_phrases)
@@ -158,7 +161,7 @@ the unit vector, as a list of `VectorElem` objects
 
 ---
 #### [`calc_sent_dist` method](#pytextrank.BaseTextRank.calc_sent_dist)
-[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L682)
+[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L693)
 
 ```python
 calc_sent_dist(limit_phrases)
@@ -176,7 +179,7 @@ a list of sentence distance measures
 
 ---
 #### [`segment_paragraphs` method](#pytextrank.BaseTextRank.segment_paragraphs)
-[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L731)
+[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L742)
 
 ```python
 segment_paragraphs(sent_dist)
@@ -193,7 +196,7 @@ a list of Paragraph data objects
 
 ---
 #### [`summary` method](#pytextrank.BaseTextRank.summary)
-[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L786)
+[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L797)
 
 ```python
 summary(limit_phrases=10, limit_sentences=4, preserve_order=False, level="sentence")
@@ -221,7 +224,7 @@ texts for sentences, in order
 
 ---
 #### [`write_dot` method](#pytextrank.BaseTextRank.write_dot)
-[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L860)
+[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L871)
 
 ```python
 write_dot(path="graph.dot")
@@ -235,7 +238,7 @@ path for the output file; defaults to `"graph.dot"`
 
 ---
 #### [`plot_keyphrases` method](#pytextrank.BaseTextRank.plot_keyphrases)
-[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L890)
+[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/base.py#L901)
 
 ```python
 plot_keyphrases()
@@ -257,7 +260,7 @@ A factory class that provides the document with its instance of
 
 ---
 #### [`__init__` method](#pytextrank.TopicRankFactory.__init__)
-[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L31)
+[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L32)
 
 ```python
 __init__(edge_weight=1.0, pos_kept=None, token_lookback=3, scrubber=None, stopwords=None, threshold=0.25, method="average")
@@ -268,7 +271,7 @@ Constructor for the factory class.
 
 ---
 #### [`__call__` method](#pytextrank.TopicRankFactory.__call__)
-[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L58)
+[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L59)
 
 ```python
 __call__(doc)
@@ -315,7 +318,7 @@ Algorithm Overview:
 
 ---
 #### [`__init__` method](#pytextrank.TopicRank.__init__)
-[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L120)
+[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L121)
 
 ```python
 __init__(doc, edge_weight, pos_kept, token_lookback, scrubber, stopwords, threshold, method)
@@ -347,7 +350,7 @@ clustering method used in *TopicRank* candidate clustering: see [`scipy.cluster.
 
 ---
 #### [`calc_textrank` method](#pytextrank.TopicRank.calc_textrank)
-[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L307)
+[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L318)
 
 ```python
 calc_textrank()
@@ -364,7 +367,7 @@ list of ranked phrases, in descending order
 
 ---
 #### [`reset` method](#pytextrank.TopicRank.reset)
-[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L367)
+[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/topicrank.py#L378)
 
 ```python
 reset()
@@ -677,6 +680,21 @@ the filtered text representing as a list of lines
 
 
 
+---
+#### [`get_repo_version` function](#pytextrank.get_repo_version)
+[*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/version.py#L49)
+
+```python
+get_repo_version()
+```
+Access the Git repository information and return items to identify
+the version/commit running in production.
+
+  * *returns* : `typing.Tuple[str, str]`  
+version tag and commit hash
+
+
+
 ---
 #### [`groupby_apply` function](#pytextrank.groupby_apply)
 [*\[source\]*](https://github.com/DerwenAI/pytextrank/blob/main/pytextrank/util.py#L14)

diff --git a/pkg_doc.cfg b/pkg_doc.cfg
@@ -0,0 +1,20 @@
+{
+    "src_url": "https://github.com/DerwenAI/pytextrank/blob/main",
+
+    "module": "pytextrank",
+
+    "classes": [
+        "BaseTextRankFactory",
+        "BaseTextRank",
+        "TopicRankFactory",
+        "TopicRank",
+        "PositionRankFactory",
+        "PositionRank",
+        "BiasedTextRankFactory",
+        "BiasedTextRank",
+        "Lemma",
+        "Phrase",
+        "Sentence",
+        "VectorElem"
+    ]
+}
diff --git a/pkg_doc.py b/pkg_doc.py
@@ -1,48 +1,43 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-import pyfixdoc
+"""
+Generate the `apidocs` markdown needed for the package reference.
+"""
+
+import importlib
+import json
 import sys
 
+import pyfixdoc
+
 
 ######################################################################
 ## main entry point
 
 if __name__ == "__main__":
+    ref_md_file: str = sys.argv[1]
+
     # NB: `inspect` is picky about paths and current working directory
     # this only works if run from the top-level directory of the repo
     sys.path.insert(0, "../")
 
-    # customize the following, per use case
-    import pytextrank # pylint: disable=W0611
-
-    class_list = [
-        "BaseTextRankFactory",
-        "BaseTextRank",
-        "TopicRankFactory",
-        "TopicRank",
-        "PositionRankFactory",
-        "PositionRank",
-        "BiasedTextRankFactory",
-        "BiasedTextRank",
-        "Lemma",
-        "Phrase",
-        "Sentence",
-        "VectorElem",
-        ]
-
-    pkg_doc = pyfixdoc.PackageDoc(
-        "pytextrank",
-        "https://github.com/DerwenAI/pytextrank/blob/main",
-        class_list,
+    with open("pkg_doc.cfg", "r", encoding="utf-8") as fp:
+        config: dict = json.load(fp)
+
+        importlib.import_module(config["module"])
+
+        pkg_doc: pyfixdoc.PackageDoc = pyfixdoc.PackageDoc(
+            config["module"],
+            config["src_url"],
+            config["classes"],
         )
 
-    # NB: uncomment to analyze/troubleshoot the results of `inspect`
-    #pkg_doc.show_all_elements(); sys.exit(0)
+        # NB: uncomment to analyze/troubleshoot the results of `inspect`
+        #pkg_doc.show_all_elements(); sys.exit(0)
 
-    # build the apidocs markdown
-    pkg_doc.build()
+        # build the apidocs markdown
+        pkg_doc.build()
 
-    # output the apidocs markdown
-    ref_md_file = sys.argv[1]
-    pkg_doc.write_markdown(ref_md_file)
+        # output the apidocs markdown
+        pkg_doc.write_markdown(ref_md_file)