-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathexamples.py
executable file
·83 lines (70 loc) · 3.22 KB
/
examples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env python3
from pathlib import Path
from typing import List
import click
from click import Context
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from loguru import logger
from pycomfort.config import configure_logger, LOG_LEVELS, LogLevel
from pynction import Try
from getpaper.download import download_papers
from getpaper.parse import parse_papers
@click.group(invoke_without_command=False)
@click.pass_context
def app(ctx: Context):
    # Root click group of this script; commands attach to it via @app.command.
    # Documented with comments rather than a docstring on purpose: click would
    # surface a docstring as the group's --help text.
    # NOTE(review): with invoke_without_command=False click is not expected to
    # run this callback when no subcommand is given, so the echo below looks
    # unreachable -- confirm whether invoke_without_command=True was intended.
    if ctx.invoked_subcommand is not None:
        return
    click.echo('Running the default command...')
@app.command("download_papers_async")
@click.argument('dois', nargs=-1)
@click.option('--threads', '-t', type=int, default=5, help='Number of threads (default: 5)')
def download_papers_async_command(dois: List[str], threads: int):
    """Downloads papers with the given DOIs to ./data/output/test/papers.

    If no DOIs are given, a built-in sample list of DOIs is used instead.
    """
    # The docstring above doubles as the click --help text. It previously
    # promised a "specified destination", but this command takes no destination
    # argument: the target folder is fixed below.
    #
    # dois:    DOIs collected from the command line (variadic argument).
    # threads: passed straight through to download_papers as its third argument.
    # Returns whatever download_papers returns, unchanged.
    if not dois:
        # Demo fallback; "wrong_doi" is presumably included on purpose so the
        # failure report below has something to show -- confirm.
        dois = ["10.3390/ijms22031073", "wrong_doi", "10.1038/s41597-020-00710-z"]
    where = Path("./data/output/test/papers").absolute().resolve()
    results = download_papers(dois, where, threads)
    # Only the first two elements of the result are used here: element 0 is
    # iterated as a mapping, element 1 as a plain iterable.
    succeeded, failed = results[0], results[1]
    for k, v in succeeded.items():
        print(f"successfully downloaded {k} in an async way to {v}")
    for w in failed:
        print(f"failed download for {w}")
    return results
def doi_download_parse(doi: str = "10.3390/ijms22031073", strategy: str = "auto"):
    """Try to download one paper by DOI and hand the outcome to ``parse_paper``.

    The paper is downloaded into ``./data/output/test/papers`` (resolved to an
    absolute path). The download result is printed and then dispatched through
    ``Try.run`` with two callbacks: one that parses the downloaded file with the
    given ``strategy`` and one that returns a fixed failure message.

    Returns whatever ``Try.run`` returns for those callbacks (presumably the
    first handles success and the second failure -- confirm against pynction).
    """
    print("example_download_and_parse_doi")
    from getpaper.download import try_download
    from getpaper.parse import parse_paper

    target_dir = Path("./data/output/test/papers").absolute().resolve()
    # Kept under its own name so the imported try_download function is not
    # rebound to its result.
    attempt: Try[tuple] = try_download(doi, target_dir)
    print(attempt)

    def _parse(downloaded):
        # Element 1 of the download result is used as the path of the file to parse.
        return parse_paper(downloaded[1].absolute().resolve(), strategy=strategy)

    def _failed(_error):
        return "it crashed :(((((("

    return attempt.run(_parse, _failed)
@app.command("doi_download_parse")
@click.option(
    '--doi',
    type=click.STRING,
    default="10.3390/ijms22031073",
    help="download doi",
)
@click.option(
    "--strategy",
    type=click.Choice(["auto", "hi_res", "fast"]),
    default="auto",
    help="strategy used to convert the page",
)
def doi_download_parse_command(doi: str = "10.3390/ijms22031073", strategy: str = "auto"):
    # CLI entry point that forwards --doi and --strategy to doi_download_parse
    # and returns its result unchanged.
    # Documented with comments rather than a docstring so the command's --help
    # output stays exactly as it was.
    outcome = doi_download_parse(doi, strategy)
    return outcome
@app.command("parse_papers")
def parse_papers_command():
    # CLI entry point: runs parse_papers on ./data/output/test/papers and sends
    # the output to ./data/output/test/parsed_papers, creating that folder
    # (and any missing parents) first.
    # Documented with comments rather than a docstring so the command's --help
    # output stays exactly as it was.
    base_dir = Path("./data/output/test").absolute().resolve()
    source_dir = base_dir / "papers"
    output_dir = base_dir / "parsed_papers"
    output_dir.mkdir(parents=True, exist_ok=True)
    return parse_papers(source_dir, output_dir, recreate_parent=True)
"""
@app.command("clean")
def clean_command():
papers_folder = Path("./data/output/test/parsed_papers").absolute().resolve()
paper = papers_folder / "10.1038" / "s41597-020-00710-z_unstructured.txt"
text = paper.read_text(encoding="utf-8")
#openai_key = load_environment_keys()
print("proofreading")
results = proofread(text)
print("RESULTS ARE:\n")
#paper_improved = papers_folder / "10.1038" / "s41597-020-00710-z_TEST.txt"
#print(f"RESULTS WILL BE WRITTEN TO {paper_improved}")
print("CLEAN PAPER IS TEMPORALY NOT WORKING")
#return clean_paper(paper)
"""
# Run the click group only when this file is executed as a script.
if __name__ == "__main__":
    app()