# setup.cfg

# Distribution metadata for the package (name, version, authorship, license, URL).
[metadata]
name = decoding-trust
version = 0.1.0
author = Secure Learning Lab
author_email = boxinw2@illinois.edu
description = Trustworthiness Benchmark for Large Language Models
long_description = Trustworthiness Benchmark for Large Language Models
keywords = trustworthy language models
# NOTE(review): `license` names a Creative Commons license, while the classifiers
# below declare "License :: OSI Approved :: Apache Software License". Confirm which
# license applies and make the two fields agree.
license = Creative Commons Public Licenses
classifiers =
    Programming Language :: Python :: 3 :: Only
    Programming Language :: Python :: 3.9
    License :: OSI Approved :: Apache Software License
url = https://github.com/AI-secure/DecodingTrust

# Build options: minimum Python version and a src/ layout with package discovery.
[options]
python_requires = >=3.9
# The empty key maps the root package namespace to the src/ directory.
package_dir =
    =src
# `find:` requests automatic package discovery; see [options.packages.find].
packages = find:
zip_safe = False
include_package_data = True

# Runtime dependencies.
# Comments are kept on their own lines: configparser does not strip trailing
# "# ..." text by default, so an inline comment becomes part of the raw value.
install_requires =
    # NOTE(review): installs from the moving `main` branch of a fork, so builds
    # are not reproducible -- consider pinning a tag or commit.
    crfm-helm @ git+https://github.com/danielz02/helm.git@main
    hydra-core~=1.3.2
    fairlearn~=0.9.0
    # Common
    cattrs~=22.2.0
    dacite~=1.6.0
    importlib-resources~=5.10.0
    Mako~=1.2.3
    numpy~=1.23.3
    pyhocon~=0.3.59
    retrying~=1.3.4
    # For slurm_jobs
    simple-slurm~=0.2.6
    spacy~=3.5.3
    tqdm~=4.64.1
    zstandard~=0.18.0
    # sqlitedict==2.0.0 is slow! https://github.com/RaRe-Technologies/sqlitedict/issues/152
    # Keep sqlitedict version at 1.7.0.
    sqlitedict~=1.7.0
    bottle~=0.12.23
    # TODO: Remove these from common
    # protobuf: can't use 4.21.0 due to backward incompatibility
    protobuf~=3.20.2
    pymongo~=4.2.0

    # Basic Scenarios
    datasets~=2.5.2
    # Pinned transitive dependency for datasets; workaround for #1026
    pyarrow~=11.0.0
    # Not really needed
    jsonlines~=3.1.0

    # Basic metrics
    nltk~=3.7
    pyext~=0.7
    rouge-score~=0.1.2
    scipy>=1.9.1,<=1.10.1
    uncertainty-calibration~=0.1.3
    # Work around https://github.com/p-lambda/verified_calibration/issues/11
    # TODO: Remove after this issue is resolved
    scikit-learn~=1.1.2

    # Model Extras
    aleph-alpha-client~=2.14.0
    anthropic~=0.2.5
    # For ice_tokenizer_client
    icetk~=0.0.4
    openai~=0.27.8
    # For palmyra_client and yalm_tokenizer
    sentencepiece~=0.1.97
    # For openai_client
    tiktoken>=0.3.3
    # For aleph_alpha_client
    tokenizers~=0.13.3
    # For Anthropic (Legacy stanford-online-all-v4-s3)
    websocket-client~=1.3.2

    # Models and Metrics Extras
    # For anthropic_client, huggingface_client, huggingface_tokenizer,
    # test_openai_token_cost_estimator, model_summac (via summarization_metrics)
    transformers>=4.31.0
    # TODO: Upgrade torch
    # For huggingface_client, yalm_tokenizer, model_summac (via summarization_metrics)
    torch>=1.12.0,<=2.0.1
    # For huggingface_client, yalm_tokenizer, model_summac (via summarization_metrics)
    torchvision>=0.13.1,<3.0.0

    # Metrics Extras
    # For perspective_api_client via toxicity_metrics
    google-api-python-client~=2.64.0

    # Retry
    tenacity~=8.2.2

    shortuuid~=1.0.11

    colorcet~=3.0.1
    matplotlib~=3.6.0
    seaborn~=0.11.0

# Optional dependency groups, selected with the `decoding-trust[<extra>]` syntax
# (as the `all` group does below).
[options.extras_require]

awq =
    crfm-helm[awq] @ git+https://github.com/danielz02/helm.git@main

# NOTE(review): the torch pin in `gptq` and `quant` repeats the one already listed
# in install_requires -- presumably kept for explicitness; confirm it is needed.
gptq =
    torch>=1.12.0,<=2.0.1
    crfm-helm[gptq] @ git+https://github.com/danielz02/helm.git@main

quant =
    torch>=1.12.0,<=2.0.1
    crfm-helm[quant] @ git+https://github.com/danielz02/helm.git@main

slurm =
    hydra-submitit-launcher~=1.2.0
    submitit~=1.5.0

# NOTE(review): `all` pulls in only `quant` and `slurm`; `awq` and `gptq` are not
# included -- confirm this omission is intentional.
all =
    decoding-trust[quant]
    decoding-trust[slurm]

# Command-line entry points: `dt-run` is bound to `main` in the `dt.main` module.
[options.entry_points]
console_scripts =
    dt-run = dt.main:main

# Package discovery settings for `packages = find:`: search under src/ and
# leave out anything matching `tests*`.
[options.packages.find]
where = src
exclude =
    tests*

# Settings for Flake8: Tool For Style Guide Enforcement
[flake8]
max-line-length = 120
exclude = venv/*

# Checks disabled for the whole project:
# E203 - Whitespace before ':' (conflicts with black)
# E231 - Missing whitespace after ',', ';', or ':'
# E731 - Do not assign a lambda expression, use a def
# W503 - Line break before binary operator (conflicts with black)
# W605 - Invalid escape sequence '\' (causes failures)
# NOTE(review): `ignore` replaces flake8's default ignore list rather than adding
# to it; use `extend-ignore` if the defaults should stay in effect.
ignore = E203,E231,E731,W503,W605

# Settings for Mypy: static type checker for Python 3
[mypy]
# Do not report errors for imports whose modules or type stubs cannot be found.
ignore_missing_imports = True

# Settings for pytest: default command-line options and registered markers.
[tool:pytest]
addopts =
    # By default, we don't test models because doing so will
    # make real requests and spend real money
    -m 'not models'
markers =
    # Marker for tests that make real model requests
    models