-
Notifications
You must be signed in to change notification settings - Fork 43
Expand file tree
/
Copy pathsetup.cfg
More file actions
100 lines (88 loc) · 2.87 KB
/
setup.cfg
File metadata and controls
100 lines (88 loc) · 2.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# Package metadata consumed by setuptools (parsed with configparser).
[metadata]
name = fast_llm
# Core packaging options: which packages ship and the hard runtime requirements.
# NOTE: multi-line values (packages, install_requires) must keep their entries
# indented deeper than the key — configparser treats the indent as a value
# continuation and rejects column-0 continuation lines.
[options]
packages =
    fast_llm
    fast_llm_external_models
include_package_data = True
python_requires = >=3.12
install_requires =
    requests>=2.32.5
    PyYAML>=6.0.3
    pybind11>=3.0.1
    packaging>=25.0
# Optional dependency groups, installable as pip extras (e.g. ".[CORE,DEV]").
# Entries under each NAME = key are indented continuation lines (required by
# configparser); comment lines inside a list are indented with the items.
[options.extras_require]

# Required to use the main functionality of Fast-LLM
# To install on cpu environment (ex. for IDE support):
# FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE FLASH_ATTENTION_FORCE_BUILD=TRUE pip install -e ".[CORE]" --no-build-isolation
CORE =
    # Available through the nvidia base image
    torch>=2.9.0
    # apex # Available through the nvidia base image, requires manual build with --cuda_ext --fast_layer_norm
    numpy>=2.1.0
    # Used for checkpoints
    safetensors>=0.6.2
    # Update the base image (version fixed to ensure there is a wheel for the base image), may need --no-build-isolation
    flash-attn==2.7.4.post1
    # Dropless MoE kernel is broken with triton >= 3.2.0 and needs a rewrite (also limited to 32 experts).
    # Not pinning triton here as it breaks cpu-only installs and pip dependency resolution.
    # triton==3.5.1

# Small packages required for some optional features and tools.
OPTIONAL =
    # Weights and biases
    wandb>=0.24.0
    # Hydra
    hydra-core>=1.3.2
    omegaconf>=2.3.0
    # Miscellaneous
    tqdm>=4.67.1

# Huggingface tools
HUGGINGFACE =
    transformers>=4.57.3,<5.0.0
    hf-transfer>=0.1.9
    datasets>=4.4.1
    huggingface-hub>=0.36.0

# Required to run SSMs
# To install on cpu environment (ex. for IDE support):
# MAMBA_FORCE_BUILD=TRUE CAUSAL_CONV1D_FORCE_BUILD=TRUE CAUSAL_CONV1D_SKIP_CUDA_BUILD=TRUE pip install -e ".[CORE,SSM]" --no-build-isolation
SSM =
    mamba_ssm[causal-conv1d]==2.2.6.post3
    # TODO: This is required for varlen mamba, but fails to compile in nvcr.io/nvidia/pytorch:25.11-py3.
    # mamba_ssm[causal-conv1d] @ git+https://github.com/jxiw/varlen_mamba.git@varlen_mamba
    flash-linear-attention @ git+https://github.com/fla-org/flash-linear-attention@67eee20c8503cd19eeb52aa1b99821308e9260c5

GENERATION =
    lm_eval>=0.4.9

STREAMING =
    redis>=7.1.0

# Required for supporting vision inputs
VISION =
    # Vision Tools
    webp>=0.4.0
    pillow-simd>=9.5.0
    torchvision>=0.24.0

DEV =
    # Pre-commit git hook
    pre-commit>=4.5.1
    # Required for testing
    pytest>=9.0.2
    pytest-xdist>=3.8.0
    # Somehow needed for Megatron to work with base image 24.11
    setuptools>=80.9.0
    # Dependency manager needs colorama to show colors.
    colorama>=0.4.6
    fakeredis>=2.32.1

# Required for building the documentation
DOCS =
    mkdocs
    mkdocs-material
    mkdocs-material[imaging]
    mkdocs-section-index
    mkdocstrings[python]
    mkdocs-git-committers-plugin-2
    mkdocs-git-revision-date-localized-plugin
    pypandoc_binary
    mkdocs-bibtex
    cairosvg==2.7.0
# Console entry points: installs the `fast-llm` CLI, which dispatches to
# fast_llm.cli:fast_llm_main. The script line must be indented as a
# continuation of the console_scripts key for configparser to accept it.
[options.entry_points]
console_scripts =
    fast-llm = fast_llm.cli:fast_llm_main