Install this version:
emerge -a =sci-ml/lm-eval-0.4.11
If this version is masked, you can unmask it using the autounmask tool or standard emerge options:
autounmask =sci-ml/lm-eval-0.4.11
Or alternatively:
emerge --autounmask-write -a =sci-ml/lm-eval-0.4.11
| Version | EAPI | Keywords | Slot |
|---|---|---|---|
| 0.4.11 | 8 | ~amd64 | 0 |
# Copyright 1999-2026 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
# Ebuild for sci-ml/lm-eval: declares package metadata, USE flags and the
# runtime dependency set. No phase functions are defined in this part of
# the file.
EAPI=8
# Eclass configuration. These are assigned ahead of the `inherit` line
# below so they are already set when the eclasses are sourced.
DISTUTILS_USE_PEP517=setuptools
PYTHON_COMPAT=( python3_{12..14} )
DISTUTILS_SINGLE_IMPL=1
# The upstream PyPI sdist filename uses an underscore
# (lm_eval-x.y.z.tar.gz), while the project page is /pypi/lm-eval/.
# PYPI_PN is therefore set to the underscore spelling explicitly and
# name normalization is switched off.
PYPI_PN="lm_eval"
PYPI_NO_NORMALIZE=1
inherit distutils-r1 pypi
DESCRIPTION="A framework for evaluating language models (lm-evaluation-harness)"
HOMEPAGE="
https://github.com/EleutherAI/lm-evaluation-harness
https://pypi.org/project/lm-eval/
"
LICENSE="MIT"
SLOT="0"
KEYWORDS="~amd64"
# The leading "+" makes the api flag enabled by default; the other flags
# are off unless the user turns them on.
IUSE="+api math sentencepiece statsmodels vllm"
# Core deps come from pyproject.toml [project.dependencies] at v0.4.11.
# Optional [project.optional-dependencies] groups are exposed as USE flags
# only where every dependency is available in our overlay set:
#   api           -> aiohttp, requests, tenacity, tqdm, tiktoken
#   math          -> sympy, antlr4-python3-runtime==4.11.*, math-verify
#   sentencepiece -> sentencepiece
#   statsmodels   -> upstream "discrim_eval" extra (statsmodels)
#   vllm          -> vllm
# The other extras (hf, ifeval, multilingual, ruler, wandb, japanese,
# longbench, libra, ipex, gptq, gptqmodel, optimum, sparsify, audiolm_qwen,
# unitxt, zeno, ibm_watsonx_ai, acpbench) depend on packages we do not
# currently carry; users who want them must `pip install lm_eval[<extra>]`.
#
# math: at lm_eval 0.4.11, lm_eval/tasks/minerva_math/utils.py asserts
#   version("antlr4-python3-runtime").startswith("4.11")
# at task-load time, so the antlr4-4.11* pin below is load-bearing, not
# advisory (verified upstream 2026-05-11). We carry the older
# antlr4-python3-runtime-4.11.0 alongside ::gentoo's 4.13.2 for this;
# enabling USE=math triggers the downgrade.
#
# single-impl: sci-ml/{datasets,evaluate} and the optional dev-python/vllm
# are depended on with PYTHON_SINGLE_USEDEP. The rest of the stack is
# multi-impl and is wrapped in python_gen_cond_dep with PYTHON_USEDEP.
# The python_gen_cond_dep argument is single-quoted, so the shell leaves
# ${PYTHON_USEDEP} unexpanded in the text passed to the helper.
RDEPEND="
>=sci-ml/datasets-2.16.0[${PYTHON_SINGLE_USEDEP}]
>=sci-ml/evaluate-0.4.0[${PYTHON_SINGLE_USEDEP}]
vllm? ( >=dev-python/vllm-0.4.2[${PYTHON_SINGLE_USEDEP}] )
$(python_gen_cond_dep '
dev-python/dill[${PYTHON_USEDEP}]
dev-python/jinja2[${PYTHON_USEDEP}]
dev-python/jsonlines[${PYTHON_USEDEP}]
dev-python/more-itertools[${PYTHON_USEDEP}]
dev-python/numpy[${PYTHON_USEDEP}]
dev-python/pytablewriter[${PYTHON_USEDEP}]
dev-python/rouge-score[${PYTHON_USEDEP}]
dev-python/sacrebleu[${PYTHON_USEDEP}]
dev-python/sqlitedict[${PYTHON_USEDEP}]
dev-python/typing-extensions[${PYTHON_USEDEP}]
dev-python/word2number[${PYTHON_USEDEP}]
dev-python/zstandard[${PYTHON_USEDEP}]
dev-python/scikit-learn[${PYTHON_USEDEP}]
api? (
dev-python/aiohttp[${PYTHON_USEDEP}]
dev-python/requests[${PYTHON_USEDEP}]
dev-python/tenacity[${PYTHON_USEDEP}]
dev-python/tiktoken[${PYTHON_USEDEP}]
dev-python/tqdm[${PYTHON_USEDEP}]
)
math? (
=dev-python/antlr4-python3-runtime-4.11*[${PYTHON_USEDEP}]
>=dev-python/sympy-1.12[${PYTHON_USEDEP}]
~dev-python/math-verify-0.9.0[${PYTHON_USEDEP}]
)
sentencepiece? ( >=sci-ml/sentencepiece-0.1.98[${PYTHON_USEDEP}] )
statsmodels? ( dev-python/statsmodels[${PYTHON_USEDEP}] )
')
"
Manage flags for this package:
euse -i <flag> -p sci-ml/lm-eval
euse -E <flag> -p sci-ml/lm-eval
euse -D <flag> -p sci-ml/lm-eval
>=sci-ml/datasets-2.16.0[${PYTHON_SINGLE_USEDEP}]
>=sci-ml/evaluate-0.4.0[${PYTHON_SINGLE_USEDEP}]
vllm? ( >=dev-python/vllm-0.4.2[${PYTHON_SINGLE_USEDEP}] )
$(python_gen_cond_dep '
dev-python/dill[${PYTHON_USEDEP}]
dev-python/jinja2[${PYTHON_USEDEP}]
dev-python/jsonlines[${PYTHON_USEDEP}]
dev-python/more-itertools[${PYTHON_USEDEP}]
dev-python/numpy[${PYTHON_USEDEP}]
dev-python/pytablewriter[${PYTHON_USEDEP}]
dev-python/rouge-score[${PYTHON_USEDEP}]
dev-python/sacrebleu[${PYTHON_USEDEP}]
dev-python/sqlitedict[${PYTHON_USEDEP}]
dev-python/typing-extensions[${PYTHON_USEDEP}]
dev-python/word2number[${PYTHON_USEDEP}]
dev-python/zstandard[${PYTHON_USEDEP}]
dev-python/scikit-learn[${PYTHON_USEDEP}]
api? (
dev-python/aiohttp[${PYTHON_USEDEP}]
dev-python/requests[${PYTHON_USEDEP}]
dev-python/tenacity[${PYTHON_USEDEP}]
dev-python/tiktoken[${PYTHON_USEDEP}]
dev-python/tqdm[${PYTHON_USEDEP}]
)
math? (
=dev-python/antlr4-python3-runtime-4.11*[${PYTHON_USEDEP}]
>=dev-python/sympy-1.12[${PYTHON_USEDEP}]
~dev-python/math-verify-0.9.0[${PYTHON_USEDEP}]
)
sentencepiece? ( >=sci-ml/sentencepiece-0.1.98[${PYTHON_USEDEP}] )
statsmodels? ( dev-python/statsmodels[${PYTHON_USEDEP}] )
')