Install this version:
emerge -a =app-misc/llama-cpp-9490
If this version is masked, you can unmask it using the autounmask tool or standard emerge options:
autounmask =app-misc/llama-cpp-9490
Or alternatively:
emerge --autounmask-write -a =app-misc/llama-cpp-9490
| Version | EAPI | Keywords | Slot |
|---|---|---|---|
| 9490 | 8 | ~amd64 | 0 |
# Copyright 2024-2026 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
EAPI=8
# Minimum ROCm release; interpolated into the hip/hipBLAS/rocWMMA version
# bounds in CDEPEND. It is assigned before `inherit`, presumably because
# rocm.eclass reads it as well -- confirm against the eclass.
ROCM_VERSION=7.1
inherit cmake cuda rocm linux-info systemd
# Pinned revision of the ggml-org/tiny-llamas Hugging Face repository; it is
# part of both the download URL and the local distfile name of the example
# model fetched under USE=examples.
TINY_LLAMAS_COMMIT="99dd1a73db5a37100bd4ae633f4cfce6560e1567"
DESCRIPTION="LLM inference in C/C++ (GGML/GGUF) — CPU + optional GPU backends"
HOMEPAGE="https://github.com/ggml-org/llama.cpp"
# The upstream tag is "b" followed by ${PV}; the tarball is renamed to
# ${P}.gh.tar.gz, and S points at the correspondingly named llama.cpp-b${PV}
# directory under WORKDIR.
SRC_URI="https://github.com/ggml-org/llama.cpp/archive/refs/tags/b${PV}.tar.gz -> ${P}.gh.tar.gz"
S="${WORKDIR}/llama.cpp-b${PV}"
# USE=examples additionally fetches the stories15M-q4_0 GGUF model, stored
# under a distfile name that embeds the pinned commit.
SRC_URI+="
examples? (
https://huggingface.co/ggml-org/tiny-llamas/resolve/${TINY_LLAMAS_COMMIT}/stories15M-q4_0.gguf
-> ggml-org_models_tinyllamas_stories15M-q4_0-${TINY_LLAMAS_COMMIT}.gguf
)
"
LICENSE="MIT"
SLOT="0"
KEYWORDS="~amd64"
# wmma: rocWMMA flash-attention on RDNA3+/CDNA GPUs
# see https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md#hip
# The cpu_flags_x86_* entries are mapped one-to-one onto GGML_* CMake switches
# in src_configure; openmp is the only flag enabled by default.
IUSE="
curl openblas +openmp blis rocm cuda opencl openssl vulkan flexiblas wmma
examples cpu_flags_x86_avx cpu_flags_x86_avx2 cpu_flags_x86_fma3
cpu_flags_x86_f16c cpu_flags_x86_bmi2 cpu_flags_x86_avx_vnni
cpu_flags_x86_avx512f cpu_flags_x86_avx512vbmi cpu_flags_x86_avx512_vnni
cpu_flags_x86_avx512_bf16
"
# Constraints:
#  - at most one BLAS provider (openblas, blis, flexiblas) may be selected;
#  - wmma requires rocm;
#  - CPU feature flags form a chain: avx2 needs avx, avx512f needs avx2, and
#    every avx512 sub-feature needs avx512f.
# NOTE(review): no AMDGPU-target requirement is expressed for USE=rocm here;
# confirm whether rocm.eclass expects one (get_amdgpu_flags is used in
# src_configure).
REQUIRED_USE="
?? ( openblas blis flexiblas )
wmma? ( rocm )
cpu_flags_x86_avx2? ( cpu_flags_x86_avx )
cpu_flags_x86_avx512f? ( cpu_flags_x86_avx2 )
cpu_flags_x86_avx512vbmi? ( cpu_flags_x86_avx512f )
cpu_flags_x86_avx512_vnni? ( cpu_flags_x86_avx512f )
cpu_flags_x86_avx512_bf16? ( cpu_flags_x86_avx512f )
"
# curl: needed for pulling models from huggingface
# numpy: used by convert_hf_to_gguf.py
# CDEPEND holds the libraries needed both at build time and at run time; it is
# included verbatim in DEPEND and RDEPEND below. The ROCm packages are bounded
# from below by ROCM_VERSION.
CDEPEND="
curl? ( net-misc/curl:= )
openblas? ( sci-libs/openblas:= )
openmp? ( llvm-runtimes/openmp:= )
blis? ( sci-libs/blis:= )
flexiblas? ( sci-libs/flexiblas:= )
rocm? (
>=dev-util/hip-${ROCM_VERSION}:=
>=sci-libs/hipBLAS-${ROCM_VERSION}:=
wmma? ( >=sci-libs/rocWMMA-${ROCM_VERSION}:= )
)
cuda? ( dev-util/nvidia-cuda-toolkit:= )
openssl? ( dev-libs/openssl:= )
"
# Build-only additions: header packages for the OpenCL and Vulkan backends.
DEPEND="${CDEPEND}
opencl? ( dev-util/opencl-headers )
vulkan? (
dev-util/spirv-headers
dev-util/vulkan-headers
)
"
# Runtime-only additions: numpy, the OpenCL/Vulkan loaders, and the dedicated
# llama-cpp user and group that pkg_preinst assigns the state directory to.
# NOTE(review): the OpenCL and Vulkan loaders are listed for run time only;
# confirm the build does not need to link against them.
RDEPEND="${CDEPEND}
dev-python/numpy
opencl? ( dev-libs/opencl-icd-loader )
vulkan? ( media-libs/vulkan-loader )
acct-user/llama-cpp
acct-group/llama-cpp
"
# NOTE(review): shaderc is required unconditionally; presumably only the
# vulkan backend needs its shader compiler -- confirm whether this could be
# made conditional on USE=vulkan.
BDEPEND="media-libs/shaderc"
# Kernel sanity check for the ROCm backend.
#
# Does nothing unless USE=rocm is set. Otherwise it runs the linux-info setup
# and, when a kernel version and its configuration can be found, warns (without
# failing) if HSA_AMD_SVM is not enabled.
pkg_setup() {
	# Nothing to verify for non-ROCm builds.
	use rocm || return 0

	linux-info_pkg_setup

	# Without a detectable kernel and config there is nothing to inspect.
	if ! { linux-info_get_any_version && linux_config_exists; }; then
		return 0
	fi

	linux_chkconfig_present HSA_AMD_SVM ||
		ewarn "ROCm/HIP requires HSA_AMD_SVM enabled in your kernel config."
}
# Prepare the sources.
#
# Runs the CUDA preparation step when USE=cuda is set, then the CMake one.
# With USE=examples the pre-fetched tiny-llamas model is copied from DISTDIR
# into ${BUILD_DIR}/tinyllamas under its upstream file name.
src_prepare() {
	if use cuda; then
		cuda_src_prepare
	fi
	cmake_src_prepare

	# The model is only staged for example builds.
	use examples || return 0

	local model_distfile="ggml-org_models_tinyllamas_stories15M-q4_0-${TINY_LLAMAS_COMMIT}.gguf"
	local model_dir="${BUILD_DIR}/tinyllamas"

	mkdir -p "${model_dir}" || die
	cp "${DISTDIR}/${model_distfile}" "${model_dir}/stories15M-q4_0.gguf" || die
}
# Configure the CMake build.
#
# Assembles mycmakeargs from the USE flags: general build options, an explicit
# CPU-feature mapping from cpu_flags_x86_*, the GPU/compute backends, an
# optional BLAS vendor, and a private library directory. cmake_src_configure
# is then invoked and reads mycmakeargs.
#
# With USE=cuda, CUDAHOSTCXX is exported for the configure step and the CUDA
# sandbox exceptions are added. With USE=rocm, hipcc is selected and the HIP
# backend, its GPU targets and the rocWMMA switch are appended.
src_configure() {
	local mycmakeargs=(
		# -- Build options --
		-DLLAMA_BUILD_TESTS=OFF
		-DLLAMA_BUILD_EXAMPLES=$(usex examples)
		-DLLAMA_BUILD_SERVER=ON
		-DCMAKE_SKIP_BUILD_RPATH=ON
		-DGGML_RPC=ON
		-DLLAMA_CURL=$(usex curl)
		-DLLAMA_OPENSSL=$(usex openssl)
		-DBUILD_NUMBER="1"
		-DGENTOO_REMOVE_CMAKE_BLAS_HACK=ON

		# -- CPU feature flags --
		# No -march=native: explicit mapping from CPU_FLAGS_X86 for
		# reproducible/portable builds. SSE4.2 is the baseline.
		-DGGML_NATIVE=0
		-DGGML_SSE42=ON
		-DGGML_AVX=$(usex cpu_flags_x86_avx)
		-DGGML_AVX2=$(usex cpu_flags_x86_avx2)
		-DGGML_BMI2=$(usex cpu_flags_x86_bmi2)
		-DGGML_FMA=$(usex cpu_flags_x86_fma3)
		-DGGML_F16C=$(usex cpu_flags_x86_f16c)
		-DGGML_AVX_VNNI=$(usex cpu_flags_x86_avx_vnni)
		-DGGML_AVX512=$(usex cpu_flags_x86_avx512f)
		-DGGML_AVX512_VBMI=$(usex cpu_flags_x86_avx512vbmi)
		-DGGML_AVX512_VNNI=$(usex cpu_flags_x86_avx512_vnni)
		-DGGML_AVX512_BF16=$(usex cpu_flags_x86_avx512_bf16)

		# -- Backends --
		-DGGML_CUDA=$(usex cuda)
		-DGGML_OPENCL=$(usex opencl)
		-DGGML_OPENMP=$(usex openmp)
		-DGGML_VULKAN=$(usex vulkan)

		# -- Install paths (avoid clashing with whisper.cpp) --
		-DCMAKE_INSTALL_LIBDIR="${EPREFIX}/usr/$(get_libdir)/llama.cpp"
		-DCMAKE_INSTALL_RPATH="${EPREFIX}/usr/$(get_libdir)/llama.cpp"
	)

	# BLAS vendor selection (mutually exclusive via REQUIRED_USE)
	if use openblas; then
		mycmakeargs+=(
			-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
		)
	fi
	if use blis; then
		mycmakeargs+=(
			-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=FLAME
		)
	fi
	if use flexiblas; then
		mycmakeargs+=(
			-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=FlexiBLAS
		)
	fi

	# CUDA: set host compiler and sandbox for device node symlinks
	if use cuda; then
		local -x CUDAHOSTCXX="$(cuda_gccdir)"
		cuda_add_sandbox
		addpredict "/dev/char/"
	fi

	# ROCm/HIP: use hipcc and set GPU architecture targets
	if use rocm; then
		rocm_use_hipcc
		mycmakeargs+=(
			-DGGML_HIP=ON
			# Quoted so the target list always reaches CMake as exactly
			# one argument, whatever characters the helper prints.
			-DAMDGPU_TARGETS="$(get_amdgpu_flags)"
			-DGGML_HIP_ROCWMMA_FATTN=$(usex wmma)
		)
	fi

	cmake_src_configure
}
# Install the package into the image.
#
# After the regular CMake install this additionally installs the rpc-server
# binary from the build tree, drops the installed headers, installs the
# llama-server configuration file and systemd unit from FILESDIR, and keeps an
# empty model directory under /var/lib/llama-cpp.
src_install() {
	cmake_src_install
	dobin "${BUILD_DIR}/bin/rpc-server"

	# Remove installed headers to avoid clashing with whisper.cpp.
	# Abort if the removal fails, so the headers cannot silently stay in the
	# image and defeat the purpose of this step.
	rm -rf "${ED}/usr/include" || die "failed to remove installed headers"

	# Systemd service unit and environment configuration
	insinto /etc/llama-cpp
	doins "${FILESDIR}"/llama-server.conf
	systemd_dounit "${FILESDIR}"/llama-server.service

	# State directory for model storage
	keepdir /var/lib/llama-cpp/models
}
# Final adjustments to the state directory before the image is merged.
#
# Keeps the model directory, then hands /var/lib/llama-cpp and its models
# subdirectory to llama-cpp:llama-cpp with mode 0750.
pkg_preinst() {
	local state_root=/var/lib/llama-cpp
	local model_dir="${state_root}/models"

	keepdir "${model_dir}"
	fowners llama-cpp:llama-cpp "${state_root}" "${model_dir}"
	fperms 0750 "${state_root}" "${model_dir}"
}
# Post-install messages.
#
# Logs the installed binaries and the steps for running llama-server as a
# systemd service, then warns that the shipped configuration defaults need
# adjusting (thread count and model path).
pkg_postinst() {
	# Informational lines, emitted through elog in this order.
	local -a info_lines=(
		'Installed binaries: llama-server, llama-cli, llama-quantize, rpc-server, ...'
		''
		'Running as a systemd service:'
		' 1. Place a GGUF model in /var/lib/llama-cpp/models/'
		' 2. Edit /etc/llama-cpp/llama-server.conf (set LLAMA_MODEL, LLAMA_THREADS)'
		' 3. systemctl enable --now llama-server'
		' The API is then available at http://${LLAMA_HOST}:${LLAMA_PORT} (OpenAI-compatible)'
		''
	)
	# Warning lines, emitted through ewarn in this order.
	local -a warn_lines=(
		'Defaults in /etc/llama-cpp/llama-server.conf are CONSERVATIVE:'
		' LLAMA_THREADS=2 -- adjust to your physical core count (not SMT threads)!'
		' LLAMA_MODEL=... -- must point to an actual GGUF file!'
		'Without adjustment the service runs on 2 threads or fails to find a model.'
	)
	local text

	for text in "${info_lines[@]}"; do
		elog "${text}"
	done
	for text in "${warn_lines[@]}"; do
		ewarn "${text}"
	done
}
Manage flags for this package:
euse -i <flag> -p app-misc/llama-cpp
euse -E <flag> -p app-misc/llama-cpp
euse -D <flag> -p app-misc/llama-cpp
DEPEND:
curl? ( net-misc/curl:= )
openblas? ( sci-libs/openblas:= )
openmp? ( llvm-runtimes/openmp:= )
blis? ( sci-libs/blis:= )
flexiblas? ( sci-libs/flexiblas:= )
rocm? (
>=dev-util/hip-7.1:=
>=sci-libs/hipBLAS-7.1:=
wmma? ( >=sci-libs/rocWMMA-7.1:= )
)
cuda? ( dev-util/nvidia-cuda-toolkit:= )
openssl? ( dev-libs/openssl:= )
opencl? ( dev-util/opencl-headers )
vulkan? (
dev-util/spirv-headers
dev-util/vulkan-headers
)
RDEPEND:
curl? ( net-misc/curl:= )
openblas? ( sci-libs/openblas:= )
openmp? ( llvm-runtimes/openmp:= )
blis? ( sci-libs/blis:= )
flexiblas? ( sci-libs/flexiblas:= )
rocm? (
>=dev-util/hip-7.1:=
>=sci-libs/hipBLAS-7.1:=
wmma? ( >=sci-libs/rocWMMA-7.1:= )
)
cuda? ( dev-util/nvidia-cuda-toolkit:= )
openssl? ( dev-libs/openssl:= )
dev-python/numpy
opencl? ( dev-libs/opencl-icd-loader )
vulkan? ( media-libs/vulkan-loader )
acct-user/llama-cpp
acct-group/llama-cpp
BDEPEND:
media-libs/shaderc
| Type | File | Size | Source URLs |
|---|---|---|---|
| DIST | llama-cpp-9490.gh.tar.gz | 34031430 bytes | https://github.com/ggml-org/llama.cpp/archive/refs/tags/b9490.tar.gz |