Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,12 @@ Begin by setting up a Python 3.X environment with a recent, CUDA-enabled version
pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch
```

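On headless, shared, or CPU-only Python environments, PyTorch may be unable to detect a target GPU during installation. In that case, set `TCNN_CUDA_ARCHITECTURES` explicitly to the compute capability of the target GPU before invoking `pip` (several values may be separated by `,` or `;`), for example `86` for RTX 30xx GPUs: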
```sh
export TCNN_CUDA_ARCHITECTURES=86
pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch
```

Alternatively, if you would like to install from a local clone of __tiny-cuda-nn__, invoke
```sh
tiny-cuda-nn$ cd bindings/torch
Expand Down
272 changes: 168 additions & 104 deletions bindings/torch/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,56 @@

import re
from setuptools import setup
from pkg_resources import parse_version
from packaging.version import parse as parse_version
import subprocess
import shutil
import sys
import torch
from glob import glob

if "CUDA_HOME" not in os.environ and "CUDA_PATH" in os.environ:
os.environ["CUDA_HOME"] = os.environ["CUDA_PATH"]

from torch.utils.cpp_extension import BuildExtension, CUDAExtension

SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
ROOT_DIR = os.path.dirname(os.path.dirname(SCRIPT_DIR))

# Arguments that mark an invocation as not needing an extension build:
# should_build_extensions() returns False as soon as any of these appears
# in argv[1:].
# NOTE(review): these look like the setuptools/distutils help and metadata
# display options -- confirm the list against the setuptools version in use.
HELP_FLAGS = {
"-h",
"--help",
"--help-commands",
"--name",
"--version",
"--fullname",
"--author",
"--author-email",
"--maintainer",
"--maintainer-email",
"--contact",
"--contact-email",
"--url",
"--license",
"--description",
"--long-description",
"--platforms",
"--classifiers",
"--keywords",
"--provides",
"--requires",
"--obsoletes",
}
# Commands for which should_build_extensions() returns True. That function
# additionally accepts any command whose name starts with "bdist_", so the
# "bdist_*" entries listed here are explicit examples rather than the full set.
BUILD_COMMANDS = {
"bdist",
"bdist_egg",
"bdist_wheel",
"build",
"build_ext",
"develop",
"editable_wheel",
"install",
}

def min_supported_compute_capability(cuda_version):
if cuda_version >= parse_version("13.0"):
return 75
Expand All @@ -33,6 +72,31 @@ def max_supported_compute_capability(cuda_version):
else:
return 120

def should_build_extensions(argv):
    """Decide whether this setup.py invocation needs the extensions built.

    Only ``argv[1:]`` is inspected (``argv[0]`` is skipped).

    Returns ``False`` if any argument is listed in ``HELP_FLAGS``. Otherwise
    returns ``True`` when at least one non-empty argument that does not start
    with ``-`` is listed in ``BUILD_COMMANDS`` or begins with ``bdist_``, and
    ``False`` when no such argument exists.
    """
    cli_args = argv[1:]

    # A single help/metadata flag anywhere on the command line wins over any
    # build command that may also be present.
    for candidate in cli_args:
        if candidate in HELP_FLAGS:
            return False

    for candidate in cli_args:
        # Empty strings and options ("-x", "--foo") are never commands.
        if not candidate or candidate.startswith("-"):
            continue
        if candidate in BUILD_COMMANDS or candidate.startswith("bdist_"):
            return True

    return False

def detect_compute_capabilities():
    """Return the list of GPU compute capabilities to build extensions for.

    Resolution order:

    1. The ``TCNN_CUDA_ARCHITECTURES`` environment variable, read as a list of
       integers separated by ``,`` or ``;`` (e.g. ``"86"`` or ``"75;86"``).
       Surrounding whitespace and empty entries (such as those produced by a
       trailing or doubled separator) are ignored.
    2. The capability PyTorch reports via ``torch.cuda.get_device_capability()``
       when ``torch.cuda.is_available()`` is true, encoded as
       ``major * 10 + minor``.

    Returns:
        A non-empty list of ints.

    Raises:
        ValueError: if ``TCNN_CUDA_ARCHITECTURES`` contains a non-empty entry
            that is not an integer.
        EnvironmentError: if neither source yields a compute capability.
    """
    requested = os.environ.get("TCNN_CUDA_ARCHITECTURES", "")
    tokens = (token.strip() for token in requested.replace(";", ",").split(","))
    # Skip empty tokens so inputs like "86;" or "75,,86" do not crash on
    # int(""); a value made only of separators/whitespace is treated as unset.
    compute_capabilities = [int(token) for token in tokens if token]
    if compute_capabilities:
        print(f"Obtained compute capabilities {compute_capabilities} from environment variable TCNN_CUDA_ARCHITECTURES")
        return compute_capabilities

    if torch.cuda.is_available():
        major, minor = torch.cuda.get_device_capability()
        compute_capabilities = [major * 10 + minor]
        print(f"Obtained compute capability {compute_capabilities[0]} from PyTorch")
        return compute_capabilities

    raise EnvironmentError(
        "Unknown compute capability for extension build. "
        "Specify TCNN_CUDA_ARCHITECTURES manually (for example, 86 for RTX 30xx GPUs) "
        "or install PyTorch with CUDA support so the target GPU can be detected automatically."
    )

# Find version of tinycudann by scraping CMakeLists.txt
with open(os.path.join(ROOT_DIR, "CMakeLists.txt"), "r") as cmakelists:
for line in cmakelists.readlines():
Expand All @@ -41,26 +105,22 @@ def max_supported_compute_capability(cuda_version):
break

print(f"Building PyTorch extension for tiny-cuda-nn version {VERSION}")
build_extensions = should_build_extensions(sys.argv)

ext_modules = []

if "TCNN_CUDA_ARCHITECTURES" in os.environ and os.environ["TCNN_CUDA_ARCHITECTURES"]:
compute_capabilities = [int(x) for x in os.environ["TCNN_CUDA_ARCHITECTURES"].replace(";", ",").split(",")]
print(f"Obtained compute capabilities {compute_capabilities} from environment variable TCNN_CUDA_ARCHITECTURES")
elif torch.cuda.is_available():
major, minor = torch.cuda.get_device_capability()
compute_capabilities = [major * 10 + minor]
print(f"Obtained compute capability {compute_capabilities[0]} from PyTorch")
if build_extensions:
compute_capabilities = detect_compute_capabilities()
else:
raise EnvironmentError("Unknown compute capability. Specify the target compute capabilities in the TCNN_CUDA_ARCHITECTURES environment variable or install PyTorch with the CUDA backend to detect it automatically.")
print("Skipping GPU architecture detection for metadata-only setup command.")
compute_capabilities = []

include_networks = True
if "--no-networks" in sys.argv:
include_networks = False
sys.argv.remove("--no-networks")
print("Building >> without << neural networks (just the input encodings)")

if os.name == "nt":
if build_extensions and os.name == "nt":
def find_cl_path():
import glob
for executable in ["Program Files (x86)", "Program Files"]:
Expand All @@ -84,7 +144,7 @@ def find_cl_path():
cpp_standard = 14

# Get CUDA version and make sure the targeted compute capability is compatible
if os.system("nvcc --version") == 0:
if build_extensions and os.system("nvcc --version") == 0:
nvcc_out = subprocess.check_output(["nvcc", "--version"]).decode()
cuda_version = re.search(r"release (\S+),", nvcc_out)

Expand All @@ -105,31 +165,6 @@ def find_cl_path():
print(f"WARNING: Compute capabilities {compute_capabilities} are not all supported by the installed CUDA version {cuda_version}. Targeting {supported_compute_capabilities} instead.")
compute_capabilities = supported_compute_capabilities

min_compute_capability = min(compute_capabilities)

print(f"Targeting C++ standard {cpp_standard}")

base_nvcc_flags = [
f"-std=c++{cpp_standard}",
"--extended-lambda",
"--use_fast_math",
"--expt-relaxed-constexpr",
# The following definitions must be undefined
# since TCNN requires half-precision operation.
"-U__CUDA_NO_HALF_OPERATORS__",
"-U__CUDA_NO_HALF_CONVERSIONS__",
"-U__CUDA_NO_HALF2_OPERATORS__",
]

if os.name == "posix":
base_cflags = [f"-std=c++{cpp_standard}"]
base_nvcc_flags += [
"-Xcompiler=-Wno-float-conversion",
"-Xcompiler=-fno-strict-aliasing",
]
elif os.name == "nt":
base_cflags = [f"/std:c++{cpp_standard}"]


# Some containers set this to contain old architectures that won't compile. We only need the one installed in the machine.
os.environ["TORCH_CUDA_ARCH_LIST"] = ""
Expand All @@ -138,45 +173,6 @@ def find_cl_path():
bindings_dir = os.path.dirname(__file__)
root_dir = os.path.abspath(os.path.join(bindings_dir, "../.."))

base_definitions = [
# PyTorch-supplied parameters may be unaligned. TCNN must be made aware of this such that
# it does not optimize for aligned memory accesses.
"-DTCNN_PARAMS_UNALIGNED",
"-DTCNN_RTC",
"-DTCNN_RTC_USE_FAST_MATH",
]

if "TCNN_HALF_PRECISION" in os.environ:
enable_half = os.environ["TCNN_HALF_PRECISION"].lower() in ["1", "true", "on", "yes"]
base_definitions.append(f"-DTCNN_HALF_PRECISION={int(enable_half)}")
print(f"Forcing TCNN_HALF_PRECISION to {'ON' if enable_half else 'OFF'}")
else:
if min_compute_capability == 61 or min_compute_capability <= 52:
enable_half = False
else:
enable_half = True
print(f"Auto-detecting TCNN_HALF_PRECISION: {'ON' if enable_half else 'OFF'} (Arch: {min_compute_capability})")
base_definitions.append(f"-DTCNN_HALF_PRECISION={int(enable_half)}")

base_source_files = [
"tinycudann/bindings.cpp",
"../../dependencies/fmt/src/format.cc",
"../../dependencies/fmt/src/os.cc",
"../../src/cpp_api.cu",
"../../src/common_host.cu",
"../../src/encoding.cu",
"../../src/object.cu",
"../../src/rtc_kernel.cu",
]

if include_networks:
base_source_files += [
"../../src/network.cu",
"../../src/cutlass_mlp.cu",
]
else:
base_definitions.append("-DTCNN_NO_NETWORKS")

# Copy headers required by RTC at runtime
rtc_dir = os.path.join(bindings_dir, "tinycudann", "rtc")
rtc_include_dir = os.path.join(rtc_dir, "include")
Expand All @@ -186,9 +182,9 @@ def find_cl_path():
os.makedirs(rtc_cache_dir, exist_ok=True)

nvcc_path = shutil.which("nvcc")
if nvcc_path is None:
if build_extensions and nvcc_path is None:
print(f"WARNING: could not find CUDA include directory. JIT compilation will not be supported.")
else:
if nvcc_path is not None:
cuda_include_dir = os.path.join(os.path.dirname(os.path.dirname(nvcc_path)), "include")

cuda_headers = glob(f"{cuda_include_dir}/cuda_fp16*") + glob(f"{cuda_include_dir}/vector*")
Expand All @@ -207,34 +203,102 @@ def copy_files(whence, files):
copy_files(f"{root_dir}/include", tcnn_headers)
copy_files(f"{root_dir}/dependencies", pcg32_headers)

def make_extension(compute_capability):
nvcc_flags = base_nvcc_flags + [f"-gencode=arch=compute_{compute_capability},code={code}_{compute_capability}" for code in ["compute", "sm"]]
definitions = base_definitions + [f"-DTCNN_MIN_GPU_ARCH={compute_capability}"]
cmdclass = {}

if build_extensions:
min_compute_capability = min(compute_capabilities)

print(f"Targeting C++ standard {cpp_standard}")

base_nvcc_flags = [
f"-std=c++{cpp_standard}",
"--extended-lambda",
"--use_fast_math",
"--expt-relaxed-constexpr",
# The following definitions must be undefined
# since TCNN requires half-precision operation.
"-U__CUDA_NO_HALF_OPERATORS__",
"-U__CUDA_NO_HALF_CONVERSIONS__",
"-U__CUDA_NO_HALF2_OPERATORS__",
]

if os.name == "posix":
base_cflags = [f"-std=c++{cpp_standard}"]
base_nvcc_flags += [
"-Xcompiler=-Wno-float-conversion",
"-Xcompiler=-fno-strict-aliasing",
]
elif os.name == "nt":
base_cflags = [f"/std:c++{cpp_standard}"]

base_definitions = [
# PyTorch-supplied parameters may be unaligned. TCNN must be made aware of this such that
# it does not optimize for aligned memory accesses.
"-DTCNN_PARAMS_UNALIGNED",
"-DTCNN_RTC",
"-DTCNN_RTC_USE_FAST_MATH",
]

if include_networks and compute_capability > 70:
source_files = base_source_files + ["../../src/fully_fused_mlp.cu"]
if "TCNN_HALF_PRECISION" in os.environ:
enable_half = os.environ["TCNN_HALF_PRECISION"].lower() in ["1", "true", "on", "yes"]
base_definitions.append(f"-DTCNN_HALF_PRECISION={int(enable_half)}")
print(f"Forcing TCNN_HALF_PRECISION to {'ON' if enable_half else 'OFF'}")
else:
source_files = base_source_files

nvcc_flags = nvcc_flags + definitions
cflags = base_cflags + definitions

ext = CUDAExtension(
name=f"tinycudann_bindings._{compute_capability}_C",
sources=source_files,
include_dirs=[
f"{root_dir}/include",
f"{root_dir}/dependencies",
f"{root_dir}/dependencies/cutlass/include",
f"{root_dir}/dependencies/cutlass/tools/util/include",
f"{root_dir}/dependencies/fmt/include",
],
extra_compile_args={"cxx": cflags, "nvcc": nvcc_flags},
libraries=["cuda", "nvrtc"],
)
return ext
if min_compute_capability == 61 or min_compute_capability <= 52:
enable_half = False
else:
enable_half = True
print(f"Auto-detecting TCNN_HALF_PRECISION: {'ON' if enable_half else 'OFF'} (Arch: {min_compute_capability})")
base_definitions.append(f"-DTCNN_HALF_PRECISION={int(enable_half)}")

base_source_files = [
"tinycudann/bindings.cpp",
"../../dependencies/fmt/src/format.cc",
"../../dependencies/fmt/src/os.cc",
"../../src/cpp_api.cu",
"../../src/common_host.cu",
"../../src/encoding.cu",
"../../src/object.cu",
"../../src/rtc_kernel.cu",
]

ext_modules = [make_extension(comp) for comp in compute_capabilities]
if include_networks:
base_source_files += [
"../../src/network.cu",
"../../src/cutlass_mlp.cu",
]
else:
base_definitions.append("-DTCNN_NO_NETWORKS")

def make_extension(compute_capability):
    """Build the ``CUDAExtension`` for a single GPU compute capability.

    Args:
        compute_capability: integer architecture (e.g. ``86``); it is embedded
            in the extension name, the ``-gencode`` flags and the
            ``TCNN_MIN_GPU_ARCH`` definition.

    Returns:
        A ``CUDAExtension`` named ``tinycudann_bindings._<arch>_C`` assembled
        from the module-level base flags, definitions and source lists.
    """
    # One -gencode entry each for the "compute" and "sm" code variants of
    # this architecture.
    gencode_flags = [
        f"-gencode=arch=compute_{compute_capability},code={code}_{compute_capability}"
        for code in ["compute", "sm"]
    ]
    definitions = base_definitions + [f"-DTCNN_MIN_GPU_ARCH={compute_capability}"]

    # fully_fused_mlp.cu is only compiled when networks are included and the
    # architecture is above 70.
    source_files = base_source_files
    if include_networks and compute_capability > 70:
        source_files = base_source_files + ["../../src/fully_fused_mlp.cu"]

    # The same definitions are passed to both the host compiler and nvcc.
    compile_args = {
        "cxx": base_cflags + definitions,
        "nvcc": base_nvcc_flags + gencode_flags + definitions,
    }

    return CUDAExtension(
        name=f"tinycudann_bindings._{compute_capability}_C",
        sources=source_files,
        include_dirs=[
            f"{root_dir}/include",
            f"{root_dir}/dependencies",
            f"{root_dir}/dependencies/cutlass/include",
            f"{root_dir}/dependencies/cutlass/tools/util/include",
            f"{root_dir}/dependencies/fmt/include",
        ],
        extra_compile_args=compile_args,
        libraries=["cuda", "nvrtc"],
    )

ext_modules = [make_extension(comp) for comp in compute_capabilities]
cmdclass = {"build_ext": BuildExtension}

def package_files(directory):
paths = []
Expand Down Expand Up @@ -273,5 +337,5 @@ def package_files(directory):
include_package_data=True,
zip_safe=False,
ext_modules=ext_modules,
cmdclass={"build_ext": BuildExtension}
cmdclass=cmdclass
)