1
0
mirror of https://git.savannah.gnu.org/git/guix.git synced 2026-05-28 03:51:53 +02:00

gnu: python-pytorch-for-r-torch: Update to 2.7.1.

* gnu/packages/patches/python-pytorch-for-r-torch-fix-codegen.patch,
  gnu/packages/patches/python-pytorch-for-r-torch-system-libraries.patch: Update.
* gnu/packages/patches/python-pytorch-for-r-torch-without-kineto.patch: New file.
* gnu/local.mk (dist_patch_DATA): Record it.
* gnu/packages/machine-learning.scm
(python-pytorch-for-r-torch): Update to 2.7.1.
[source]: Use new patch.
[arguments]: Remove phase 'fix-aten-vec; copy and adjust 'use-system-libraries
phase from python-pytorch.
[inputs]: Inherit all from python-pytorch; replace gloo with gloo-for-r-torch.
[native-inputs]: Inherit all from python-pytorch.
[propagated-inputs]: Inherit all from python-pytorch.

Change-Id: Ib2cf511fc34f609bbc7e92971720b00c4523419f
This commit is contained in:
Ricardo Wurmus
2025-11-07 17:45:02 +01:00
parent 0f2df2dad5
commit 02f59daf07
5 changed files with 353 additions and 192 deletions
+1
View File
@@ -2176,6 +2176,7 @@ dist_patch_DATA = \
%D%/packages/patches/python-pytorch-fix-codegen.patch \ %D%/packages/patches/python-pytorch-fix-codegen.patch \
%D%/packages/patches/python-pytorch-for-r-torch-fix-codegen.patch \ %D%/packages/patches/python-pytorch-for-r-torch-fix-codegen.patch \
%D%/packages/patches/python-pytorch-for-r-torch-system-libraries.patch \ %D%/packages/patches/python-pytorch-for-r-torch-system-libraries.patch \
%D%/packages/patches/python-pytorch-for-r-torch-without-kineto.patch \
%D%/packages/patches/python-pytorch-runpath.patch \ %D%/packages/patches/python-pytorch-runpath.patch \
%D%/packages/patches/python-pytorch-system-libraries.patch \ %D%/packages/patches/python-pytorch-system-libraries.patch \
%D%/packages/patches/python-pytorch-without-kineto.patch \ %D%/packages/patches/python-pytorch-without-kineto.patch \
+64 -25
View File
@@ -4909,7 +4909,7 @@ in the audio domain.")
(delete 'disable-avx-dependencies))))) (delete 'disable-avx-dependencies)))))
(supported-systems '("x86_64-linux")))) (supported-systems '("x86_64-linux"))))
(define %python-pytorch-for-r-torch-version "2.0.1") (define %python-pytorch-for-r-torch-version "2.7.1")
(define %python-pytorch-for-r-torch-src (define %python-pytorch-for-r-torch-src
(origin (origin
@@ -4921,11 +4921,11 @@ in the audio domain.")
%python-pytorch-for-r-torch-version)) %python-pytorch-for-r-torch-version))
(sha256 (sha256
(base32 (base32
"0iirrn687i7sfv0p0i7dn89x3rf13a7l8y1y5h190h51yjxpxqxa")) "0734kfm66hsqdzgs2s4wj5yagvifijbgb0c5wfmp3qcdrraa9x57"))
(patches (search-patches (patches (search-patches
"python-pytorch-for-r-torch-system-libraries.patch" "python-pytorch-for-r-torch-system-libraries.patch"
"python-pytorch-runpath.patch" "python-pytorch-runpath.patch"
"python-pytorch-without-kineto.patch" "python-pytorch-for-r-torch-without-kineto.patch"
;; Some autogeneration scripts depend on the ;; Some autogeneration scripts depend on the
;; compiled PyTorch library. Therefore, we create ;; compiled PyTorch library. Therefore, we create
;; dummy versions which are regenerated later. ;; dummy versions which are regenerated later.
@@ -4952,34 +4952,73 @@ in the audio domain.")
(name "python-pytorch") (name "python-pytorch")
(version %python-pytorch-for-r-torch-version) (version %python-pytorch-for-r-torch-version)
(source %python-pytorch-for-r-torch-src) (source %python-pytorch-for-r-torch-src)
(inputs
(modify-inputs (package-inputs python-pytorch)
(replace "gloo" gloo-for-r-torch)))
(arguments (arguments
(substitute-keyword-arguments (package-arguments python-pytorch) (substitute-keyword-arguments (package-arguments python-pytorch)
((#:phases phases) ((#:phases phases)
#~(modify-phases #$phases #~(modify-phases #$phases
;; See https://github.com/pytorch/pytorch/issues/61244 (replace 'use-system-libraries
(add-after 'unpack 'fix-aten-vec
(lambda _ (lambda _
(for-each
(lambda (file)
;; Check whether the files exist for the
;; python-pytorch-for-r-torch package
(when (file-exists? file)
(substitute* file
(("\"miniz\\.h\"") "<miniz/miniz.h>")
(("<miniz\\.h>") "<miniz/miniz.h>"))))
'("caffe2/serialize/crc.cc"
"caffe2/serialize/inline_container.cc"
"torch/csrc/inductor/aoti_package/model_package_loader.cpp"))
(substitute* "aten/src/ATen/native/vulkan/api/Allocator.h"
(("<include/vk_mem_alloc.h>")
"<vk_mem_alloc.h>"))
;; Fix missing <algorithm> header for std::for_each in Vulkan API
(substitute* "aten/src/ATen/native/vulkan/api/QueryPool.cpp"
(("#include <utility>" all)
(string-append all "\n#include <algorithm>")))
;; For Vulkan
(substitute* "CMakeLists.txt"
(("append_cxx_flag.*-Werror=(return-type|range-loop-construct).*") ""))
(substitute* (substitute*
'("aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h" (cons*
"aten/src/ATen/cpu/vec/vec256/vec256_bfloat16.h") "torch/csrc/Module.cpp"
(("map\\(const __") "map(__")))))))) (map
(native-inputs (lambda (name)
(modify-inputs (package-native-inputs python-pytorch) (string-append
(replace "ideep-pytorch" ideep-pytorch-for-r-torch))) "torch/utils/benchmark/utils/valgrind_wrapper/"
(inputs name))
(modify-inputs (package-inputs python-pytorch) '("compat_bindings.cpp" "timer_callgrind_template.cpp")))
(prepend foxi) (("<callgrind.h>") "<valgrind/callgrind.h>"))
(prepend qnnpack) (setenv "USE_VULKAN" "1")
(replace "qnnpack-pytorch" qnnpack-pytorch-for-r-torch) ;; Tell 'setup.py' to let 'CMakeLists.txt' know that we
(replace "oneapi-dnnl" oneapi-dnnl-for-r-torch) ;; want to use "system libraries" instead of the bundled
(replace "xnnpack" xnnpack-for-r-torch))) ;; ones.
(propagated-inputs (setenv "USE_SYSTEM_LIBS" "1")
(modify-inputs (package-propagated-inputs python-pytorch) ;; For oneDNN
(append python-filelock (setenv "USE_MKLDNN" "1")
python-jinja2 ;; Only works with CUPTI
python-networkx (setenv "USE_KINETO" "0")
python-opt-einsum ;; Prevent CMake error by disabling explicitly
python-sympy))))) (setenv "USE_ITT" "0")
;; Disable on unsupported systems
(if #$(not (member
(or (%current-target-system)
(%current-system))
(package-transitive-supported-systems qnnpack)))
(setenv "USE_QNNPACK" "0"))
(substitute* '("requirements.txt" "setup.py")
(("sympy>=1\\.13\\.3")
"sympy>=1.13.1"))))
(replace 'skip-nccl-call
(lambda _
;; Comment-out `checkout_nccl()` invocation in build_pytorch().
(substitute* "tools/build_pytorch_libs.py"
(("^[[:blank:]]*checkout_nccl\\(\\)" all)
(string-append "# " all "\n pass")))))))))))
(define-public python-pytorch-geometric (define-public python-pytorch-geometric
(package (package
@@ -6,7 +6,7 @@ is later corrected. codegen_external.py is patched to avoid duplicate
functions and add the static keyword as in the existing generated file. functions and add the static keyword as in the existing generated file.
diff --git a/tools/gen_flatbuffers.sh b/tools/gen_flatbuffers.sh diff --git a/tools/gen_flatbuffers.sh b/tools/gen_flatbuffers.sh
index cc0263dbbf..ac34e84b82 100644 index cc0263dbb..ac34e84b8 100644
--- a/tools/gen_flatbuffers.sh --- a/tools/gen_flatbuffers.sh
+++ b/tools/gen_flatbuffers.sh +++ b/tools/gen_flatbuffers.sh
@@ -1,13 +1,13 @@ @@ -1,13 +1,13 @@
@@ -32,10 +32,10 @@ index cc0263dbbf..ac34e84b82 100644
-c "$ROOT/torch/csrc/jit/serialization/mobile_bytecode.fbs" -c "$ROOT/torch/csrc/jit/serialization/mobile_bytecode.fbs"
echo '// @generated' >> "$ROOT/torch/csrc/jit/serialization/mobile_bytecode_generated.h" echo '// @generated' >> "$ROOT/torch/csrc/jit/serialization/mobile_bytecode_generated.h"
diff --git a/torch/csrc/jit/tensorexpr/codegen_external.py b/torch/csrc/jit/tensorexpr/codegen_external.py diff --git a/torch/csrc/jit/tensorexpr/codegen_external.py b/torch/csrc/jit/tensorexpr/codegen_external.py
index 120520b139..0c8587f02d 100644 index 5dcf1b284..0e20b0c10 100644
--- a/torch/csrc/jit/tensorexpr/codegen_external.py --- a/torch/csrc/jit/tensorexpr/codegen_external.py
+++ b/torch/csrc/jit/tensorexpr/codegen_external.py +++ b/torch/csrc/jit/tensorexpr/codegen_external.py
@@ -16,9 +16,14 @@ def gen_external(native_functions_path, tags_path, external_path): @@ -21,9 +21,14 @@ def gen_external(native_functions_path, tags_path, external_path):
native_functions = parse_native_yaml(native_functions_path, tags_path) native_functions = parse_native_yaml(native_functions_path, tags_path)
func_decls = [] func_decls = []
func_registrations = [] func_registrations = []
@@ -51,7 +51,7 @@ index 120520b139..0c8587f02d 100644
args = schema.arguments args = schema.arguments
# Only supports extern calls for functions with out variants # Only supports extern calls for functions with out variants
if not schema.is_out_fn(): if not schema.is_out_fn():
@@ -48,7 +53,7 @@ def gen_external(native_functions_path, tags_path, external_path): @@ -63,7 +68,7 @@ def gen_external(native_functions_path, tags_path, external_path):
# print(tensor_decls, name, arg_names) # print(tensor_decls, name, arg_names)
func_decl = f"""\ func_decl = f"""\
@@ -61,7 +61,7 @@ index 120520b139..0c8587f02d 100644
void** buf_data, void** buf_data,
int64_t* buf_ranks, int64_t* buf_ranks,
diff --git a/torchgen/decompositions/gen_jit_decompositions.py b/torchgen/decompositions/gen_jit_decompositions.py diff --git a/torchgen/decompositions/gen_jit_decompositions.py b/torchgen/decompositions/gen_jit_decompositions.py
index 7cfbb803f9..2e69bb1868 100644 index b42948045..e1cfc73a5 100644
--- a/torchgen/decompositions/gen_jit_decompositions.py --- a/torchgen/decompositions/gen_jit_decompositions.py
+++ b/torchgen/decompositions/gen_jit_decompositions.py +++ b/torchgen/decompositions/gen_jit_decompositions.py
@@ -1,8 +1,12 @@ @@ -1,8 +1,12 @@
@@ -76,9 +76,9 @@ index 7cfbb803f9..2e69bb1868 100644
+else: +else:
+ decomposition_table = {} + decomposition_table = {}
# from torchgen.code_template import CodeTemplate
@@ -85,7 +89,7 @@ def write_decomposition_util_file(path: str) -> None: # from torchgen.code_template import CodeTemplate
@@ -86,7 +90,7 @@ def write_decomposition_util_file(path: str) -> None:
def main() -> None: def main() -> None:
@@ -88,40 +88,41 @@ index 7cfbb803f9..2e69bb1868 100644
write_decomposition_util_file(str(upgrader_path)) write_decomposition_util_file(str(upgrader_path))
diff --git a/torchgen/operator_versions/gen_mobile_upgraders.py b/torchgen/operator_versions/gen_mobile_upgraders.py diff --git a/torchgen/operator_versions/gen_mobile_upgraders.py b/torchgen/operator_versions/gen_mobile_upgraders.py
index e5287cffc5..57f3c38096 100644 index 845034cb7..a1c5767c2 100644
--- a/torchgen/operator_versions/gen_mobile_upgraders.py --- a/torchgen/operator_versions/gen_mobile_upgraders.py
+++ b/torchgen/operator_versions/gen_mobile_upgraders.py +++ b/torchgen/operator_versions/gen_mobile_upgraders.py
@@ -2,10 +2,12 @@ @@ -6,10 +6,13 @@ import os
import os
from enum import Enum from enum import Enum
from operator import itemgetter
from pathlib import Path from pathlib import Path
+import sys +import sys
from typing import Any, Dict, List from typing import Any
-import torch -import torch
-from torch.jit.generate_bytecode import generate_upgraders_bytecode -from torch.jit.generate_bytecode import generate_upgraders_bytecode
+if len(sys.argv) < 2 or sys.argv[1] != "dummy": +if len(sys.argv) < 2 or sys.argv[1] != "dummy":
+ import torch + import torch
+ from torch.jit.generate_bytecode import generate_upgraders_bytecode + from torch.jit.generate_bytecode import generate_upgraders_bytecode
+
from torchgen.code_template import CodeTemplate from torchgen.code_template import CodeTemplate
from torchgen.operator_versions.gen_mobile_upgraders_constant import ( from torchgen.operator_versions.gen_mobile_upgraders_constant import (
@@ -262,7 +264,10 @@ def construct_register_size(register_size_from_yaml: int) -> str: MOBILE_UPGRADERS_HEADER_DESCRIPTION,
@@ -263,7 +266,10 @@ def construct_register_size(register_size_from_yaml: int) -> str:
def construct_version_maps( def construct_version_maps(
upgrader_bytecode_function_to_index_map: Dict[str, Any] upgrader_bytecode_function_to_index_map: dict[str, Any],
) -> str: ) -> str:
- version_map = torch._C._get_operator_version_map() - version_map = torch._C._get_operator_version_map()
+ if len(sys.argv) < 2 or sys.argv[1] != "dummy": + if len(sys.argv) < 2 or sys.argv[1] != "dummy":
+ version_map = torch._C._get_operator_version_map() + version_map = torch._C._get_operator_version_map()
+ else: + else:
+ version_map = {} + version_map = {}
sorted_version_map_ = sorted(version_map.items(), key=lambda item: item[0]) # type: ignore[no-any-return] sorted_version_map_ = sorted(version_map.items(), key=itemgetter(0)) # type: ignore[no-any-return]
sorted_version_map = {name: lst for name, lst in sorted_version_map_} sorted_version_map = dict(sorted_version_map_)
@@ -375,7 +381,10 @@ def sort_upgrader(upgrader_list: list[dict[str, Any]]) -> list[dict[str, Any]]:
@@ -379,7 +384,10 @@ def sort_upgrader(upgrader_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
def main() -> None: def main() -> None:
- upgrader_list = generate_upgraders_bytecode() - upgrader_list = generate_upgraders_bytecode()
+ if len(sys.argv) < 2 or sys.argv[1] != "dummy": + if len(sys.argv) < 2 or sys.argv[1] != "dummy":
+ upgrader_list = generate_upgraders_bytecode() + upgrader_list = generate_upgraders_bytecode()
@@ -131,16 +132,24 @@ index e5287cffc5..57f3c38096 100644
for up in sorted_upgrader_list: for up in sorted_upgrader_list:
print("after sort upgrader : ", next(iter(up))) print("after sort upgrader : ", next(iter(up)))
diff --git a/torchgen/shape_functions/gen_jit_shape_functions.py b/torchgen/shape_functions/gen_jit_shape_functions.py diff --git a/torchgen/shape_functions/gen_jit_shape_functions.py b/torchgen/shape_functions/gen_jit_shape_functions.py
index c6336a6951..34e394d818 100644 index 56a3d8bf0..ffd0785fd 100644
--- a/torchgen/shape_functions/gen_jit_shape_functions.py --- a/torchgen/shape_functions/gen_jit_shape_functions.py
+++ b/torchgen/shape_functions/gen_jit_shape_functions.py +++ b/torchgen/shape_functions/gen_jit_shape_functions.py
@@ -18,16 +18,20 @@ you are in the root directory of the Pytorch git repo""" @@ -1,6 +1,7 @@
#!/usr/bin/env python3
import os
import sys
+import importlib
from importlib.util import module_from_spec, spec_from_file_location
from itertools import chain
from pathlib import Path
@@ -18,17 +19,21 @@ you are in the root directory of the Pytorch git repo"""
if not file_path.exists(): if not file_path.exists():
raise Exception(err_msg) raise Exception(err_msg) # noqa: TRY002
-spec = importlib.util.spec_from_file_location(module_name, file_path) -spec = spec_from_file_location(module_name, file_path)
-assert spec is not None -assert spec is not None
-module = importlib.util.module_from_spec(spec) -module = module_from_spec(spec)
-sys.modules[module_name] = module -sys.modules[module_name] = module
-assert spec.loader is not None -assert spec.loader is not None
-assert module is not None -assert module is not None
@@ -148,6 +157,7 @@ index c6336a6951..34e394d818 100644
- -
-bounded_compute_graph_mapping = module.bounded_compute_graph_mapping -bounded_compute_graph_mapping = module.bounded_compute_graph_mapping
-shape_compute_graph_mapping = module.shape_compute_graph_mapping -shape_compute_graph_mapping = module.shape_compute_graph_mapping
-
+if len(sys.argv) < 2 or sys.argv[1] != "dummy": +if len(sys.argv) < 2 or sys.argv[1] != "dummy":
+ spec = importlib.util.spec_from_file_location(module_name, file_path) + spec = importlib.util.spec_from_file_location(module_name, file_path)
+ assert spec is not None + assert spec is not None
@@ -159,9 +169,10 @@ index c6336a6951..34e394d818 100644
+ +
+ bounded_compute_graph_mapping = module.bounded_compute_graph_mapping + bounded_compute_graph_mapping = module.bounded_compute_graph_mapping
+ shape_compute_graph_mapping = module.shape_compute_graph_mapping + shape_compute_graph_mapping = module.shape_compute_graph_mapping
+
+else: +else:
+ bounded_compute_graph_mapping = {} + bounded_compute_graph_mapping = {}
+ shape_compute_graph_mapping = {} + shape_compute_graph_mapping = {}
SHAPE_HEADER = r""" SHAPE_HEADER = r"""
/**
@@ -1,16 +1,14 @@
Patch build files to also use system libraries instead of bundled ones for the Patch build files to also use system libraries instead of bundled ones for the
libraries not supported or working only by specifying USE_SYSTEM_LIBS. This libraries not supported or working only by specifying USE_SYSTEM_LIBS. This
includes using the clog, cpuinfo, fbgemm, foxi, fp16, fxdiv, googletest, includes using the clog, cpuinfo, fbgemm, foxi, fp16, fxdiv, googletest,
ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool, qnnpack, httplib, ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool,
qnnpack-pytorch, tensorpipe, valgrind and xnnpack packages. qnnpack-pytorch, tensorpipe, valgrind and xnnpack packages.
For QNNPACK, two versions were bundled and are required: The upstream one and
an internal fork (now in the package qnnpack-pytorch).
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
index 96fc297..7f27b66 100644 index 085af373e..3287429b4 100644
--- a/aten/src/ATen/CMakeLists.txt --- a/aten/src/ATen/CMakeLists.txt
+++ b/aten/src/ATen/CMakeLists.txt +++ b/aten/src/ATen/CMakeLists.txt
@@ -362,9 +362,9 @@ if(AT_NNPACK_ENABLED) @@ -468,9 +468,9 @@ if(AT_NNPACK_ENABLED)
list(APPEND ATen_CPU_DEPENDENCY_LIBS nnpack) # cpuinfo is added below list(APPEND ATen_CPU_DEPENDENCY_LIBS nnpack) # cpuinfo is added below
endif() endif()
@@ -21,13 +19,13 @@ index 96fc297..7f27b66 100644
+ list(APPEND ATen_CPU_DEPENDENCY_LIBS DNNL::dnnl) + list(APPEND ATen_CPU_DEPENDENCY_LIBS DNNL::dnnl)
+endif(USE_MKLDNN) +endif(USE_MKLDNN)
list(APPEND ATen_CPU_DEPENDENCY_LIBS cpuinfo) if(USE_MKLDNN_ACL)
list(APPEND ATen_CPU_INCLUDE ${ACL_INCLUDE_DIRS})
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index 221e3f3..417f601 100644 index d2d23b7ab..1a7e5a042 100644
--- a/caffe2/CMakeLists.txt --- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt
@@ -110,9 +110,6 @@ if(NOT MSVC AND USE_XNNPACK) @@ -91,9 +91,6 @@ if(NOT MSVC AND USE_XNNPACK)
if(NOT TARGET fxdiv) if(NOT TARGET fxdiv)
set(FXDIV_BUILD_TESTS OFF CACHE BOOL "") set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "") set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
@@ -37,7 +35,7 @@ index 221e3f3..417f601 100644
endif() endif()
endif() endif()
@@ -975,7 +972,6 @@ elseif(USE_CUDA) @@ -1135,7 +1132,6 @@ if(USE_XPU)
endif() endif()
if(NOT MSVC AND USE_XNNPACK) if(NOT MSVC AND USE_XNNPACK)
@@ -45,15 +43,26 @@ index 221e3f3..417f601 100644
endif() endif()
# ========================================================== # ==========================================================
@@ -1314,6 +1310,7 @@ target_link_libraries(torch_cpu PUBLIC c10) @@ -1254,8 +1250,8 @@ endif()
target_include_directories(torch_cpu PRIVATE
${TORCH_ROOT}/third_party/cpp-httplib)
-target_include_directories(torch_cpu PRIVATE
- ${TORCH_ROOT}/third_party/nlohmann/include)
+find_package(httplib REQUIRED)
+target_link_libraries(torch_cpu PUBLIC httplib::httplib)
install(DIRECTORY "${TORCH_SRC_DIR}/csrc"
DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch
@@ -1494,6 +1490,7 @@ target_link_libraries(torch_cpu PUBLIC c10)
target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS}) target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS})
target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS}) target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS})
target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS}) target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS})
+target_link_libraries(torch_cpu PRIVATE miniz clog) +target_link_libraries(torch_cpu PRIVATE miniz clog)
target_include_directories(torch_cpu INTERFACE $<INSTALL_INTERFACE:include>) if(USE_MPI)
target_include_directories(torch_cpu PRIVATE ${Caffe2_CPU_INCLUDE}) target_link_libraries(torch_cpu PRIVATE MPI::MPI_CXX)
target_include_directories(torch_cpu SYSTEM PRIVATE "${Caffe2_DEPENDENCY_INCLUDE}") endif()
@@ -1570,7 +1567,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK) @@ -1728,7 +1725,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK)
add_executable(static_runtime_bench "${STATIC_RUNTIME_BENCHMARK_SRCS}") add_executable(static_runtime_bench "${STATIC_RUNTIME_BENCHMARK_SRCS}")
add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}") add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}")
target_link_libraries(static_runtime_bench torch_library benchmark) target_link_libraries(static_runtime_bench torch_library benchmark)
@@ -61,8 +70,8 @@ index 221e3f3..417f601 100644
+ target_link_libraries(static_runtime_test torch_library gtest_main gtest) + target_link_libraries(static_runtime_test torch_library gtest_main gtest)
endif() endif()
if(BUILD_TENSOREXPR_BENCHMARK) if(BUILD_MOBILE_BENCHMARK)
@@ -1601,7 +1598,7 @@ if(BUILD_MOBILE_TEST) @@ -1747,7 +1744,7 @@ if(BUILD_MOBILE_TEST)
foreach(test_src ${ATen_MOBILE_TEST_SRCS}) foreach(test_src ${ATen_MOBILE_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE) get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}") add_executable(${test_name} "${test_src}")
@@ -71,32 +80,61 @@ index 221e3f3..417f601 100644
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>) target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE}) target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE})
@@ -1622,13 +1619,13 @@ if(BUILD_TEST) @@ -1768,7 +1765,7 @@ if(BUILD_TEST)
if(NOT MSVC) if(NOT MSVC)
add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp) add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp)
# TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR) # TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR)
- target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main) - target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main nlohmann)
+ target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main gtest) + target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main gtest nlohmann)
if(USE_FBGEMM) if(USE_FBGEMM)
target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm) target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm)
endif() endif()
@@ -1782,7 +1779,7 @@ if(BUILD_TEST)
endif()
else() else()
add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}") add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}")
- target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library gtest_main) - target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main)
+ target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library gtest_main gtest) + target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main gtest)
endif() endif()
target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<INSTALL_INTERFACE:include>) target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>) target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
@@ -1645,7 +1642,7 @@ if(BUILD_TEST) @@ -1799,7 +1796,7 @@ if(BUILD_TEST)
foreach(test_src ${Caffe2_CPU_TEST_SRCS}) foreach(test_src ${Caffe2_CPU_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE) get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}") add_executable(${test_name} "${test_src}")
- target_link_libraries(${test_name} torch_library gtest_main) - target_link_libraries(${test_name} torch_library gtest_main)
+ target_link_libraries(${test_name} torch_library gtest_main gtest) + target_link_libraries(${test_name} torch_library gtest_main gtest)
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) if(NOT MSVC)
target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>) target_link_libraries(${test_name} stdc++)
target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) endif()
@@ -1703,7 +1700,7 @@ if(BUILD_TEST) @@ -1823,7 +1820,7 @@ if(BUILD_TEST)
add_executable(${test_name} "${test_src}")
find_library(metal NAMES Metal)
find_library(foundation NAMES Foundation)
- target_link_libraries(${test_name} torch_library gtest_main ${metal} ${foundation})
+ target_link_libraries(${test_name} torch_library gtest_main gtest ${metal} ${foundation})
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
@@ -1843,7 +1840,7 @@ if(BUILD_TEST)
foreach(test_src ${Caffe2_GPU_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}")
- target_link_libraries(${test_name} torch_library gtest_main)
+ target_link_libraries(${test_name} torch_library gtest_main gtest)
if(USE_CUDNN AND ${test_name} MATCHES "cudnn")
target_link_libraries(${test_name} torch::cudnn)
endif()
@@ -1865,7 +1862,7 @@ if(BUILD_TEST)
foreach(test_src ${Caffe2_XPU_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}")
- target_link_libraries(${test_name} torch_library gtest_main)
+ target_link_libraries(${test_name} torch_library gtest_main gtest)
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
@@ -1880,7 +1877,7 @@ if(BUILD_TEST)
foreach(test_src ${Caffe2_VULKAN_TEST_SRCS}) foreach(test_src ${Caffe2_VULKAN_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE) get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}") add_executable(${test_name} "${test_src}")
@@ -105,23 +143,32 @@ index 221e3f3..417f601 100644
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>) add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
@@ -1899,7 +1896,7 @@ if(BUILD_TEST)
foreach(test_src ${Caffe2_HIP_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}")
- target_link_libraries(${test_name} torch_library gtest_main)
+ target_link_libraries(${test_name} torch_library gtest_main gtest)
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE})
target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS})
diff --git a/caffe2/serialize/CMakeLists.txt b/caffe2/serialize/CMakeLists.txt diff --git a/caffe2/serialize/CMakeLists.txt b/caffe2/serialize/CMakeLists.txt
index 1552b59..67e1a9a 100644 index ebbff0f29..dcded2590 100644
--- a/caffe2/serialize/CMakeLists.txt --- a/caffe2/serialize/CMakeLists.txt
+++ b/caffe2/serialize/CMakeLists.txt +++ b/caffe2/serialize/CMakeLists.txt
@@ -2,7 +2,6 @@ file(GLOB tmp *_test.cc) @@ -2,7 +2,6 @@ file(GLOB tmp *_test.cc)
set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp}) set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp})
list(APPEND Caffe2_CPU_SRCS list(APPEND Caffe2_CPU_SRCS
- ${PROJECT_SOURCE_DIR}/third_party/miniz-2.1.0/miniz.c - ${PROJECT_SOURCE_DIR}/third_party/miniz-3.0.2/miniz.c
${CMAKE_CURRENT_SOURCE_DIR}/inline_container.cc ${CMAKE_CURRENT_SOURCE_DIR}/inline_container.cc
${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc ${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc
${CMAKE_CURRENT_SOURCE_DIR}/file_adapter.cc ${CMAKE_CURRENT_SOURCE_DIR}/file_adapter.cc
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 8c0e3c2..d65576a 100644 index be45936a8..bb1aa1cc1 100644
--- a/cmake/Dependencies.cmake --- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake
@@ -298,7 +298,7 @@ endif() @@ -276,7 +276,7 @@ endif()
# --- [ PocketFFT # --- [ PocketFFT
set(AT_POCKETFFT_ENABLED 0) set(AT_POCKETFFT_ENABLED 0)
if(NOT AT_MKL_ENABLED) if(NOT AT_MKL_ENABLED)
@@ -130,27 +177,7 @@ index 8c0e3c2..d65576a 100644
if(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}") if(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}")
message(FATAL_ERROR "pocketfft directory not found, expected ${POCKETFFT_INCLUDE_DIR}") message(FATAL_ERROR "pocketfft directory not found, expected ${POCKETFFT_INCLUDE_DIR}")
elif(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}/pocketfft_hdronly.h") elif(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}/pocketfft_hdronly.h")
@@ -501,19 +501,6 @@ if(USE_QNNPACK) @@ -460,15 +460,6 @@ if(USE_PYTORCH_QNNPACK)
set(QNNPACK_BUILD_TESTS OFF CACHE BOOL "")
set(QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
set(QNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
- add_subdirectory(
- "${QNNPACK_SOURCE_DIR}"
- "${CONFU_DEPENDENCIES_BINARY_DIR}/QNNPACK")
-
- # TODO: See https://github.com/pytorch/pytorch/issues/56285
- if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
- target_compile_options(qnnpack PRIVATE -Wno-deprecated-declarations)
- endif()
-
- # We build static versions of QNNPACK and pthreadpool but link
- # them into a shared library for Caffe2, so they need PIC.
- set_property(TARGET qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
- set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
if(QNNPACK_CUSTOM_THREADPOOL)
target_compile_definitions(
@@ -562,13 +549,6 @@ if(USE_PYTORCH_QNNPACK)
set(PYTORCH_QNNPACK_BUILD_TESTS OFF CACHE BOOL "") set(PYTORCH_QNNPACK_BUILD_TESTS OFF CACHE BOOL "")
set(PYTORCH_QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "") set(PYTORCH_QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
set(PYTORCH_QNNPACK_LIBRARY_TYPE "static" CACHE STRING "") set(PYTORCH_QNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
@@ -161,10 +188,33 @@ index 8c0e3c2..d65576a 100644
- # them into a shared library for Caffe2, so they need PIC. - # them into a shared library for Caffe2, so they need PIC.
- set_property(TARGET pytorch_qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON) - set_property(TARGET pytorch_qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
- set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON) - set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
- # QNNPACK depends on gemmlowp headers
- target_include_directories(pytorch_qnnpack PRIVATE "${CAFFE2_THIRD_PARTY_ROOT}/gemmlowp")
endif()
if(PYTORCH_QNNPACK_CUSTOM_THREADPOOL) list(APPEND Caffe2_DEPENDENCY_LIBS pytorch_qnnpack)
target_compile_definitions( @@ -558,16 +549,15 @@ if(USE_XNNPACK AND NOT USE_SYSTEM_XNNPACK)
@@ -750,11 +730,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST) list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod)
elseif(NOT TARGET XNNPACK AND USE_SYSTEM_XNNPACK)
add_library(XNNPACK SHARED IMPORTED)
- add_library(microkernels-prod SHARED IMPORTED)
+ add_library(microkernels-prod INTERFACE IMPORTED)
find_library(XNNPACK_LIBRARY XNNPACK)
- find_library(microkernels-prod_LIBRARY microkernels-prod)
set_property(TARGET XNNPACK PROPERTY IMPORTED_LOCATION "${XNNPACK_LIBRARY}")
- set_property(TARGET microkernels-prod PROPERTY IMPORTED_LOCATION "${microkernels-prod_LIBRARY}")
- if(NOT XNNPACK_LIBRARY or NOT microkernels-prod_LIBRARY)
+ set_property(TARGET microkernels-prod PROPERTY INTERFACE_LINK_LIBRARIES XNNPACK)
+ if(NOT XNNPACK_LIBRARY)
message(FATAL_ERROR "Cannot find XNNPACK")
endif()
message("-- Found XNNPACK: ${XNNPACK_LIBRARY}")
- list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod)
+ list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK)
endif()
# ---[ Vulkan deps
@@ -650,11 +640,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
# this shouldn't be necessary anymore. # this shouldn't be necessary anymore.
get_property(INC_DIR_temp DIRECTORY PROPERTY INCLUDE_DIRECTORIES) get_property(INC_DIR_temp DIRECTORY PROPERTY INCLUDE_DIRECTORIES)
set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "") set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "")
@@ -176,9 +226,9 @@ index 8c0e3c2..d65576a 100644
# We will not need to test benchmark lib itself. # We will not need to test benchmark lib itself.
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.") set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.")
@@ -829,16 +804,6 @@ if(USE_FBGEMM) @@ -732,16 +717,6 @@ if(USE_FBGEMM)
else() if(USE_ASAN)
set(FBGEMM_LIBRARY_TYPE "static" CACHE STRING "") set(USE_SANITIZER "address,undefined" CACHE STRING "-fsanitize options for FBGEMM")
endif() endif()
- add_subdirectory("${FBGEMM_SOURCE_DIR}") - add_subdirectory("${FBGEMM_SOURCE_DIR}")
- set_property(TARGET fbgemm_generic PROPERTY POSITION_INDEPENDENT_CODE ON) - set_property(TARGET fbgemm_generic PROPERTY POSITION_INDEPENDENT_CODE ON)
@@ -190,44 +240,39 @@ index 8c0e3c2..d65576a 100644
- target_compile_options_if_supported(asmjit -Wno-deprecated-copy) - target_compile_options_if_supported(asmjit -Wno-deprecated-copy)
- target_compile_options_if_supported(asmjit -Wno-unused-but-set-variable) - target_compile_options_if_supported(asmjit -Wno-unused-but-set-variable)
- endif() - endif()
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
target_compile_options_if_supported(asmjit -Wno-extra-semi)
target_compile_options_if_supported(fbgemm -Wno-extra-semi)
@@ -829,7 +804,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
"${CONFU_DEPENDENCIES_BINARY_DIR}/FP16")
endif() endif()
if(USE_FBGEMM)
@@ -1001,7 +966,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
"${FP16_SOURCE_DIR}"
"${CONFU_DEPENDENCIES_BINARY_DIR}/FP16")
elseif(NOT TARGET fp16 AND USE_SYSTEM_FP16) elseif(NOT TARGET fp16 AND USE_SYSTEM_FP16)
- add_library(fp16 STATIC "/usr/include/fp16.h") - add_library(fp16 STATIC "/usr/include/fp16.h")
+ add_library(fp16 STATIC "#FP16_INCLUDE_DIR") + add_library(fp16 STATIC "#FP16_INCLUDE_DIR")
set_target_properties(fp16 PROPERTIES LINKER_LANGUAGE C) set_target_properties(fp16 PROPERTIES LINKER_LANGUAGE C)
endif() endif()
list(APPEND Caffe2_DEPENDENCY_LIBS fp16) list(APPEND Caffe2_DEPENDENCY_LIBS fp16)
@@ -1395,7 +1360,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE) @@ -1170,10 +1145,9 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
message(WARNING "Archived TensorPipe forces CMake compatibility mode")
# Tensorpipe uses cuda_add_library set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
torch_update_find_cuda_flags()
- add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe)
list(APPEND Caffe2_DEPENDENCY_LIBS tensorpipe)
if(USE_CUDA)
@@ -1551,7 +1515,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17)
endif() endif()
endif() - add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe)
- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL) # Suppress warning to unblock libnop comiplation by clang-17
# See https://github.com/pytorch/pytorch/issues/151316
add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE}) target_compile_options_if_supported(tensorpipe -Wno-missing-template-arg-list-after-template-kw)
if(NOT USE_SYSTEM_ONNX) if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
@@ -1582,7 +1545,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) unset(CMAKE_POLICY_VERSION_MINIMUM)
endif()
@@ -1340,7 +1314,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
endif() endif()
set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION ${ONNX_PROTO_LIBRARY}) set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION ${ONNX_PROTO_LIBRARY})
message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}") message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
- list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx) - list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
+ list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx onnx_optimizer) + list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx onnx_optimizer)
endif() endif()
include_directories(${FOXI_INCLUDE_DIRS}) # Recover the build shared libs option.
list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
@@ -1752,9 +1715,8 @@ if(NOT INTERN_BUILD_MOBILE) @@ -1500,9 +1474,8 @@ if(NOT INTERN_BUILD_MOBILE)
endif() endif()
if(USE_MKLDNN) if(USE_MKLDNN)
include(${CMAKE_CURRENT_LIST_DIR}/public/mkldnn.cmake) include(${CMAKE_CURRENT_LIST_DIR}/public/mkldnn.cmake)
@@ -235,10 +280,10 @@ index 8c0e3c2..d65576a 100644
+ if(DNNL_FOUND) + if(DNNL_FOUND)
set(AT_MKLDNN_ENABLED 1) set(AT_MKLDNN_ENABLED 1)
- include_directories(AFTER SYSTEM ${MKLDNN_INCLUDE_DIR}) - include_directories(AFTER SYSTEM ${MKLDNN_INCLUDE_DIR})
if(BUILD_CAFFE2_OPS) else()
list(APPEND Caffe2_DEPENDENCY_LIBS caffe2::mkldnn) message(WARNING "MKLDNN could not be found.")
endif(BUILD_CAFFE2_OPS) caffe2_update_option(USE_MKLDNN OFF)
@@ -1819,7 +1781,7 @@ endif() @@ -1583,7 +1556,7 @@ endif()
# #
set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE) set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE)
@@ -247,7 +292,7 @@ index 8c0e3c2..d65576a 100644
# Disable compiler feature checks for `fmt`. # Disable compiler feature checks for `fmt`.
# #
@@ -1828,7 +1790,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) @@ -1592,7 +1565,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
# CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know # CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know
# `fmt` is compatible with a superset of the compilers that PyTorch is, it # `fmt` is compatible with a superset of the compilers that PyTorch is, it
# shouldn't be too bad to just disable the checks. # shouldn't be too bad to just disable the checks.
@@ -256,7 +301,7 @@ index 8c0e3c2..d65576a 100644
list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only) list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE) set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE)
diff --git a/cmake/External/nnpack.cmake b/cmake/External/nnpack.cmake diff --git a/cmake/External/nnpack.cmake b/cmake/External/nnpack.cmake
index a41343c..6075bdd 100644 index 8a4a310d6..f413d2e61 100644
--- a/cmake/External/nnpack.cmake --- a/cmake/External/nnpack.cmake
+++ b/cmake/External/nnpack.cmake +++ b/cmake/External/nnpack.cmake
@@ -40,7 +40,7 @@ endif() @@ -40,7 +40,7 @@ endif()
@@ -268,7 +313,7 @@ index a41343c..6075bdd 100644
message(STATUS "Brace yourself, we are building NNPACK") message(STATUS "Brace yourself, we are building NNPACK")
set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party) set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party)
@@ -114,6 +114,5 @@ endif() @@ -94,6 +94,5 @@ endif()
# (4) Catch-all: not supported. # (4) Catch-all: not supported.
############################################################################## ##############################################################################
@@ -278,7 +323,7 @@ index a41343c..6075bdd 100644
+set(NNPACK_FOUND TRUE) +set(NNPACK_FOUND TRUE)
+set(USE_NNPACK ON) +set(USE_NNPACK ON)
diff --git a/cmake/public/mkldnn.cmake b/cmake/public/mkldnn.cmake diff --git a/cmake/public/mkldnn.cmake b/cmake/public/mkldnn.cmake
index 50404d3..ca067f0 100644 index 87935625f..9f8fa3df8 100644
--- a/cmake/public/mkldnn.cmake --- a/cmake/public/mkldnn.cmake
+++ b/cmake/public/mkldnn.cmake +++ b/cmake/public/mkldnn.cmake
@@ -4,7 +4,7 @@ if(CPU_AARCH64) @@ -4,7 +4,7 @@ if(CPU_AARCH64)
@@ -290,105 +335,93 @@ index 50404d3..ca067f0 100644
if(NOT TARGET caffe2::mkldnn) if(NOT TARGET caffe2::mkldnn)
add_library(caffe2::mkldnn INTERFACE IMPORTED) add_library(caffe2::mkldnn INTERFACE IMPORTED)
@@ -15,7 +15,7 @@ set_property( @@ -15,4 +15,4 @@ set_property(
${MKLDNN_INCLUDE_DIR}) ${MKLDNN_INCLUDE_DIR})
set_property( set_property(
TARGET caffe2::mkldnn PROPERTY INTERFACE_LINK_LIBRARIES TARGET caffe2::mkldnn PROPERTY INTERFACE_LINK_LIBRARIES
- ${MKLDNN_LIBRARIES}) - ${MKLDNN_LIBRARIES})
+ DNNL::dnnl) + DNNL::dnnl)
if(BUILD_ONEDNN_GRAPH)
if(NOT TARGET caffe2::dnnl_graph)
add_library(caffe2::dnnl_graph INTERFACE IMPORTED)
diff --git a/setup.py b/setup.py diff --git a/setup.py b/setup.py
index 34b2854..5db117f 100644 index 61ee9363f..3691cc35c 100644
--- a/setup.py --- a/setup.py
+++ b/setup.py +++ b/setup.py
@@ -418,13 +418,9 @@ def build_deps(): @@ -508,13 +508,9 @@ def build_deps():
# Windows has very poor support for them. # Windows has very poor support for them.
sym_files = [ sym_files = [
'tools/shared/_utils_internal.py', "tools/shared/_utils_internal.py",
- 'torch/utils/benchmark/utils/valgrind_wrapper/callgrind.h', - "torch/utils/benchmark/utils/valgrind_wrapper/callgrind.h",
- 'torch/utils/benchmark/utils/valgrind_wrapper/valgrind.h', - "torch/utils/benchmark/utils/valgrind_wrapper/valgrind.h",
] ]
orig_files = [ orig_files = [
'torch/_utils_internal.py', "torch/_utils_internal.py",
- 'third_party/valgrind-headers/callgrind.h', - "third_party/valgrind-headers/callgrind.h",
- 'third_party/valgrind-headers/valgrind.h', - "third_party/valgrind-headers/valgrind.h",
] ]
for sym_file, orig_file in zip(sym_files, orig_files): for sym_file, orig_file in zip(sym_files, orig_files):
same = False same = False
diff --git a/test/cpp/c10d/CMakeLists.txt b/test/cpp/c10d/CMakeLists.txt diff --git a/test/cpp/c10d/CMakeLists.txt b/test/cpp/c10d/CMakeLists.txt
index 89c6b91..0c60d08 100644 index 5b423241d..e069accd6 100644
--- a/test/cpp/c10d/CMakeLists.txt --- a/test/cpp/c10d/CMakeLists.txt
+++ b/test/cpp/c10d/CMakeLists.txt +++ b/test/cpp/c10d/CMakeLists.txt
@@ -16,14 +16,14 @@ function(c10d_add_test test_src) @@ -26,17 +26,17 @@ function(c10d_add_test test_src)
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>) endif()
endfunction() endfunction()
-c10d_add_test(FileStoreTest.cpp torch_cpu gtest_main) -c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF)
-c10d_add_test(TCPStoreTest.cpp torch_cpu gtest_main) -c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
+c10d_add_test(FileStoreTest.cpp torch_cpu gtest_main gtest) -c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
+c10d_add_test(TCPStoreTest.cpp torch_cpu gtest_main gtest) +c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST OFF)
if(INSTALL_TEST) +c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
install(TARGETS FileStoreTest DESTINATION bin) +c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
install(TARGETS TCPStoreTest DESTINATION bin)
endif()
if(NOT WIN32) if(NOT WIN32)
- c10d_add_test(HashStoreTest.cpp torch_cpu gtest_main) - c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
+ c10d_add_test(HashStoreTest.cpp torch_cpu gtest_main gtest) + c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
if(INSTALL_TEST) endif()
install(TARGETS HashStoreTest DESTINATION bin)
endif()
@@ -31,11 +31,11 @@ endif()
if(USE_CUDA) if(USE_CUDA)
if(USE_GLOO AND USE_C10D_GLOO) if(USE_GLOO AND USE_C10D_GLOO)
- c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu c10d_cuda_test gtest_main) - c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST})
+ c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu c10d_cuda_test gtest_main gtest) - c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST})
if(INSTALL_TEST) + c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
install(TARGETS ProcessGroupGlooTest DESTINATION bin) + c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
endif()
- c10d_add_test(ProcessGroupGlooAsyncTest.cpp torch_cpu c10d_cuda_test gtest_main)
+ c10d_add_test(ProcessGroupGlooAsyncTest.cpp torch_cpu c10d_cuda_test gtest_main gtest)
endif() endif()
if(USE_NCCL AND USE_C10D_NCCL) if(USE_NCCL AND USE_C10D_NCCL)
# NCCL is a private dependency of libtorch, but the tests include some # NCCL is a private dependency of libtorch, but the tests include some
@@ -44,10 +44,10 @@ if(USE_CUDA) @@ -45,10 +45,10 @@ if(USE_CUDA)
# a private dependency of the tests as well. # a private dependency of the tests as well.
c10d_add_test( c10d_add_test(
ProcessGroupNCCLTest.cpp ProcessGroupNCCLTest.cpp
- torch_cpu c10d_cuda_test gtest_main __caffe2_nccl) - LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
+ torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl) + LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
c10d_add_test( c10d_add_test(
ProcessGroupNCCLErrorsTest.cpp ProcessGroupNCCLErrorsTest.cpp
- torch_cpu c10d_cuda_test gtest_main __caffe2_nccl) - LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
+ torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl) + LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
if(INSTALL_TEST) if(INSTALL_TEST)
install(TARGETS ProcessGroupNCCLTest DESTINATION bin) install(TARGETS c10d_cuda_test DESTINATION lib)
install(TARGETS ProcessGroupNCCLErrorsTest DESTINATION bin) endif()
@@ -61,7 +61,7 @@ if(USE_CUDA) @@ -60,14 +60,14 @@ if(USE_CUDA)
# a private dependency of the tests as well. # a private dependency of the tests as well.
c10d_add_test( c10d_add_test(
ProcessGroupUCCTest.cpp ProcessGroupUCCTest.cpp
- torch_cpu c10d_cuda_test gtest_main __caffe2_ucc) - LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_ucc INSTALL_TEST ${INSTALL_TEST})
+ torch_cpu c10d_cuda_test gtest_main gtest __caffe2_ucc) + LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_ucc INSTALL_TEST ${INSTALL_TEST})
if(INSTALL_TEST) if(INSTALL_TEST)
install(TARGETS ProcessGroupUCCTest DESTINATION bin)
install(TARGETS c10d_cuda_test DESTINATION lib) install(TARGETS c10d_cuda_test DESTINATION lib)
@@ -69,7 +69,7 @@ if(USE_CUDA) endif()
endif() endif()
else() else()
if(USE_GLOO AND USE_C10D_GLOO) if(USE_GLOO AND USE_C10D_GLOO)
- c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu gtest_main) - c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF)
+ c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu gtest_main gtest) + c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST OFF)
endif() endif()
endif() endif()
diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt
index 7dff706..90b1003 100644 index 9c409e078..6cddd8de4 100644
--- a/test/cpp/tensorexpr/CMakeLists.txt --- a/test/cpp/tensorexpr/CMakeLists.txt
+++ b/test/cpp/tensorexpr/CMakeLists.txt +++ b/test/cpp/tensorexpr/CMakeLists.txt
@@ -54,7 +54,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE}) @@ -51,7 +51,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
# pthreadpool header. For some build environment we need add the dependency # pthreadpool header. For some build environment we need add the dependency
# explicitly. # explicitly.
if(USE_PTHREADPOOL) if(USE_PTHREADPOOL)
@@ -396,4 +429,17 @@ index 7dff706..90b1003 100644
+ target_link_libraries(test_tensorexpr PRIVATE pthreadpool) + target_link_libraries(test_tensorexpr PRIVATE pthreadpool)
endif() endif()
if(USE_CUDA) if(USE_CUDA)
target_link_libraries(test_tensorexpr PRIVATE target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA)
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
index 8b8ebdc6e..034b5e56c 100644
--- a/torch/CMakeLists.txt
+++ b/torch/CMakeLists.txt
@@ -82,8 +82,6 @@ set(TORCH_PYTHON_LINK_LIBRARIES
Python::Module
pybind::pybind11
opentelemetry::api
- httplib
- nlohmann
shm
fmt::fmt-header-only
ATEN_CPU_FILES_GEN_LIB)
@@ -0,0 +1,64 @@
Even when building without Kineto, the <ActivityType.h> header is still
imported and the ActivityType type is used. This patch was copied from
https://github.com/pytorch/pytorch/pull/111048 and adapted.
diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h
index c4efd7785..2caef1f1e 100644
--- a/torch/csrc/profiler/kineto_shim.h
+++ b/torch/csrc/profiler/kineto_shim.h
@@ -12,7 +12,55 @@
#undef USE_KINETO
#endif
+#ifdef USE_KINETO
#include <ActivityType.h>
+#else
+namespace libkineto {
+// copied from header
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// Note : All activity types are not enabled by default. Please add them
+// at correct position in the enum
+enum class ActivityType {
+ // Activity types enabled by default
+ CPU_OP = 0, // cpu side ops
+ USER_ANNOTATION,
+ GPU_USER_ANNOTATION,
+ GPU_MEMCPY,
+ GPU_MEMSET,
+ CONCURRENT_KERNEL, // on-device kernels
+ EXTERNAL_CORRELATION,
+ CUDA_RUNTIME, // host side cuda runtime events
+ CUDA_DRIVER, // host side cuda driver events
+ CPU_INSTANT_EVENT, // host side point-like events
+ PYTHON_FUNCTION,
+ OVERHEAD, // CUPTI induced overhead events sampled from its overhead API.
+
+ // Optional Activity types
+ CUDA_SYNC, // synchronization events between runtime and kernels
+ GLOW_RUNTIME, // host side glow runtime events
+ MTIA_RUNTIME, // host side MTIA runtime events
+ CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics
+ MTIA_CCP_EVENTS, // MTIA ondevice CCP events
+ HPU_OP, // HPU host side runtime event
+ XPU_RUNTIME, // host side xpu runtime events
+ MTIA_WORKLOADD,
+
+ PRIVATEUSE1_RUNTIME,
+ PRIVATEUSE1_DRIVER,
+
+ ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it.
+ OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC,
+};
+}
+
+#endif
#include <torch/csrc/Export.h>
#include <torch/csrc/profiler/api.h>