gnu: python-pytorch-for-r-torch: Update to 2.7.1.

* gnu/packages/patches/python-pytorch-for-r-torch-fix-codegen.patch,
gnu/packages/patches/python-pytorch-for-r-torch-system-libraries.patch: Update.
* gnu/packages/patches/python-pytorch-for-r-torch-without-kineto.patch: New file.
* gnu/local.mk (dist_patch_DATA): Record it.
* gnu/packages/machine-learning.scm (python-pytorch-for-r-torch): Update to 2.7.1.
[source]: Use new patch.
[arguments]: Remove phase 'fix-aten-vec; copy and adjust 'use-system-libraries phase from python-pytorch.
[inputs]: Inherit all from python-pytorch; replace gloo with gloo-for-r-torch.
[native-inputs]: Inherit all from python-pytorch.
[propagated-inputs]: Inherit all from python-pytorch.

Change-Id: Ib2cf511fc34f609bbc7e92971720b00c4523419f
2026-05-28 03:51:53 +02:00 · 2025-11-07 17:45:02 +01:00
parent 0f2df2dad5
commit 02f59daf07
5 changed files with 353 additions and 192 deletions
@@ -2176,6 +2176,7 @@ dist_patch_DATA =						\
  %D%/packages/patches/python-pytorch-fix-codegen.patch		\
  %D%/packages/patches/python-pytorch-for-r-torch-fix-codegen.patch \
  %D%/packages/patches/python-pytorch-for-r-torch-system-libraries.patch \
  %D%/packages/patches/python-pytorch-for-r-torch-without-kineto.patch \
  %D%/packages/patches/python-pytorch-runpath.patch		\
  %D%/packages/patches/python-pytorch-system-libraries.patch	\
  %D%/packages/patches/python-pytorch-without-kineto.patch	\
@@ -4909,7 +4909,7 @@ in the audio domain.")
           (delete 'disable-avx-dependencies)))))
    (supported-systems '("x86_64-linux"))))
-(define %python-pytorch-for-r-torch-version "2.0.1")
+(define %python-pytorch-for-r-torch-version "2.7.1")
 (define %python-pytorch-for-r-torch-src
  (origin
@@ -4921,11 +4921,11 @@ in the audio domain.")
                              %python-pytorch-for-r-torch-version))
    (sha256
     (base32
-      "0iirrn687i7sfv0p0i7dn89x3rf13a7l8y1y5h190h51yjxpxqxa"))
+      "0734kfm66hsqdzgs2s4wj5yagvifijbgb0c5wfmp3qcdrraa9x57"))
    (patches (search-patches
              "python-pytorch-for-r-torch-system-libraries.patch"
              "python-pytorch-runpath.patch"
-              "python-pytorch-without-kineto.patch"
+              "python-pytorch-for-r-torch-without-kineto.patch"
              ;; Some autogeneration scripts depend on the
              ;; compiled PyTorch library. Therefore, we create
              ;; dummy versions which are regenerated later.
@@ -4952,34 +4952,73 @@ in the audio domain.")
    (name "python-pytorch")
    (version %python-pytorch-for-r-torch-version)
    (source %python-pytorch-for-r-torch-src)
    (inputs
     (modify-inputs (package-inputs python-pytorch)
       (replace "gloo" gloo-for-r-torch)))
    (arguments
     (substitute-keyword-arguments (package-arguments python-pytorch)
       ((#:phases phases)
        #~(modify-phases #$phases
-            ;; See https://github.com/pytorch/pytorch/issues/61244
+            (replace 'use-system-libraries
            (add-after 'unpack 'fix-aten-vec
              (lambda _
                (for-each
                 (lambda (file)
                   ;; Check whether the files exist for the
                   ;; python-pytorch-for-r-torch package
                   (when (file-exists? file)
                     (substitute* file
                       (("\"miniz\\.h\"") "<miniz/miniz.h>")
                       (("<miniz\\.h>") "<miniz/miniz.h>"))))
                 '("caffe2/serialize/crc.cc"
                   "caffe2/serialize/inline_container.cc"
                   "torch/csrc/inductor/aoti_package/model_package_loader.cpp"))
                (substitute* "aten/src/ATen/native/vulkan/api/Allocator.h"
                  (("<include/vk_mem_alloc.h>")
                   "<vk_mem_alloc.h>"))
                ;; Fix missing <algorithm> header for std::for_each in Vulkan API
                (substitute* "aten/src/ATen/native/vulkan/api/QueryPool.cpp"
                  (("#include <utility>" all)
                   (string-append all "\n#include <algorithm>")))
                ;; For Vulkan
                (substitute* "CMakeLists.txt"
                  (("append_cxx_flag.*-Werror=(return-type|range-loop-construct).*") ""))
                (substitute*
-                    '("aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h"
+                    (cons*
-                      "aten/src/ATen/cpu/vec/vec256/vec256_bfloat16.h")
+                     "torch/csrc/Module.cpp"
-                  (("map\\(const __") "map(__"))))))))
+                     (map
-    (native-inputs
+                      (lambda (name)
-     (modify-inputs (package-native-inputs python-pytorch)
+                        (string-append
-       (replace "ideep-pytorch" ideep-pytorch-for-r-torch)))
+                         "torch/utils/benchmark/utils/valgrind_wrapper/"
-    (inputs
+                         name))
-     (modify-inputs (package-inputs python-pytorch)
+                      '("compat_bindings.cpp" "timer_callgrind_template.cpp")))
-       (prepend foxi)
+                  (("<callgrind.h>") "<valgrind/callgrind.h>"))
-       (prepend qnnpack)
+                (setenv "USE_VULKAN" "1")
-       (replace "qnnpack-pytorch" qnnpack-pytorch-for-r-torch)
+                ;; Tell 'setup.py' to let 'CMakeLists.txt' know that we
-       (replace "oneapi-dnnl" oneapi-dnnl-for-r-torch)
+                ;; want to use "system libraries" instead of the bundled
-       (replace "xnnpack" xnnpack-for-r-torch)))
+                ;; ones.
-    (propagated-inputs
+                (setenv "USE_SYSTEM_LIBS" "1")
-     (modify-inputs (package-propagated-inputs python-pytorch)
+                ;; For oneDNN
-       (append python-filelock
+                (setenv "USE_MKLDNN" "1")
-               python-jinja2
+                ;; Only works with CUPTI
-               python-networkx
+                (setenv "USE_KINETO" "0")
-               python-opt-einsum
+                ;; Prevent CMake error by disabling explicitly
-               python-sympy)))))
+                (setenv "USE_ITT" "0")
                ;; Disable on unsupported systems
                (if #$(not (member
                            (or (%current-target-system)
                                (%current-system))
                            (package-transitive-supported-systems qnnpack)))
                    (setenv "USE_QNNPACK" "0"))
                (substitute* '("requirements.txt" "setup.py")
                  (("sympy>=1\\.13\\.3")
                   "sympy>=1.13.1"))))
            (replace 'skip-nccl-call
              (lambda _
                ;; Comment-out `checkout_nccl()` invocation in build_pytorch().
                (substitute* "tools/build_pytorch_libs.py"
                  (("^[[:blank:]]*checkout_nccl\\(\\)" all)
                   (string-append "# " all "\n    pass")))))))))))
 (define-public python-pytorch-geometric
    (package
@@ -6,7 +6,7 @@ is later corrected.  codegen_external.py is patched to avoid duplicate
 functions and add the static keyword as in the existing generated file.
 diff --git a/tools/gen_flatbuffers.sh b/tools/gen_flatbuffers.sh
-index cc0263dbbf..ac34e84b82 100644
+index cc0263dbb..ac34e84b8 100644
 --- a/tools/gen_flatbuffers.sh
 +++ b/tools/gen_flatbuffers.sh
@@ -1,13 +1,13 @@
@@ -32,10 +32,10 @@ index cc0263dbbf..ac34e84b82 100644
      -c "$ROOT/torch/csrc/jit/serialization/mobile_bytecode.fbs"
 echo '// @generated' >> "$ROOT/torch/csrc/jit/serialization/mobile_bytecode_generated.h"
 diff --git a/torch/csrc/jit/tensorexpr/codegen_external.py b/torch/csrc/jit/tensorexpr/codegen_external.py
-index 120520b139..0c8587f02d 100644
+index 5dcf1b284..0e20b0c10 100644
 --- a/torch/csrc/jit/tensorexpr/codegen_external.py
 +++ b/torch/csrc/jit/tensorexpr/codegen_external.py
-@@ -16,9 +16,14 @@ def gen_external(native_functions_path, tags_path, external_path):
+@@ -21,9 +21,14 @@ def gen_external(native_functions_path, tags_path, external_path):
     native_functions = parse_native_yaml(native_functions_path, tags_path)
     func_decls = []
     func_registrations = []
@@ -51,7 +51,7 @@ index 120520b139..0c8587f02d 100644
         args = schema.arguments
         # Only supports extern calls for functions with out variants
         if not schema.is_out_fn():
-@@ -48,7 +53,7 @@ def gen_external(native_functions_path, tags_path, external_path):
+@@ -63,7 +68,7 @@ def gen_external(native_functions_path, tags_path, external_path):
         # print(tensor_decls, name, arg_names)
         func_decl = f"""\
@@ -61,7 +61,7 @@ index 120520b139..0c8587f02d 100644
     void** buf_data,
     int64_t* buf_ranks,
 diff --git a/torchgen/decompositions/gen_jit_decompositions.py b/torchgen/decompositions/gen_jit_decompositions.py
-index 7cfbb803f9..2e69bb1868 100644
+index b42948045..e1cfc73a5 100644
 --- a/torchgen/decompositions/gen_jit_decompositions.py
 +++ b/torchgen/decompositions/gen_jit_decompositions.py
@@ -1,8 +1,12 @@
@@ -76,9 +76,9 @@ index 7cfbb803f9..2e69bb1868 100644
 +else:
 +    decomposition_table = {}
 # from torchgen.code_template import CodeTemplate
-@@ -85,7 +89,7 @@ def write_decomposition_util_file(path: str) -> None:
+ # from torchgen.code_template import CodeTemplate
@@ -86,7 +90,7 @@ def write_decomposition_util_file(path: str) -> None:
 def main() -> None:
@@ -88,40 +88,41 @@ index 7cfbb803f9..2e69bb1868 100644
     write_decomposition_util_file(str(upgrader_path))
 diff --git a/torchgen/operator_versions/gen_mobile_upgraders.py b/torchgen/operator_versions/gen_mobile_upgraders.py
-index e5287cffc5..57f3c38096 100644
+index 845034cb7..a1c5767c2 100644
 --- a/torchgen/operator_versions/gen_mobile_upgraders.py
 +++ b/torchgen/operator_versions/gen_mobile_upgraders.py
-@@ -2,10 +2,12 @@
+@@ -6,10 +6,13 @@ import os
 import os
 from enum import Enum
 from operator import itemgetter
 from pathlib import Path
 +import sys
- from typing import Any, Dict, List
+ from typing import Any
 -import torch
 -from torch.jit.generate_bytecode import generate_upgraders_bytecode
 +if len(sys.argv) < 2 or sys.argv[1] != "dummy":
 +    import torch
 +    from torch.jit.generate_bytecode import generate_upgraders_bytecode
- 
+
 from torchgen.code_template import CodeTemplate
 from torchgen.operator_versions.gen_mobile_upgraders_constant import (
-@@ -262,7 +264,10 @@ def construct_register_size(register_size_from_yaml: int) -> str:
+     MOBILE_UPGRADERS_HEADER_DESCRIPTION,
@@ -263,7 +266,10 @@ def construct_register_size(register_size_from_yaml: int) -> str:
 def construct_version_maps(
-     upgrader_bytecode_function_to_index_map: Dict[str, Any]
+     upgrader_bytecode_function_to_index_map: dict[str, Any],
 ) -> str:
 -    version_map = torch._C._get_operator_version_map()
 +    if len(sys.argv) < 2 or sys.argv[1] != "dummy":
 +        version_map = torch._C._get_operator_version_map()
 +    else:
 +        version_map = {}
-     sorted_version_map_ = sorted(version_map.items(), key=lambda item: item[0])  # type: ignore[no-any-return]
+     sorted_version_map_ = sorted(version_map.items(), key=itemgetter(0))  # type: ignore[no-any-return]
-     sorted_version_map = {name: lst for name, lst in sorted_version_map_}
+     sorted_version_map = dict(sorted_version_map_)
@@ -375,7 +381,10 @@ def sort_upgrader(upgrader_list: list[dict[str, Any]]) -> list[dict[str, Any]]:
@@ -379,7 +384,10 @@ def sort_upgrader(upgrader_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
 def main() -> None:
 -    upgrader_list = generate_upgraders_bytecode()
 +    if len(sys.argv) < 2 or sys.argv[1] != "dummy":
 +        upgrader_list = generate_upgraders_bytecode()
@@ -131,16 +132,24 @@ index e5287cffc5..57f3c38096 100644
     for up in sorted_upgrader_list:
         print("after sort upgrader : ", next(iter(up)))
 diff --git a/torchgen/shape_functions/gen_jit_shape_functions.py b/torchgen/shape_functions/gen_jit_shape_functions.py
-index c6336a6951..34e394d818 100644
+index 56a3d8bf0..ffd0785fd 100644
 --- a/torchgen/shape_functions/gen_jit_shape_functions.py
 +++ b/torchgen/shape_functions/gen_jit_shape_functions.py
-@@ -18,16 +18,20 @@ you are in the root directory of the Pytorch git repo"""
+@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import os
 import sys
 +import importlib
 from importlib.util import module_from_spec, spec_from_file_location
 from itertools import chain
 from pathlib import Path
@@ -18,17 +19,21 @@ you are in the root directory of the Pytorch git repo"""
 if not file_path.exists():
-     raise Exception(err_msg)
+     raise Exception(err_msg)  # noqa: TRY002
-spec = importlib.util.spec_from_file_location(module_name, file_path)
+-spec = spec_from_file_location(module_name, file_path)
 -assert spec is not None
-module = importlib.util.module_from_spec(spec)
+-module = module_from_spec(spec)
 -sys.modules[module_name] = module
 -assert spec.loader is not None
 -assert module is not None
@@ -148,6 +157,7 @@ index c6336a6951..34e394d818 100644
 -
 -bounded_compute_graph_mapping = module.bounded_compute_graph_mapping
 -shape_compute_graph_mapping = module.shape_compute_graph_mapping
 -
 +if len(sys.argv) < 2 or sys.argv[1] != "dummy":
 +    spec = importlib.util.spec_from_file_location(module_name, file_path)
 +    assert spec is not None
@@ -159,9 +169,10 @@ index c6336a6951..34e394d818 100644
 +
 +    bounded_compute_graph_mapping = module.bounded_compute_graph_mapping
 +    shape_compute_graph_mapping = module.shape_compute_graph_mapping
 +
 +else:
 +    bounded_compute_graph_mapping = {}
 +    shape_compute_graph_mapping = {}
 SHAPE_HEADER = r"""
 /**
@@ -1,16 +1,14 @@
 Patch build files to also use system libraries instead of bundled ones for the
 libraries not supported or working only by specifying USE_SYSTEM_LIBS.  This
 includes using the clog, cpuinfo, fbgemm, foxi, fp16, fxdiv, googletest,
-ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool, qnnpack,
+httplib, ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool,
 qnnpack-pytorch, tensorpipe, valgrind and xnnpack packages.
 For QNNPACK, two versions were bundled and are required: The upstream one and
 an internal fork (now in the package qnnpack-pytorch).
 diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
-index 96fc297..7f27b66 100644
+index 085af373e..3287429b4 100644
 --- a/aten/src/ATen/CMakeLists.txt
 +++ b/aten/src/ATen/CMakeLists.txt
-@@ -362,9 +362,9 @@ if(AT_NNPACK_ENABLED)
+@@ -468,9 +468,9 @@ if(AT_NNPACK_ENABLED)
   list(APPEND ATen_CPU_DEPENDENCY_LIBS nnpack) # cpuinfo is added below
 endif()
@@ -21,13 +19,13 @@ index 96fc297..7f27b66 100644
 +  list(APPEND ATen_CPU_DEPENDENCY_LIBS DNNL::dnnl)
 +endif(USE_MKLDNN)
- list(APPEND ATen_CPU_DEPENDENCY_LIBS cpuinfo)
+ if(USE_MKLDNN_ACL)
- 
+     list(APPEND ATen_CPU_INCLUDE ${ACL_INCLUDE_DIRS})
 diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
-index 221e3f3..417f601 100644
+index d2d23b7ab..1a7e5a042 100644
 --- a/caffe2/CMakeLists.txt
 +++ b/caffe2/CMakeLists.txt
-@@ -110,9 +110,6 @@ if(NOT MSVC AND USE_XNNPACK)
+@@ -91,9 +91,6 @@ if(NOT MSVC AND USE_XNNPACK)
   if(NOT TARGET fxdiv)
     set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
     set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
@@ -37,7 +35,7 @@ index 221e3f3..417f601 100644
   endif()
 endif()
-@@ -975,7 +972,6 @@ elseif(USE_CUDA)
+@@ -1135,7 +1132,6 @@ if(USE_XPU)
 endif()
 if(NOT MSVC AND USE_XNNPACK)
@@ -45,15 +43,26 @@ index 221e3f3..417f601 100644
 endif()
 # ==========================================================
-@@ -1314,6 +1310,7 @@ target_link_libraries(torch_cpu PUBLIC c10)
+@@ -1254,8 +1250,8 @@ endif()
 target_include_directories(torch_cpu PRIVATE
   ${TORCH_ROOT}/third_party/cpp-httplib)
 -target_include_directories(torch_cpu PRIVATE
 -  ${TORCH_ROOT}/third_party/nlohmann/include)
 +find_package(httplib REQUIRED)
 +target_link_libraries(torch_cpu PUBLIC httplib::httplib)
 install(DIRECTORY "${TORCH_SRC_DIR}/csrc"
   DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch
@@ -1494,6 +1490,7 @@ target_link_libraries(torch_cpu PUBLIC c10)
 target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS})
 target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS})
 target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS})
 +target_link_libraries(torch_cpu PRIVATE miniz clog)
- target_include_directories(torch_cpu INTERFACE $<INSTALL_INTERFACE:include>)
+ if(USE_MPI)
- target_include_directories(torch_cpu PRIVATE ${Caffe2_CPU_INCLUDE})
+   target_link_libraries(torch_cpu PRIVATE MPI::MPI_CXX)
- target_include_directories(torch_cpu SYSTEM PRIVATE "${Caffe2_DEPENDENCY_INCLUDE}")
+ endif()
-@@ -1570,7 +1567,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK)
+@@ -1728,7 +1725,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK)
   add_executable(static_runtime_bench "${STATIC_RUNTIME_BENCHMARK_SRCS}")
   add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}")
   target_link_libraries(static_runtime_bench torch_library benchmark)
@@ -61,8 +70,8 @@ index 221e3f3..417f601 100644
 +  target_link_libraries(static_runtime_test torch_library gtest_main gtest)
 endif()
- if(BUILD_TENSOREXPR_BENCHMARK)
+ if(BUILD_MOBILE_BENCHMARK)
-@@ -1601,7 +1598,7 @@ if(BUILD_MOBILE_TEST)
+@@ -1747,7 +1744,7 @@ if(BUILD_MOBILE_TEST)
   foreach(test_src ${ATen_MOBILE_TEST_SRCS})
     get_filename_component(test_name ${test_src} NAME_WE)
     add_executable(${test_name} "${test_src}")
@@ -71,32 +80,61 @@ index 221e3f3..417f601 100644
     target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
     target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
     target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE})
-@@ -1622,13 +1619,13 @@ if(BUILD_TEST)
+@@ -1768,7 +1765,7 @@ if(BUILD_TEST)
         if(NOT MSVC)
           add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp)
           # TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR)
-          target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main)
+-          target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main nlohmann)
-+          target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main gtest)
+          target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main gtest nlohmann)
           if(USE_FBGEMM)
             target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm)
           endif()
@@ -1782,7 +1779,7 @@ if(BUILD_TEST)
           endif()
         else()
           add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}")
-          target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library gtest_main)
+-          target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main)
-+          target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library gtest_main gtest)
+          target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main gtest)
         endif()
         target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<INSTALL_INTERFACE:include>)
         target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
-@@ -1645,7 +1642,7 @@ if(BUILD_TEST)
+@@ -1799,7 +1796,7 @@ if(BUILD_TEST)
   foreach(test_src ${Caffe2_CPU_TEST_SRCS})
     get_filename_component(test_name ${test_src} NAME_WE)
     add_executable(${test_name} "${test_src}")
 -    target_link_libraries(${test_name} torch_library gtest_main)
 +    target_link_libraries(${test_name} torch_library gtest_main gtest)
-     target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
+     if(NOT MSVC)
-     target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
+       target_link_libraries(${test_name} stdc++)
-     target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
+     endif()
-@@ -1703,7 +1700,7 @@ if(BUILD_TEST)
+@@ -1823,7 +1820,7 @@ if(BUILD_TEST)
       add_executable(${test_name} "${test_src}")
       find_library(metal NAMES Metal)
       find_library(foundation NAMES Foundation)
 -      target_link_libraries(${test_name} torch_library gtest_main ${metal} ${foundation})
 +      target_link_libraries(${test_name} torch_library gtest_main gtest ${metal} ${foundation})
       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
       target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
@@ -1843,7 +1840,7 @@ if(BUILD_TEST)
     foreach(test_src ${Caffe2_GPU_TEST_SRCS})
       get_filename_component(test_name ${test_src} NAME_WE)
       add_executable(${test_name} "${test_src}")
 -      target_link_libraries(${test_name} torch_library gtest_main)
 +      target_link_libraries(${test_name} torch_library gtest_main gtest)
       if(USE_CUDNN AND ${test_name} MATCHES "cudnn")
         target_link_libraries(${test_name} torch::cudnn)
       endif()
@@ -1865,7 +1862,7 @@ if(BUILD_TEST)
     foreach(test_src ${Caffe2_XPU_TEST_SRCS})
       get_filename_component(test_name ${test_src} NAME_WE)
       add_executable(${test_name} "${test_src}")
 -      target_link_libraries(${test_name} torch_library gtest_main)
 +      target_link_libraries(${test_name} torch_library gtest_main gtest)
       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
       add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
@@ -1880,7 +1877,7 @@ if(BUILD_TEST)
     foreach(test_src ${Caffe2_VULKAN_TEST_SRCS})
       get_filename_component(test_name ${test_src} NAME_WE)
       add_executable(${test_name} "${test_src}")
@@ -105,23 +143,32 @@ index 221e3f3..417f601 100644
       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
       add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
@@ -1899,7 +1896,7 @@ if(BUILD_TEST)
     foreach(test_src ${Caffe2_HIP_TEST_SRCS})
       get_filename_component(test_name ${test_src} NAME_WE)
       add_executable(${test_name} "${test_src}")
 -      target_link_libraries(${test_name} torch_library gtest_main)
 +      target_link_libraries(${test_name} torch_library gtest_main gtest)
       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE})
       target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS})
 diff --git a/caffe2/serialize/CMakeLists.txt b/caffe2/serialize/CMakeLists.txt
-index 1552b59..67e1a9a 100644
+index ebbff0f29..dcded2590 100644
 --- a/caffe2/serialize/CMakeLists.txt
 +++ b/caffe2/serialize/CMakeLists.txt
@@ -2,7 +2,6 @@ file(GLOB tmp *_test.cc)
 set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp})
 list(APPEND Caffe2_CPU_SRCS
-  ${PROJECT_SOURCE_DIR}/third_party/miniz-2.1.0/miniz.c
+-  ${PROJECT_SOURCE_DIR}/third_party/miniz-3.0.2/miniz.c
   ${CMAKE_CURRENT_SOURCE_DIR}/inline_container.cc
   ${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc
   ${CMAKE_CURRENT_SOURCE_DIR}/file_adapter.cc
 diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
-index 8c0e3c2..d65576a 100644
+index be45936a8..bb1aa1cc1 100644
 --- a/cmake/Dependencies.cmake
 +++ b/cmake/Dependencies.cmake
-@@ -298,7 +298,7 @@ endif()
+@@ -276,7 +276,7 @@ endif()
 # --- [ PocketFFT
 set(AT_POCKETFFT_ENABLED 0)
 if(NOT AT_MKL_ENABLED)
@@ -130,27 +177,7 @@ index 8c0e3c2..d65576a 100644
   if(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}")
     message(FATAL_ERROR "pocketfft directory not found, expected ${POCKETFFT_INCLUDE_DIR}")
   elif(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}/pocketfft_hdronly.h")
-@@ -501,19 +501,6 @@ if(USE_QNNPACK)
+@@ -460,15 +460,6 @@ if(USE_PYTORCH_QNNPACK)
     set(QNNPACK_BUILD_TESTS OFF CACHE BOOL "")
     set(QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
     set(QNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
 -    add_subdirectory(
 -      "${QNNPACK_SOURCE_DIR}"
 -      "${CONFU_DEPENDENCIES_BINARY_DIR}/QNNPACK")
 -
 -    # TODO: See https://github.com/pytorch/pytorch/issues/56285
 -    if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
 -      target_compile_options(qnnpack PRIVATE -Wno-deprecated-declarations)
 -    endif()
 -
 -    # We build static versions of QNNPACK and pthreadpool but link
 -    # them into a shared library for Caffe2, so they need PIC.
 -    set_property(TARGET qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
 -    set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
     if(QNNPACK_CUSTOM_THREADPOOL)
       target_compile_definitions(
@@ -562,13 +549,6 @@ if(USE_PYTORCH_QNNPACK)
       set(PYTORCH_QNNPACK_BUILD_TESTS OFF CACHE BOOL "")
       set(PYTORCH_QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
       set(PYTORCH_QNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
@@ -161,10 +188,33 @@ index 8c0e3c2..d65576a 100644
 -      # them into a shared library for Caffe2, so they need PIC.
 -      set_property(TARGET pytorch_qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
 -      set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
 -      # QNNPACK depends on gemmlowp headers
 -      target_include_directories(pytorch_qnnpack PRIVATE "${CAFFE2_THIRD_PARTY_ROOT}/gemmlowp")
     endif()
-       if(PYTORCH_QNNPACK_CUSTOM_THREADPOOL)
+     list(APPEND Caffe2_DEPENDENCY_LIBS pytorch_qnnpack)
-         target_compile_definitions(
+@@ -558,16 +549,15 @@ if(USE_XNNPACK AND NOT USE_SYSTEM_XNNPACK)
-@@ -750,11 +730,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
+   list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod)
 elseif(NOT TARGET XNNPACK AND USE_SYSTEM_XNNPACK)
   add_library(XNNPACK SHARED IMPORTED)
 -  add_library(microkernels-prod SHARED IMPORTED)
 +  add_library(microkernels-prod INTERFACE IMPORTED)
   find_library(XNNPACK_LIBRARY XNNPACK)
 -  find_library(microkernels-prod_LIBRARY microkernels-prod)
   set_property(TARGET XNNPACK PROPERTY IMPORTED_LOCATION "${XNNPACK_LIBRARY}")
 -  set_property(TARGET microkernels-prod PROPERTY IMPORTED_LOCATION "${microkernels-prod_LIBRARY}")
 -  if(NOT XNNPACK_LIBRARY or NOT microkernels-prod_LIBRARY)
 +  set_property(TARGET microkernels-prod PROPERTY INTERFACE_LINK_LIBRARIES XNNPACK)
 +  if(NOT XNNPACK_LIBRARY)
     message(FATAL_ERROR "Cannot find XNNPACK")
   endif()
   message("-- Found XNNPACK: ${XNNPACK_LIBRARY}")
 -  list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod)
 +  list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK)
 endif()
 # ---[ Vulkan deps
@@ -650,11 +640,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
   # this shouldn't be necessary anymore.
   get_property(INC_DIR_temp DIRECTORY PROPERTY INCLUDE_DIRECTORIES)
   set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "")
@@ -176,9 +226,9 @@ index 8c0e3c2..d65576a 100644
   # We will not need to test benchmark lib itself.
   set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.")
-@@ -829,16 +804,6 @@ if(USE_FBGEMM)
+@@ -732,16 +717,6 @@ if(USE_FBGEMM)
-     else()
+     if(USE_ASAN)
-       set(FBGEMM_LIBRARY_TYPE "static" CACHE STRING "")
+       set(USE_SANITIZER "address,undefined" CACHE STRING "-fsanitize options for FBGEMM")
     endif()
 -    add_subdirectory("${FBGEMM_SOURCE_DIR}")
 -    set_property(TARGET fbgemm_generic PROPERTY POSITION_INDEPENDENT_CODE ON)
@@ -190,44 +240,39 @@ index 8c0e3c2..d65576a 100644
 -      target_compile_options_if_supported(asmjit -Wno-deprecated-copy)
 -      target_compile_options_if_supported(asmjit -Wno-unused-but-set-variable)
 -    endif()
     if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
       target_compile_options_if_supported(asmjit -Wno-extra-semi)
       target_compile_options_if_supported(fbgemm -Wno-extra-semi)
@@ -829,7 +804,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
       "${CONFU_DEPENDENCIES_BINARY_DIR}/FP16")
   endif()
   if(USE_FBGEMM)
@@ -1001,7 +966,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
     "${FP16_SOURCE_DIR}"
     "${CONFU_DEPENDENCIES_BINARY_DIR}/FP16")
 elseif(NOT TARGET fp16 AND USE_SYSTEM_FP16)
 -  add_library(fp16 STATIC "/usr/include/fp16.h")
 +  add_library(fp16 STATIC "#FP16_INCLUDE_DIR")
   set_target_properties(fp16 PROPERTIES LINKER_LANGUAGE C)
 endif()
 list(APPEND Caffe2_DEPENDENCY_LIBS fp16)
-@@ -1395,7 +1360,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
+@@ -1170,10 +1145,9 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
- 
+       message(WARNING "Archived TensorPipe forces CMake compatibility mode")
-     # Tensorpipe uses cuda_add_library
+       set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
     torch_update_find_cuda_flags()
 -    add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe)
     list(APPEND Caffe2_DEPENDENCY_LIBS tensorpipe)
     if(USE_CUDA)
@@ -1551,7 +1515,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
       set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17)
     endif()
-   endif()
+-    add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe)
-  add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL)
+     # Suppress warning to unblock libnop comiplation by clang-17
- 
+     # See https://github.com/pytorch/pytorch/issues/151316
-   add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE})
+     target_compile_options_if_supported(tensorpipe -Wno-missing-template-arg-list-after-template-kw)
-   if(NOT USE_SYSTEM_ONNX)
+     if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
-@@ -1582,7 +1545,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
+       unset(CMAKE_POLICY_VERSION_MINIMUM)
     endif()
@@ -1340,7 +1314,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
     endif()
     set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION ${ONNX_PROTO_LIBRARY})
     message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
 -    list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
 +    list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx onnx_optimizer)
   endif()
-   include_directories(${FOXI_INCLUDE_DIRS})
+   # Recover the build shared libs option.
-   list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
+   set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
-@@ -1752,9 +1715,8 @@ if(NOT INTERN_BUILD_MOBILE)
+@@ -1500,9 +1474,8 @@ if(NOT INTERN_BUILD_MOBILE)
   endif()
   if(USE_MKLDNN)
     include(${CMAKE_CURRENT_LIST_DIR}/public/mkldnn.cmake)
@@ -235,10 +280,10 @@ index 8c0e3c2..d65576a 100644
 +    if(DNNL_FOUND)
       set(AT_MKLDNN_ENABLED 1)
 -      include_directories(AFTER SYSTEM ${MKLDNN_INCLUDE_DIR})
-       if(BUILD_CAFFE2_OPS)
+     else()
-         list(APPEND Caffe2_DEPENDENCY_LIBS caffe2::mkldnn)
+       message(WARNING "MKLDNN could not be found.")
-       endif(BUILD_CAFFE2_OPS)
+       caffe2_update_option(USE_MKLDNN OFF)
-@@ -1819,7 +1781,7 @@ endif()
+@@ -1583,7 +1556,7 @@ endif()
 #
 set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
 set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE)
@@ -247,7 +292,7 @@ index 8c0e3c2..d65576a 100644
 # Disable compiler feature checks for `fmt`.
 #
-@@ -1828,7 +1790,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
+@@ -1592,7 +1565,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
 # CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know
 # `fmt` is compatible with a superset of the compilers that PyTorch is, it
 # shouldn't be too bad to just disable the checks.
@@ -256,7 +301,7 @@ index 8c0e3c2..d65576a 100644
 list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
 set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE)
 diff --git a/cmake/External/nnpack.cmake b/cmake/External/nnpack.cmake
-index a41343c..6075bdd 100644
+index 8a4a310d6..f413d2e61 100644
 --- a/cmake/External/nnpack.cmake
 +++ b/cmake/External/nnpack.cmake
@@ -40,7 +40,7 @@ endif()
@@ -268,7 +313,7 @@ index a41343c..6075bdd 100644
   message(STATUS "Brace yourself, we are building NNPACK")
   set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party)
-@@ -114,6 +114,5 @@ endif()
+@@ -94,6 +94,5 @@ endif()
 # (4) Catch-all: not supported.
 ##############################################################################
@@ -278,7 +323,7 @@ index a41343c..6075bdd 100644
 +set(NNPACK_FOUND TRUE)
 +set(USE_NNPACK ON)
 diff --git a/cmake/public/mkldnn.cmake b/cmake/public/mkldnn.cmake
-index 50404d3..ca067f0 100644
+index 87935625f..9f8fa3df8 100644
 --- a/cmake/public/mkldnn.cmake
 +++ b/cmake/public/mkldnn.cmake
@@ -4,7 +4,7 @@ if(CPU_AARCH64)
@@ -290,105 +335,93 @@ index 50404d3..ca067f0 100644
 if(NOT TARGET caffe2::mkldnn)
   add_library(caffe2::mkldnn INTERFACE IMPORTED)
-@@ -15,7 +15,7 @@ set_property(
+@@ -15,4 +15,4 @@ set_property(
   ${MKLDNN_INCLUDE_DIR})
 set_property(
   TARGET caffe2::mkldnn PROPERTY INTERFACE_LINK_LIBRARIES
 -  ${MKLDNN_LIBRARIES})
 +  DNNL::dnnl)
 if(BUILD_ONEDNN_GRAPH)
   if(NOT TARGET caffe2::dnnl_graph)
     add_library(caffe2::dnnl_graph INTERFACE IMPORTED)
 diff --git a/setup.py b/setup.py
-index 34b2854..5db117f 100644
+index 61ee9363f..3691cc35c 100644
 --- a/setup.py
 +++ b/setup.py
-@@ -418,13 +418,9 @@ def build_deps():
+@@ -508,13 +508,9 @@ def build_deps():
     # Windows has very poor support for them.
     sym_files = [
-         'tools/shared/_utils_internal.py',
+         "tools/shared/_utils_internal.py",
-        'torch/utils/benchmark/utils/valgrind_wrapper/callgrind.h',
+-        "torch/utils/benchmark/utils/valgrind_wrapper/callgrind.h",
-        'torch/utils/benchmark/utils/valgrind_wrapper/valgrind.h',
+-        "torch/utils/benchmark/utils/valgrind_wrapper/valgrind.h",
     ]
     orig_files = [
-         'torch/_utils_internal.py',
+         "torch/_utils_internal.py",
-        'third_party/valgrind-headers/callgrind.h',
+-        "third_party/valgrind-headers/callgrind.h",
-        'third_party/valgrind-headers/valgrind.h',
+-        "third_party/valgrind-headers/valgrind.h",
     ]
     for sym_file, orig_file in zip(sym_files, orig_files):
         same = False
 diff --git a/test/cpp/c10d/CMakeLists.txt b/test/cpp/c10d/CMakeLists.txt
-index 89c6b91..0c60d08 100644
+index 5b423241d..e069accd6 100644
 --- a/test/cpp/c10d/CMakeLists.txt
 +++ b/test/cpp/c10d/CMakeLists.txt
-@@ -16,14 +16,14 @@ function(c10d_add_test test_src)
+@@ -26,17 +26,17 @@ function(c10d_add_test test_src)
-   add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
+   endif()
 endfunction()
-c10d_add_test(FileStoreTest.cpp torch_cpu gtest_main)
+-c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF)
-c10d_add_test(TCPStoreTest.cpp torch_cpu gtest_main)
+-c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
-+c10d_add_test(FileStoreTest.cpp torch_cpu gtest_main gtest)
+-c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
-+c10d_add_test(TCPStoreTest.cpp torch_cpu gtest_main gtest)
+c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST OFF)
- if(INSTALL_TEST)
+c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
-   install(TARGETS FileStoreTest DESTINATION bin)
+c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
   install(TARGETS TCPStoreTest DESTINATION bin)
 endif()
 if(NOT WIN32)
-  c10d_add_test(HashStoreTest.cpp torch_cpu gtest_main)
+-  c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
-+  c10d_add_test(HashStoreTest.cpp torch_cpu gtest_main gtest)
+  c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
-   if(INSTALL_TEST)
+ endif()
     install(TARGETS HashStoreTest DESTINATION bin)
   endif()
@@ -31,11 +31,11 @@ endif()
 if(USE_CUDA)
   if(USE_GLOO AND USE_C10D_GLOO)
-    c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu c10d_cuda_test gtest_main)
+-    c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST})
-+    c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu c10d_cuda_test gtest_main gtest)
+-    c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST})
-     if(INSTALL_TEST)
+    c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
-       install(TARGETS ProcessGroupGlooTest DESTINATION bin)
+    c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
     endif()
 -    c10d_add_test(ProcessGroupGlooAsyncTest.cpp torch_cpu c10d_cuda_test gtest_main)
 +    c10d_add_test(ProcessGroupGlooAsyncTest.cpp torch_cpu c10d_cuda_test gtest_main gtest)
   endif()
   if(USE_NCCL AND USE_C10D_NCCL)
     # NCCL is a private dependency of libtorch, but the tests include some
-@@ -44,10 +44,10 @@ if(USE_CUDA)
+@@ -45,10 +45,10 @@ if(USE_CUDA)
     # a private dependency of the tests as well.
     c10d_add_test(
       ProcessGroupNCCLTest.cpp
-      torch_cpu c10d_cuda_test gtest_main __caffe2_nccl)
+-      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
-+      torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl)
+      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
     c10d_add_test(
       ProcessGroupNCCLErrorsTest.cpp
-      torch_cpu c10d_cuda_test gtest_main __caffe2_nccl)
+-      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
-+      torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl)
+      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
     if(INSTALL_TEST)
-       install(TARGETS ProcessGroupNCCLTest DESTINATION bin)
+       install(TARGETS c10d_cuda_test DESTINATION lib)
-       install(TARGETS ProcessGroupNCCLErrorsTest DESTINATION bin)
+     endif()
-@@ -61,7 +61,7 @@ if(USE_CUDA)
+@@ -60,14 +60,14 @@ if(USE_CUDA)
     # a private dependency of the tests as well.
     c10d_add_test(
       ProcessGroupUCCTest.cpp
-      torch_cpu c10d_cuda_test gtest_main __caffe2_ucc)
+-      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_ucc INSTALL_TEST ${INSTALL_TEST})
-+      torch_cpu c10d_cuda_test gtest_main gtest __caffe2_ucc)
+      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_ucc INSTALL_TEST ${INSTALL_TEST})
     if(INSTALL_TEST)
       install(TARGETS ProcessGroupUCCTest DESTINATION bin)
       install(TARGETS c10d_cuda_test DESTINATION lib)
-@@ -69,7 +69,7 @@ if(USE_CUDA)
+     endif()
   endif()
 else()
   if(USE_GLOO AND USE_C10D_GLOO)
-    c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu gtest_main)
+-    c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF)
-+    c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu gtest_main gtest)
+    c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST OFF)
   endif()
 endif()
 diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt
-index 7dff706..90b1003 100644
+index 9c409e078..6cddd8de4 100644
 --- a/test/cpp/tensorexpr/CMakeLists.txt
 +++ b/test/cpp/tensorexpr/CMakeLists.txt
-@@ -54,7 +54,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
+@@ -51,7 +51,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
 # pthreadpool header. For some build environment we need add the dependency
 # explicitly.
 if(USE_PTHREADPOOL)
@@ -396,4 +429,17 @@ index 7dff706..90b1003 100644
 +  target_link_libraries(test_tensorexpr PRIVATE pthreadpool)
 endif()
 if(USE_CUDA)
-   target_link_libraries(test_tensorexpr PRIVATE
+   target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA)
 diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
 index 8b8ebdc6e..034b5e56c 100644
 --- a/torch/CMakeLists.txt
 +++ b/torch/CMakeLists.txt
@@ -82,8 +82,6 @@ set(TORCH_PYTHON_LINK_LIBRARIES
     Python::Module
     pybind::pybind11
     opentelemetry::api
 -    httplib
 -    nlohmann
     shm
     fmt::fmt-header-only
     ATEN_CPU_FILES_GEN_LIB)
@@ -0,0 +1,64 @@
 Even when building without Kineto, the <ActivityType.h> header is still
 included and the libkineto::ActivityType enum is used.  This patch was
 copied from https://github.com/pytorch/pytorch/pull/111048 and adapted.
 diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h
 index c4efd7785..2caef1f1e 100644
 --- a/torch/csrc/profiler/kineto_shim.h
 +++ b/torch/csrc/profiler/kineto_shim.h
@@ -12,7 +12,55 @@
 #undef USE_KINETO
 #endif
 +#ifdef USE_KINETO
 #include <ActivityType.h>
 +#else
 +namespace libkineto {
 +// copied from header
 +/*
 + * Copyright (c) Meta Platforms, Inc. and affiliates.
 + * All rights reserved.
 + *
 + * This source code is licensed under the BSD-style license found in the
 + * LICENSE file in the root directory of this source tree.
 + */
 +
 +// Note: Not all activity types are enabled by default. Please add new
 +// ones at the correct position in the enum.
 +enum class ActivityType {
 +    // Activity types enabled by default
 +    CPU_OP = 0, // CPU-side ops
 +    USER_ANNOTATION,
 +    GPU_USER_ANNOTATION,
 +    GPU_MEMCPY,
 +    GPU_MEMSET,
 +    CONCURRENT_KERNEL, // on-device kernels
 +    EXTERNAL_CORRELATION,
 +    CUDA_RUNTIME, // host-side CUDA runtime events
 +    CUDA_DRIVER, // host-side CUDA driver events
 +    CPU_INSTANT_EVENT, // host-side point-like events
 +    PYTHON_FUNCTION,
 +    OVERHEAD, // CUPTI-induced overhead events sampled from its overhead API.
 +
 +    // Optional activity types
 +    CUDA_SYNC, // synchronization events between runtime and kernels
 +    GLOW_RUNTIME, // host-side Glow runtime events
 +    MTIA_RUNTIME, // host-side MTIA runtime events
 +    CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics
 +    MTIA_CCP_EVENTS, // MTIA on-device CCP events
 +    HPU_OP, // HPU host-side runtime event
 +    XPU_RUNTIME, // host-side XPU runtime events
 +    MTIA_WORKLOADD, // NOTE(review): spelling presumably mirrors the copied header - confirm before renaming
 +
 +    PRIVATEUSE1_RUNTIME,
 +    PRIVATEUSE1_DRIVER,
 +
 +    ENUM_COUNT, // Adds buffer; not used for any profiling logic. Add your new type before it.
 +    OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC, // alias for the first optional activity type listed above
 +};
 +}
 +
 +#endif
 #include <torch/csrc/Export.h>
 #include <torch/csrc/profiler/api.h>