From 4d5f382b068997a63c54f6fc75a65f08ece7ff10 Mon Sep 17 00:00:00 2001 From: Christopher Milan Date: Tue, 9 Jun 2026 11:29:14 -0700 Subject: [PATCH 1/3] llvm is an optional dependency --- ocelot/CMakeLists.txt | 90 +++++++++++++------ .../include/ocelot/ir/ExternalFunctionSet.h | 24 +++-- ocelot/src/executive/Device.cpp | 8 ++ ocelot/src/ir/ExternalFunctionSet.cpp | 27 ++++-- 4 files changed, 109 insertions(+), 40 deletions(-) diff --git a/ocelot/CMakeLists.txt b/ocelot/CMakeLists.txt index 16a91808..2662d58f 100644 --- a/ocelot/CMakeLists.txt +++ b/ocelot/CMakeLists.txt @@ -5,6 +5,7 @@ option(BUILD_TESTS "Build tests: default OFF" OFF) option(BUILD_TESTS_CUDA "Build CUDA tests: default ON" ON) option(BUILD_TOOLS "Build tool executables: default ON" ON) option(ENABLE_OPENGL "Build OpenGL interop support" OFF) +option(ENABLE_LLVM "Build LLVM backend support" OFF) if (NOT APPLE AND BUILD_TESTS AND BUILD_TESTS_CUDA) project(gpuocelot C CXX CUDA ASM) @@ -37,14 +38,16 @@ set(Boost_USE_MULTITHREADED ON) find_package(Boost COMPONENTS filesystem thread REQUIRED) find_package(FLEX 2.5 REQUIRED) find_package(BISON 2.5 REQUIRED) -find_package(ZLIB REQUIRED) -find_library(ZSTD_LIB NAMES zstd libzstd) -find_package(Curses REQUIRED) if (ENABLE_OPENGL) find_package(GLEW REQUIRED) endif() +if (ENABLE_LLVM) +find_package(ZLIB REQUIRED) +find_library(ZSTD_LIB NAMES zstd libzstd) +find_package(Curses REQUIRED) + if ("x${BUILD_LLVM}" STREQUAL "xOFF") find_package(LLVM REQUIRED CONFIG) @@ -232,6 +235,7 @@ set(LLVM_LIBRARIES add_library(llvm INTERFACE) add_dependencies(llvm llvm-project) endif() +endif() set(BUILD_SHARED_LIBS OFF) # ThirdParty/res_embed option add_subdirectory(ThirdParty/hydrazine) @@ -242,7 +246,6 @@ include(ResEmbed) separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${PROJECT_NAME}_DEFINITIONS) set(${PROJECT_NAME}_INCLUDE_DIRS - ${LLVM_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}/ThirdParty/cuda-fatbin-decompression ${CMAKE_CURRENT_SOURCE_DIR}/include 
${CMAKE_CURRENT_BINARY_DIR}) @@ -250,9 +253,6 @@ set(${PROJECT_NAME}_INCLUDE_DIRS set(${PROJECT_NAME}_LINK_LIBRARIES hydrazine Boost::filesystem - ZLIB::ZLIB - ${CURSES_LIBRARIES} - ${ZSTD_LIB} ${CMAKE_DL_LIBS}) if (ENABLE_OPENGL) @@ -263,16 +263,28 @@ else() list(APPEND ${PROJECT_NAME}_DEFINITIONS ENABLE_OPENGL=0) endif() +if (ENABLE_LLVM) +list(APPEND ${PROJECT_NAME}_INCLUDE_DIRS ${LLVM_INCLUDE_DIRS}) +list(APPEND ${PROJECT_NAME}_LINK_LIBRARIES ZLIB::ZLIB ${CURSES_LIBRARIES} ${ZSTD_LIB}) +set(${PROJECT_NAME}_LIBRARY_DIRS ${LLVM_LIBRARY_DIR}) + if ("x${BUILD_LLVM}" STREQUAL "xON") list(APPEND ${PROJECT_NAME}_LINK_LIBRARIES llvm) endif() -set(${PROJECT_NAME}_LIBRARY_DIRS ${LLVM_LIBRARY_DIR}) +list(APPEND ${PROJECT_NAME}_DEFINITIONS ENABLE_LLVM=1) +else() +list(APPEND ${PROJECT_NAME}_DEFINITIONS ENABLE_LLVM=0) +endif() + include(CTest) function(ocelot_add_tests name) file(GLOB TESTS "src/${name}/test/Test*.cpp" "src/${name}/test/Test*.cu") + if (NOT ENABLE_LLVM) + list(FILTER TESTS EXCLUDE REGEX "LLVM|TestExternalFunctions") + endif() foreach(TEST ${TESTS}) get_filename_component(TEST_WE ${TEST} NAME_WE) set(TEST_NAME ${PROJECT_NAME}_${name}_${TEST_WE}) @@ -375,7 +387,7 @@ target_include_directories(${PROJECT_NAME}_cuda PRIVATE ${${PROJECT_NAME}_INCLUD target_link_libraries(${PROJECT_NAME}_cuda PUBLIC ${${PROJECT_NAME}_LINK_LIBRARIES}) target_link_directories(${PROJECT_NAME}_cuda PUBLIC ${${PROJECT_NAME}_LIBRARY_DIRS}) -add_library(${PROJECT_NAME}_executive STATIC +set(${PROJECT_NAME}_executive_sources src/executive/ATIExecutableKernel.cpp src/executive/ATIGPUDevice.cpp src/executive/CTAContext.cpp @@ -387,13 +399,6 @@ add_library(${PROJECT_NAME}_executive STATIC src/executive/EmulatorDevice.cpp src/executive/ExecutableKernel.cpp src/executive/FrameInfo.cpp - src/executive/LLVMContext.cpp - src/executive/LLVMCooperativeThreadArray.cpp - src/executive/LLVMExecutableKernel.cpp - src/executive/LLVMExecutionManager.cpp - src/executive/LLVMFunctionCallStack.cpp - 
src/executive/LLVMWorkerThread.cpp - src/executive/MulticoreCPUDevice.cpp src/executive/NVIDIAExecutableKernel.cpp src/executive/NVIDIAGPUDevice.cpp src/executive/PassThroughDevice.cpp @@ -401,6 +406,20 @@ add_library(${PROJECT_NAME}_executive STATIC src/executive/RemoteDevice.cpp src/executive/RuntimeException.cpp src/executive/TextureOperations.cpp) + +if (ENABLE_LLVM) +list(APPEND ${PROJECT_NAME}_executive_sources + src/executive/LLVMContext.cpp + src/executive/LLVMCooperativeThreadArray.cpp + src/executive/LLVMExecutableKernel.cpp + src/executive/LLVMExecutionManager.cpp + src/executive/LLVMFunctionCallStack.cpp + src/executive/LLVMWorkerThread.cpp + src/executive/MulticoreCPUDevice.cpp) +endif() + +add_library(${PROJECT_NAME}_executive STATIC ${${PROJECT_NAME}_executive_sources}) + set_property(TARGET ${PROJECT_NAME}_executive PROPERTY CXX_STANDARD 14) set_property(TARGET ${PROJECT_NAME}_executive PROPERTY POSITION_INDEPENDENT_CODE ON) target_compile_definitions(${PROJECT_NAME}_executive PRIVATE ${${PROJECT_NAME}_DEFINITIONS}) @@ -409,7 +428,7 @@ target_include_directories(${PROJECT_NAME}_executive PRIVATE ${${PROJECT_NAME}_I target_link_libraries(${PROJECT_NAME}_executive PUBLIC ${${PROJECT_NAME}_LINK_LIBRARIES}) target_link_directories(${PROJECT_NAME}_executive PUBLIC ${${PROJECT_NAME}_LIBRARY_DIRS}) -add_library(${PROJECT_NAME}_ir STATIC +set(${PROJECT_NAME}_ir_sources src/ir/ControlFlowGraph.cpp src/ir/Dim3.cpp src/ir/ExternalFunctionSet.cpp @@ -421,11 +440,6 @@ add_library(${PROJECT_NAME}_ir STATIC src/ir/IRKernel.cpp src/ir/Instruction.cpp src/ir/Kernel.cpp - src/ir/LLVMInstruction.cpp - src/ir/LLVMKernel.cpp - src/ir/LLVMModuleManager.cpp - src/ir/LLVMState.cpp - src/ir/LLVMStatement.cpp src/ir/Local.cpp src/ir/Module.cpp src/ir/PTXInstruction.cpp @@ -434,6 +448,18 @@ add_library(${PROJECT_NAME}_ir STATIC src/ir/PTXStatement.cpp src/ir/Parameter.cpp src/ir/Texture.cpp) + +if (ENABLE_LLVM) +list(APPEND ${PROJECT_NAME}_ir_sources + 
src/ir/LLVMInstruction.cpp + src/ir/LLVMKernel.cpp + src/ir/LLVMModuleManager.cpp + src/ir/LLVMState.cpp + src/ir/LLVMStatement.cpp) +endif() + +add_library(${PROJECT_NAME}_ir STATIC ${${PROJECT_NAME}_ir_sources}) + set_property(TARGET ${PROJECT_NAME}_ir PROPERTY CXX_STANDARD 14) set_property(TARGET ${PROJECT_NAME}_ir PROPERTY POSITION_INDEPENDENT_CODE ON) target_compile_definitions(${PROJECT_NAME}_ir PRIVATE ${${PROJECT_NAME}_DEFINITIONS}) @@ -525,10 +551,16 @@ target_include_directories(${PROJECT_NAME}_transforms PRIVATE ${${PROJECT_NAME}_ target_link_libraries(${PROJECT_NAME}_transforms PUBLIC ${${PROJECT_NAME}_LINK_LIBRARIES}) target_link_directories(${PROJECT_NAME}_transforms PUBLIC ${${PROJECT_NAME}_LIBRARY_DIRS}) -add_library(${PROJECT_NAME}_translator STATIC +set(${PROJECT_NAME}_translator_sources src/translator/PTXToILTranslator.cpp - src/translator/PTXToLLVMTranslator.cpp src/translator/Translator.cpp) + +if (ENABLE_LLVM) +list(APPEND ${PROJECT_NAME}_translator_sources src/translator/PTXToLLVMTranslator.cpp) +endif() + +add_library(${PROJECT_NAME}_translator STATIC ${${PROJECT_NAME}_translator_sources}) + set_property(TARGET ${PROJECT_NAME}_translator PROPERTY CXX_STANDARD 14) set_property(TARGET ${PROJECT_NAME}_translator PROPERTY POSITION_INDEPENDENT_CODE ON) target_compile_definitions(${PROJECT_NAME}_translator PRIVATE ${${PROJECT_NAME}_DEFINITIONS}) @@ -583,14 +615,16 @@ endif() target_link_libraries(${PROJECT_NAME} PUBLIC ${${PROJECT_NAME}_LINK_LIBRARIES}) # link llvm and remove unused sections +if (ENABLE_LLVM) +target_link_libraries(${PROJECT_NAME} PUBLIC ${ZLIB_LIBRARIES} ${CURSES_LIBRARIES}) if(APPLE) target_link_libraries(${PROJECT_NAME} PRIVATE -Wl,-dead_strip ${LLVM_LIBRARIES}) else() target_link_libraries(${PROJECT_NAME} PRIVATE -Wl,--gc-sections -Wl,--start-group ${LLVM_LIBRARIES} -Wl,--end-group) endif() +endif() target_link_directories(${PROJECT_NAME} PUBLIC ${${PROJECT_NAME}_LIBRARY_DIRS}) -target_link_libraries(${PROJECT_NAME} PUBLIC 
${ZLIB_LIBRARIES} ${CURSES_LIBRARIES}) install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION $,/usr/local/lib,/usr/lib>) install(CODE "execute_process(COMMAND ldconfig)") @@ -637,8 +671,10 @@ if(BUILD_TESTS) res_embed(TARGET ${PROJECT_NAME}_executive_TestKernels NAME "TestKernels_ptx" PATH ${CMAKE_CURRENT_SOURCE_DIR}/src/executive/test/TestKernels.ptx KEYWORD) - res_embed(TARGET ${PROJECT_NAME}_executive_TestLLVMKernels NAME "TestLLVMKernels_ptx" - PATH ${CMAKE_CURRENT_SOURCE_DIR}/src/executive/test/TestLLVMKernels.ptx KEYWORD) + if (ENABLE_LLVM) + res_embed(TARGET ${PROJECT_NAME}_executive_TestLLVMKernels NAME "TestLLVMKernels_ptx" + PATH ${CMAKE_CURRENT_SOURCE_DIR}/src/executive/test/TestLLVMKernels.ptx KEYWORD) + endif() if(BUILD_TESTS_CUDA) ocelot_add_tests(cuda) diff --git a/ocelot/include/ocelot/ir/ExternalFunctionSet.h b/ocelot/include/ocelot/ir/ExternalFunctionSet.h index eb46f35a..5c55fb1a 100644 --- a/ocelot/include/ocelot/ir/ExternalFunctionSet.h +++ b/ocelot/include/ocelot/ir/ExternalFunctionSet.h @@ -11,11 +11,19 @@ #include // Forward Declarations +#if ENABLE_LLVM namespace llvm { class Module; } +#endif namespace ir { +#if ENABLE_LLVM +typedef llvm::Module ExternalFunctionModule; +#else +struct ExternalFunctionModule; +#endif + /*! 
\brief Holds a collection of external functions The idea here is to allow arbitrary PTX functions to call into external @@ -33,7 +41,7 @@ class ExternalFunctionSet { public: ExternalFunction(const std::string& identifier = "", - void* functionPointer = 0, llvm::Module* m = 0); + void* functionPointer = 0, ExternalFunctionModule* m = 0); public: void call(void* parameters, const ir::PTXKernel::Prototype& p); @@ -45,10 +53,10 @@ class ExternalFunctionSet typedef void (*ExternalCallType)(void*); private: - std::string _name; - void* _functionPointer; - llvm::Module* _module; - ExternalCallType _externalFunctionPointer; + std::string _name; + void* _functionPointer; + ExternalFunctionModule* _module; + ExternalCallType _externalFunctionPointer; }; typedef std::map FunctionSet; @@ -71,10 +79,10 @@ class ExternalFunctionSet ExternalFunction* find(const std::string& name) const; private: - FunctionSet _functions; - llvm::Module* module; + FunctionSet _functions; + ExternalFunctionModule* module; - llvm::Module* _module(); + ExternalFunctionModule* _module(); }; diff --git a/ocelot/src/executive/Device.cpp b/ocelot/src/executive/Device.cpp index 48088f9c..86aef0b2 100644 --- a/ocelot/src/executive/Device.cpp +++ b/ocelot/src/executive/Device.cpp @@ -9,7 +9,9 @@ #include #include #include +#if ENABLE_LLVM #include +#endif #include #include #include @@ -86,7 +88,9 @@ executive::DeviceVector executive::Device::createDevices( break; case ir::Instruction::LLVM: { +#if ENABLE_LLVM devices.push_back(new MulticoreCPUDevice(flags)); +#endif } break; case ir::Instruction::CAL: @@ -135,7 +139,11 @@ unsigned int executive::Device::deviceCount(ir::Instruction::Architecture isa, break; case ir::Instruction::LLVM: { +#if ENABLE_LLVM return 1; +#else + return 0; +#endif } break; case ir::Instruction::CAL: diff --git a/ocelot/src/ir/ExternalFunctionSet.cpp b/ocelot/src/ir/ExternalFunctionSet.cpp index 8427ce98..841227cb 100644 --- a/ocelot/src/ir/ExternalFunctionSet.cpp +++ 
b/ocelot/src/ir/ExternalFunctionSet.cpp @@ -9,9 +9,11 @@ // Ocelot Includes #include -#include #include +#if ENABLE_LLVM #include +#include +#endif // Hydrazine Includes #include @@ -19,6 +21,7 @@ #include // LLVM Includes +#if ENABLE_LLVM #include #include "llvm/IR/LegacyPassManager.h" #include @@ -28,6 +31,7 @@ #include #include #include +#endif // Preprocessor Macros #ifdef REPORT_BASE @@ -37,6 +41,7 @@ #define REPORT_BASE 0 #define REPORT_LLVM 0 +#if ENABLE_LLVM using namespace llvm::legacy; namespace llvm { @@ -44,6 +49,7 @@ namespace llvm { LLVMContext &getGlobalContext(); } // namespace llvm +#endif namespace ir { @@ -54,6 +60,7 @@ static unsigned int align(unsigned int address, unsigned int alignment) return remainder == 0 ? address : (address + alignment - remainder); } +#if ENABLE_LLVM static LLVMInstruction::DataType translateType(PTXOperand::DataType t) { switch(t) @@ -118,7 +125,7 @@ static std::string getValueString(unsigned int value) return stream.str(); } -static std::unique_ptr jitFunction( +static std::unique_ptr jitFunction( const ExternalFunctionSet::ExternalFunction& f, const PTXKernel::Prototype& prototype) { @@ -344,9 +351,10 @@ static std::unique_ptr jitFunction( // done, the function is now in the module return m; } +#endif // ENABLE_LLVM ExternalFunctionSet::ExternalFunction::ExternalFunction(const std::string& i, - void* f, llvm::Module* m) + void* f, ExternalFunctionModule* m) : _name(i), _functionPointer(f), _module(m), _externalFunctionPointer(0) { @@ -355,6 +363,7 @@ ExternalFunctionSet::ExternalFunction::ExternalFunction(const std::string& i, void ExternalFunctionSet::ExternalFunction::call(void* parameters, const ir::PTXKernel::Prototype& p) { +#if ENABLE_LLVM if(!_externalFunctionPointer) { assert(_module); @@ -375,6 +384,9 @@ void ExternalFunctionSet::ExternalFunction::call(void* parameters, // call through the interface to the external function _externalFunctionPointer(parameters); +#else + throw hydrazine::Exception("Calling 
registered host functions requires LLVM support."); +#endif } const std::string& ExternalFunctionSet::ExternalFunction::name() const @@ -397,17 +409,19 @@ ExternalFunctionSet::ExternalFunctionSet() { } -llvm::Module* ExternalFunctionSet::_module() +ExternalFunctionModule* ExternalFunctionSet::_module() { +#if ENABLE_LLVM if (!module) module = new llvm::Module("_ZOcelotExternalFunctionModule", llvm::getGlobalContext()); - +#endif return module; } ExternalFunctionSet::~ExternalFunctionSet() { +#if ENABLE_LLVM for(FunctionSet::const_iterator external = _functions.begin(); external != _functions.end(); ++external) { @@ -424,6 +438,7 @@ ExternalFunctionSet::~ExternalFunctionSet() executive::LLVMState::jit()->removeModule(_module()); delete _module(); +#endif } void ExternalFunctionSet::add(const std::string& name, void* pointer) @@ -450,6 +465,7 @@ void ExternalFunctionSet::remove(const std::string& name) report("Removing function " << name); +#if ENABLE_LLVM llvm::Function* llvmFunction = _module()->getFunction( function->second.mangledName()); if(llvmFunction != 0) @@ -466,6 +482,7 @@ void ExternalFunctionSet::remove(const std::string& name) << function->second.name() << " in module."); global->eraseFromParent(); } +#endif _functions.erase(function); } From f2dabfec01593beba6c6a96cdf23b2e52240c077 Mon Sep 17 00:00:00 2001 From: Christopher Milan Date: Tue, 9 Jun 2026 11:55:23 -0700 Subject: [PATCH 2/3] remove a bunch of deps --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 23f9c148..1afa4927 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,10 +20,10 @@ jobs: submodules: recursive - name: Install Linux deps if: runner.os == 'Linux' - run: sudo apt-get update && sudo apt-get -y --no-install-recommends install git g++ cmake ninja-build llvm-15-dev zlib1g-dev flex bison libfl-dev libzstd-dev + run: sudo apt-get update && sudo 
apt-get -y --no-install-recommends install cmake ninja-build - name: Install macOS deps if: runner.os == 'macOS' - run: brew install cmake ninja llvm@15 zlib flex bison boost zstd ncurses + run: brew install flex bison boost - uses: actions/checkout@v6 if: runner.os == 'Linux' with: From e731e5a8883fbbf08a1fdb645035c98452a895fe Mon Sep 17 00:00:00 2001 From: Christopher Milan Date: Tue, 9 Jun 2026 13:06:41 -0700 Subject: [PATCH 3/3] fix --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1afa4927..902b636f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,7 +20,7 @@ jobs: submodules: recursive - name: Install Linux deps if: runner.os == 'Linux' - run: sudo apt-get update && sudo apt-get -y --no-install-recommends install cmake ninja-build + run: sudo apt-get update && sudo apt-get -y --no-install-recommends install cmake ninja-build libfl-dev - name: Install macOS deps if: runner.os == 'macOS' run: brew install flex bison boost @@ -43,7 +43,7 @@ jobs: ./b2 install link=static runtime-link=shared threading=multi variant=release cxxflags=-fPIC - name: Configure (Linux) if: runner.os == 'Linux' - run: cmake -S ocelot -B ocelot/build -G Ninja -DBUILD_TESTS=ON -DBUILD_TESTS_CUDA=OFF -DLLVM_DIR=$(llvm-config-15 --cmakedir) -DBOOST_ROOT=${{ github.workspace }}/boost-fpic + run: cmake -S ocelot -B ocelot/build -G Ninja -DBUILD_TESTS=ON -DBUILD_TESTS_CUDA=OFF -DBOOST_ROOT=${{ github.workspace }}/boost-fpic - name: Configure (macOS) if: runner.os == 'macOS' run: cmake -S ocelot -B ocelot/build -G Ninja -DBUILD_TESTS=ON -DBUILD_TESTS_CUDA=OFF -DCMAKE_POLICY_VERSION_MINIMUM=3.5