diff --git a/.github/workflows/cffi-artifacts.yml b/.github/workflows/cffi-artifacts.yml new file mode 100644 index 0000000..ab68eb2 --- /dev/null +++ b/.github/workflows/cffi-artifacts.yml @@ -0,0 +1,62 @@ +name: Build C FFI Artifacts + +on: + push: + branches: + - main + - master + tags: + - "*" + pull_request: + workflow_dispatch: + +env: + CARGO_TERM_COLOR: always + +jobs: + build: + name: Build cffi on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + include: + - os: ubuntu-22.04 + artifact: cffi-linux-x86_64 + lib_name: libhycore_cffi.so + - os: macos-latest + artifact: cffi-macos + lib_name: libhycore_cffi.dylib + - os: windows-latest + artifact: cffi-windows + lib_name: hycore_cffi.dll + steps: + - name: Checkout repository + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - name: Cache cargo artifacts + uses: Swatinem/rust-cache@v2 + with: + shared-key: cffi-${{ matrix.os }} + + - name: Build hycore-cffi library + run: cargo build -p hycore-cffi --release + + - name: Collect headers and libraries + shell: bash + run: | + set -euo pipefail + DEST="cffi-artifacts/${{ matrix.artifact }}" + mkdir -p "$DEST/include" "$DEST/lib" + cp cffi/include/hycore.h "$DEST/include/" + cp "target/release/${{ matrix.lib_name }}" "$DEST/lib/" + + - name: Upload artifact + uses: actions/upload-artifact@v5 + with: + name: ${{ matrix.artifact }} + path: cffi-artifacts/${{ matrix.artifact }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..fe9788b --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,21 @@ +name: Cargo Build & Test + +on: + push: + pull_request: + +env: + CARGO_TERM_COLOR: always + +jobs: + build_and_test: + name: Build and Test + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v6 + - name: Set up Rust + run: rustup toolchain install stable --profile minimal + - uses: 
Swatinem/rust-cache@v2 + - run: cargo build + - run: cargo test diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index e3e3d16..b981d28 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -33,6 +33,8 @@ jobs: target: armv7 steps: - uses: actions/checkout@v6 + with: + fetch-depth: 0 - uses: actions/setup-python@v6 with: python-version: 3.x @@ -63,6 +65,8 @@ jobs: target: aarch64 steps: - uses: actions/checkout@v6 + with: + fetch-depth: 0 - uses: actions/setup-python@v6 with: python-version: 3.x @@ -96,6 +100,8 @@ jobs: python_arch: arm64 steps: - uses: actions/checkout@v6 + with: + fetch-depth: 0 - uses: actions/setup-python@v6 with: python-version: 3.13 @@ -124,6 +130,8 @@ jobs: target: aarch64 steps: - uses: actions/checkout@v6 + with: + fetch-depth: 0 - uses: actions/setup-python@v6 with: python-version: 3.x @@ -144,6 +152,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 + with: + fetch-depth: 0 - name: Build sdist uses: PyO3/maturin-action@v1 with: diff --git a/.gitignore b/.gitignore index 4ef2eb7..e55a146 100644 --- a/.gitignore +++ b/.gitignore @@ -41,7 +41,6 @@ CMakeCache.txt CMakeFiles CMakeScripts Testing -Makefile cmake_install.cmake install_manifest.txt compile_commands.json @@ -275,3 +274,9 @@ tags # Added by cargo Cargo.lock /target + +# I don't know why i try to use AI agents when +# I always need to rewrite everything myself cause +# they are shit. 
+AGENTS.md + diff --git a/.vscode/settings.json b/.vscode/settings.json index 3a89c71..1bcc213 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -9,6 +9,8 @@ "**/target": true, "**/__pycache__": true, "**/_sys.abi3.so": true, + "**/.crush": true, + "Cargo.lock": true, }, "editor.formatOnSave": true, "files.associations": { @@ -16,6 +18,6 @@ "stdint.h": "c" }, "rust-analyzer.cargo.features": [ - "ext_all" + "ext_all", ] } \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index ea8a55e..dc04925 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ members = [ "hyinstr", "hycore", "python", + "cffi", "examples/hycore-examples", "examples/hyinstr-parser", ] @@ -31,6 +32,7 @@ uuid = "^1" crossbeam = "^0.8" parking_lot = "^0.12" log = ">=0.2" +slotmap = "^1" num-bigint = "^0.4" bigdecimal = "^0.4" auto_enums = ">=0.7" @@ -44,6 +46,10 @@ ariadne = "0.6" pyo3 = "0.27.0" once_cell = "^1" inventory = "^0.3" +zstd = "=0.13.3" +borsh = "^1" +libc = "^0.2" +dashmap = "^6" criterion = "0.5" rand = "0.9.2" diff --git a/cffi/Cargo.toml b/cffi/Cargo.toml new file mode 100644 index 0000000..2a297f1 --- /dev/null +++ b/cffi/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "hycore-cffi" +version = "0.1.2" +edition = "2021" +license = "MIT OR Apache-2.0" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +libc.workspace = true +semver.workspace = true +strum = { workspace = true, features = ["derive"] } +hycore = { workspace = true, features = ["ext_all"] } + +[build-dependencies] +cbindgen = "0.27" +semver.workspace = true +hycore.workspace = true + +[features] +# No default features; consumers enable hycore features as needed +default = [] + +legacy_nozstd = ["hycore/legacy_nozstd"] diff --git a/cffi/build.rs b/cffi/build.rs new file mode 100644 index 0000000..a9030e8 --- /dev/null +++ b/cffi/build.rs @@ -0,0 +1,83 @@ +use std::io::Write; + +use cbindgen::{Config, ConstantConfig}; +use hycore::magic; + +fn main() { + let crate_dir = 
std::env::var("CARGO_MANIFEST_DIR").unwrap(); + let output_file = format!("{}/include/hycore.h", crate_dir); + + println!("cargo:rerun-if-changed=build.rs"); + println!("cargo:rerun-if-changed=src/"); + let version = std::env::var("CARGO_PKG_VERSION").unwrap(); + let version = semver::Version::parse(&version).unwrap(); + + let prefix = format!( + "\n#define HY_VERSION_MAJOR {}\n\ + #define HY_VERSION_MINOR {}\n\ + #define HY_VERSION_PATCH {}\n\ + #define HY_LOGGER_NAME_EXT \"{}\"\n", + version.major, + version.minor, + version.patch, + magic::HYPERION_LOGGER_NAME_EXT, + ); + let file_header = format!( + "/** + * @file hycore.h + * @brief Main C API header for Hyperion Core library. + * @version {}.{}.{} + * + * This file header provides the C API for interacting with the Hyperion framework. + * It was generated using cbindgen={}. DO NOT EDIT THIS FILE MANUALLY! + * + * This file is part of Hyperion. + * + * Copyright (C) 2024 Hyperion Project + * + * Hyperion is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Hyperion is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Hyperion. If not, see . 
+ */\n\n", + version.major, + version.minor, + version.patch, + cbindgen::VERSION + ); + + let bindings = cbindgen::Builder::new() + .with_config(Config { + language: cbindgen::Language::C, + documentation: true, + documentation_style: cbindgen::DocumentationStyle::Doxy, + include_guard: Some("_HYCORE_H".to_string()), + cpp_compat: true, + documentation_length: cbindgen::DocumentationLength::Full, + tab_width: 2, + constant: ConstantConfig { + allow_static_const: true, + ..Default::default() + }, + ..Default::default() + }) + .with_crate(crate_dir) + .with_after_include(prefix) + .with_braces(cbindgen::Braces::NextLine) + .include_item("HyLogCreateInfoEXT") + .generate() + .expect("Unable to generate bindings"); + // bindings.write_to_file(output_file); + let file = std::fs::File::create(output_file).unwrap(); + let mut writer = std::io::BufWriter::new(file); + writer.write_all(file_header.as_bytes()).unwrap(); + bindings.write(&mut writer); +} diff --git a/cffi/examples/Makefile b/cffi/examples/Makefile new file mode 100644 index 0000000..890d3f7 --- /dev/null +++ b/cffi/examples/Makefile @@ -0,0 +1,31 @@ +CC ?= gcc +CXX ?= g++ +PROFILE ?= debug +INCLUDE_DIR := ../include +CFLAGS ?= -I$(INCLUDE_DIR) -Wall -Wextra +CARGO_FLAGS := +ifeq ($(PROFILE),release) +CARGO_FLAGS += --release +endif + +SO_PATH := ../../target/$(PROFILE)/libhycore_cffi.so +SO_DIR := $(dir $(SO_PATH)) +SO_BASENAME := $(notdir $(SO_PATH)) +SO_STEM := $(patsubst lib%.so,%,$(SO_BASENAME)) + +LDFLAGS := -L$(SO_DIR) -Wl,-rpath,$(SO_DIR) -l$(SO_STEM) -lm + +all: sample.out + +lib: + cargo build $(CARGO_FLAGS) --manifest-path ../Cargo.toml + +sample.out: sample.c lib + echo $(SO_PATH) + $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) + +run: sample.out + ./sample.out + +clean: + rm -f sample.out diff --git a/cffi/examples/sample.c b/cffi/examples/sample.c new file mode 100644 index 0000000..5b40ba7 --- /dev/null +++ b/cffi/examples/sample.c @@ -0,0 +1,262 @@ +#include +#include +#include + +static const char 
*hycore_c_str = + "define i32 square(%a: i32) {\n" + "entry:\n" + " %result: i32 = imul.wrap %a, %a\n" + " ret %result\n" + "}\n"; + +#if defined(_MSC_VER) +#define COLOR_RESET "" +#define COLOR_RED "" +#define COLOR_GREEN "" +#define COLOR_YELLOW "" +#define COLOR_BLUE "" +#define COLOR_BRIGHT_BLACK "" +#else +#define COLOR_RESET "\x1b[0m" +#define COLOR_RED "\x1b[31m" +#define COLOR_GREEN "\x1b[32m" +#define COLOR_YELLOW "\x1b[33m" +#define COLOR_BLUE "\x1b[34m" +#define COLOR_BRIGHT_BLACK "\x1b[90m" +#endif + +static const char *log_level_to_color(HyLogLevelEXT level) +{ + switch (level) + { + case HY_LOG_LEVEL_TRACE: + return COLOR_BRIGHT_BLACK; + case HY_LOG_LEVEL_DEBUG: + return COLOR_BLUE; + case HY_LOG_LEVEL_INFO: + return COLOR_GREEN; + case HY_LOG_LEVEL_WARN: + return COLOR_YELLOW; + case HY_LOG_LEVEL_ERROR: + return COLOR_RED; + default: + return COLOR_RESET; + } +} + +static const char *log_level_to_string(HyLogLevelEXT level) +{ + switch (level) + { + case HY_LOG_LEVEL_TRACE: + return "[TRACE]"; + case HY_LOG_LEVEL_DEBUG: + return "[DEBUG ]"; + case HY_LOG_LEVEL_INFO: + return "[INFO ]"; + case HY_LOG_LEVEL_WARN: + return "[WARN ]"; + case HY_LOG_LEVEL_ERROR: + return "[ERROR]"; + default: + return "[UNKNOWN]"; + } +} + +void callback_function(struct HyLogMessageEXT *message); +void print_hex_ascii(const uint8_t *data, uint32_t length, bool compute_stats); + +int main(int argc, char **argv) +{ + if (argc < 1 || argc >= 3) + { + printf("Usage: %s \n", argv[0]); + return -1; + } + + /* Read the assembly file if provided, overwise default to hycore_c_str */ + const char *assembly_data = hycore_c_str; + bool assembly_data_allocated = false; + + if (argc == 2) + { + const char *filename = argv[1]; + FILE *file = fopen(filename, "rb"); + if (!file) + { + printf("Failed to open file: %s\n", filename); + return -1; + } + fseek(file, 0, SEEK_END); + long fileSize = ftell(file); + fseek(file, 0, SEEK_SET); + char *fileData = (char *)malloc(fileSize + 1); + if 
(!fileData) + { + printf("Memory allocation failed for file data.\n"); + fclose(file); + return -1; + } + fread(fileData, 1, fileSize, file); + fileData[fileSize] = '\0'; + fclose(file); + assembly_data = fileData; + assembly_data_allocated = true; + } + + /* Retrieve and print Hycore version information */ + HyVersionInfo version; + hyGetVersionInfo(&version); + printf("Hycore Version: %u.%u.%u\n", version.major, version.minor, version.patch); + + /* Construct a new instance */ + HyApplicationInfo appInfo; + appInfo.sType = HY_STRUCTURE_TYPE_APPLICATION_INFO; + appInfo.applicationVersion = version; + appInfo.pApplicationName = "SimpleCApp"; + appInfo.engineVersion = version; + appInfo.pEngineName = "HycoreEngine"; + + HyLogCreateInfoEXT logCreateInfo; + logCreateInfo.sType = HY_STRUCTURE_TYPE_LOG_CREATE_INFO_EXT; + logCreateInfo.level = HY_LOG_LEVEL_TRACE; + logCreateInfo.callback = callback_function; + logCreateInfo.pNext = NULL; + + const char *extensions[] = {HY_LOGGER_NAME_EXT}; + HyInstanceCreateInfo createInfo; + createInfo.sType = HY_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + createInfo.pApplicationInfo = &appInfo; + createInfo.ppEnabledExtensions = extensions; + createInfo.enabledExtensionsCount = sizeof(extensions) / sizeof(extensions[0]); + createInfo.nodeId = 0; + createInfo.pNext = &logCreateInfo; + + HyInstance *instance; + HyResult result = hyCreateInstance(&createInfo, &instance); + if (result != HY_RESULT_SUCCESS) + { + printf("Failed to create Hycore instance. 
Error code: %d\n", result); + if (assembly_data_allocated) + free((void *)assembly_data); + return -1; + } + + /* Compile a simple module */ + HyModuleSourceInfo sourceInfo; + sourceInfo.sType = HY_STRUCTURE_TYPE_MODULE_SOURCE_INFO; + sourceInfo.sourceType = HY_MODULE_SOURCE_TYPE_ASSEMBLY; + sourceInfo.filename = "sample.c"; + sourceInfo.data = (const uint8_t *)assembly_data; + + const HyModuleSourceInfo *sources[] = {&sourceInfo}; + HyModuleCompileInfo compileInfo; + compileInfo.sType = HY_STRUCTURE_TYPE_MODULE_COMPILE_INFO; + compileInfo.ppSources = sources; + compileInfo.sourcesCount = sizeof(sources) / sizeof(sources[0]); + + uint8_t *compiledData = NULL; + uint32_t compiledDataLen = 0; + result = hyCompileModule(instance, &compileInfo, &compiledData, &compiledDataLen); + if (assembly_data_allocated) + free((void *)assembly_data); + if (result != HY_RESULT_SUCCESS) + { + printf("Module compilation failed. Error code: %d\n", result); + hyDestroyInstance(instance); + return -1; + } + + printf("Module compiled successfully. Compiled data length: %u bytes\n", compiledDataLen); + putchar('\n'); + printf("Compiled Module Data (Hex):\n"); + print_hex_ascii(compiledData, compiledDataLen, true); + putchar('\n'); + + /* Load the compiled module */ + HyModule *module; + result = hyLoadModule(instance, compiledData, compiledDataLen, &module); + if (result != HY_RESULT_SUCCESS) + { + printf("Module loading failed. 
Error code: %d\n", result); + free(compiledData); + hyDestroyInstance(instance); + return -1; + } + + /* free compiled data if necessary */ + free(compiledData); + + /* Destroy the loaded module */ + hyDestroyModule(module); + + /* Clean up and exit */ + hyDestroyInstance(instance); + printf("Hycore instance destroyed.\n"); + + return 0; +} + +void print_hex_ascii(const uint8_t *data, uint32_t length, bool compute_stats) +{ + uint32_t frequency[256] = {0}; + + uint32_t offset = 0; + while (offset < length) + { + printf("%08X | ", offset); /* offset */ + for (uint32_t i = 0; i < 16; i++) + { + if (offset + i < length) + printf("%02X ", data[offset + i]); + else + printf(" "); + } + printf("| "); + for (uint32_t i = 0; i < 16; i++) + { + if (offset + i < length) + { + char c = data[offset + i]; + if (c >= 32 && c <= 126) + printf("%c", c); + else + printf("."); + + // Update frequency count + frequency[(uint8_t)c]++; + } + } + printf("\n"); + + offset += 16; + } + + if (compute_stats) + { + // Compute shanon entropy of the data + double entropy = 0.0; + for (int i = 0; i < 256; i++) + { + if (frequency[i] > 0) + { + double p = (double)frequency[i] / length; + entropy -= p * log2(p); + } + } + + // Display histogram + printf("Shannon Entropy: %.4f bits/byte (max 8.0000 bits/byte)\n", entropy); + printf("Number of bytes: %u\n", length); + } +} + +void callback_function(struct HyLogMessageEXT *message) +{ + printf("%s%s[%s:%u] -- %s\n" COLOR_RESET, + log_level_to_color(message->level), + log_level_to_string(message->level), + message->file, + message->line, + message->message); +} \ No newline at end of file diff --git a/cffi/include/hycore.h b/cffi/include/hycore.h new file mode 100644 index 0000000..4dd0d94 --- /dev/null +++ b/cffi/include/hycore.h @@ -0,0 +1,245 @@ +/** + * @file hycore.h + * @brief Main C API header for Hyperion Core library. + * @version 0.1.2 + * + * This file header provides the C API for interacting with the Hyperion framework. 
+ * It was generated using cbindgen=0.27.0. DO NOT EDIT THIS FILE MANUALLY! + * + * This file is part of Hyperion. + * + * Copyright (C) 2024 Hyperion Project + * + * Hyperion is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Hyperion is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Hyperion. If not, see . + */ + +#ifndef _HYCORE_H +#define _HYCORE_H + +#include +#include +#include +#include + +#define HY_VERSION_MAJOR 0 +#define HY_VERSION_MINOR 1 +#define HY_VERSION_PATCH 2 +#define HY_LOGGER_NAME_EXT "__EXT_hyperion_logger" + + +enum HyLogLevelEXT +#ifdef __cplusplus + : uint32_t +#endif // __cplusplus + +{ + HY_LOG_LEVEL_TRACE = 0, + HY_LOG_LEVEL_DEBUG = 1, + HY_LOG_LEVEL_INFO = 2, + HY_LOG_LEVEL_WARN = 3, + HY_LOG_LEVEL_ERROR = 4, +}; +#ifndef __cplusplus +typedef uint32_t HyLogLevelEXT; +#endif // __cplusplus + +enum HyModuleSourceType +#ifdef __cplusplus + : uint32_t +#endif // __cplusplus + +{ + HY_MODULE_SOURCE_TYPE_ASSEMBLY, +}; +#ifndef __cplusplus +typedef uint32_t HyModuleSourceType; +#endif // __cplusplus + +enum HyResult +#ifdef __cplusplus + : uint32_t +#endif // __cplusplus + +{ + HY_RESULT_SUCCESS, + HY_RESULT_INVALID_POINTER, + HY_RESULT_IO_ERROR, + HY_RESULT_OUT_OF_MEMORY, + HY_RESULT_MANIFEST_PARSE_ERROR, + HY_RESULT_UNKNOWN, + HY_RESULT_PLUGIN_NOT_FOUND, + HY_RESULT_UTF8_ERROR, + HY_RESULT_INSTR_ERROR, + HY_RESULT_KEY_NOT_FOUND, + HY_RESULT_STRUCTURE_TYPE_MISMATCH, + HY_RESULT_DUPLICATED_KEY, +}; +#ifndef __cplusplus +typedef uint32_t HyResult; +#endif // __cplusplus + +enum 
HyStructureType +#ifdef __cplusplus + : uint32_t +#endif // __cplusplus + +{ + HY_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + HY_STRUCTURE_TYPE_APPLICATION_INFO, + HY_STRUCTURE_TYPE_MODULE_COMPILE_INFO, + HY_STRUCTURE_TYPE_MODULE_SOURCE_INFO, + HY_STRUCTURE_TYPE_LOG_CREATE_INFO_EXT = 268435456, +}; +#ifndef __cplusplus +typedef uint32_t HyStructureType; +#endif // __cplusplus + +typedef struct HyInstance HyInstance; + +typedef struct HyModule HyModule; + +typedef struct HyVersionInfo +{ + uint16_t major; + uint16_t minor; + uint16_t patch; +} HyVersionInfo; + +typedef struct HyApplicationInfo +{ + HyStructureType sType; + struct HyVersionInfo applicationVersion; + const char *pApplicationName; + struct HyVersionInfo engineVersion; + const char *pEngineName; +} HyApplicationInfo; + +typedef struct HyInstanceCreateInfo +{ + HyStructureType sType; + const struct HyApplicationInfo *pApplicationInfo; + const char *const *ppEnabledExtensions; + uint32_t enabledExtensionsCount; + uint32_t nodeId; + void *pNext; +} HyInstanceCreateInfo; + +typedef struct HyModuleSourceInfo +{ + HyStructureType sType; + HyModuleSourceType sourceType; + const char *filename; + const uint8_t *data; +} HyModuleSourceInfo; + +typedef struct HyModuleCompileInfo +{ + HyStructureType sType; + const struct HyModuleSourceInfo *const *ppSources; + uint32_t sourcesCount; +} HyModuleCompileInfo; + +typedef struct HyLogMessageEXT +{ + HyLogLevelEXT level; + int64_t timeStamp; + const char *message; + const char *module; + const char *file; + uint32_t line; + const char *threadName; + void *pNext; +} HyLogMessageEXT; + +typedef void (*HyLogCallback_PFN)(struct HyLogMessageEXT *message); + +typedef struct HyLogCreateInfoEXT +{ + HyStructureType sType; + HyLogLevelEXT level; + HyLogCallback_PFN callback; + void *pNext; +} HyLogCreateInfoEXT; + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * Retrieves information about the version of the Hycore library. 
+ * + * # Safety + * - The `pVersionInfo` pointer must be a valid, non-null pointer to a `HyVersionInfo` struct. + */ +void hyGetVersionInfo(struct HyVersionInfo *pVersionInfo); + +/** + * Create an instance of the Hycore library. + * + * # Safety + * - The `pInstanceCreateInfo` pointer must be a valid, non-null pointer to a `HyInstanceCreateInfo` struct. + * - The `pInstance` pointer must be a valid, non-null pointer to a pointer to `HyInstance`. + */ +HyResult hyCreateInstance(const struct HyInstanceCreateInfo *pInstanceCreateInfo, + struct HyInstance **pInstance); + +/** + * Destroys an instance created by `hyCreateInstance`. + * + * # Safety + * - The `instance` pointer must be a valid, non-null pointer to a `HyInstance`. + */ +void hyDestroyInstance(struct HyInstance *instance); + +/** + * Compile module sources into a binary format. + * + * # Safety + * - The `instance` pointer must be a valid, non-null pointer to a `HyInstance`. + * - The `pModuleCompileInfo` pointer must be a valid, non-null pointer to a `HyModuleCompileInfo`. + * - The `ppDataPtr` and `pDataLen` pointers must be valid, non-null pointers to receive the output data. The caller is responsible for freeing the allocated data using `libc::free`. + * + */ +HyResult hyCompileModule(const struct HyInstance *instance, + const struct HyModuleCompileInfo *pModuleCompileInfo, + uint8_t **ppDataPtr, + uint32_t *pDataLen); + +/** + * Loads a compiled module from binary data. + * + * # Safety + * - The `instance` pointer must be a valid, non-null pointer to a `HyInstance`. + * - The `pDataPtr` pointer must be a valid, non-null pointer to the compiled module data. + * - The `dataLen` must be the correct length of the compiled module data. + * - The `pModule` pointer must be a valid, non-null pointer to receive the created `HyModule`. 
+ */ +HyResult hyLoadModule(const struct HyInstance *instance, + const uint8_t *pDataPtr, + uint32_t dataLen, + struct HyModule **pModule); + +/** + * Destroys a module loaded by `hyLoadModule`. + * + * # Safety + * - The `module` pointer must be a valid, non-null pointer to a `HyModule`. + */ +void hyDestroyModule(struct HyModule *module); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif /* _HYCORE_H */ diff --git a/cffi/src/lib.rs b/cffi/src/lib.rs new file mode 100644 index 0000000..fc2a04d --- /dev/null +++ b/cffi/src/lib.rs @@ -0,0 +1,579 @@ +use std::{ + ffi::CString, + os::raw::{c_char, c_void}, + sync::{Arc, Weak}, +}; + +use hycore::{ + base::{ + api::{ModuleSourceInfo, ModuleSourceType, VersionInfo}, + InstanceContext, ModuleKey, + }, + ext::hylog::LogLevelEXT, + hywarn, + utils::{error::HyErrorType, opaque::OpaqueList}, +}; +use strum::FromRepr; + +pub struct HyInstance(Arc); +pub struct HyModule(ModuleKey, Weak); + +/// cbindgen:rename-all=ScreamingSnakeCase +#[repr(u32)] +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum HyResult { + HyResultSuccess, + HyResultInvalidPointer, + HyResultIoError, + HyResultOutOfMemory, + HyResultManifestParseError, + HyResultUnknown, + HyResultPluginNotFound, + HyResultUtf8Error, + HyResultInstrError, + HyResultKeyNotFound, + HyResultStructureTypeMismatch, + HyResultDuplicatedKey, +} + +/// cbindgen:rename-all=ScreamingSnakeCase +#[repr(u32)] +#[derive(Clone, Copy, PartialEq, Eq, FromRepr)] +pub enum HyLogLevelEXT { + HyLogLevelTrace = 0, + HyLogLevelDebug = 1, + HyLogLevelInfo = 2, + HyLogLevelWarn = 3, + HyLogLevelError = 4, +} + +/// cbindgen:rename-all=ScreamingSnakeCase +#[repr(u32)] +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum HyStructureType { + HyStructureTypeInstanceCreateInfo, + HyStructureTypeApplicationInfo, + HyStructureTypeModuleCompileInfo, + HyStructureTypeModuleSourceInfo, + HyStructureTypeLogCreateInfoEXT = 0x10000000, +} + +impl Into for LogLevelEXT { + fn into(self) -> 
HyLogLevelEXT { + HyLogLevelEXT::from_repr(self as u32).expect("Invalid LogLevelEXT value") + } +} + +impl From for LogLevelEXT { + fn from(value: HyLogLevelEXT) -> Self { + LogLevelEXT::from_repr(value as u32).expect("Invalid HyLogLevelEXT value") + } +} + +impl From for HyResult { + fn from(value: hycore::utils::error::HyError) -> Self { + let value_type: HyErrorType = value.into(); + match value_type { + HyErrorType::IoError => HyResult::HyResultIoError, + HyErrorType::ManifestParseError => HyResult::HyResultManifestParseError, + HyErrorType::PluginNotFound => HyResult::HyResultPluginNotFound, + HyErrorType::Utf8Error => HyResult::HyResultUtf8Error, + HyErrorType::HyInstrError => HyResult::HyResultInstrError, + HyErrorType::KeyNotFound => HyResult::HyResultKeyNotFound, + HyErrorType::DuplicatedKey => HyResult::HyResultDuplicatedKey, + HyErrorType::Unknown => HyResult::HyResultUnknown, + } + } +} + +// Version info matching hycore::base::api::VersionInfo +#[repr(C)] +#[derive(Clone, Copy)] +pub struct HyVersionInfo { + pub major: u16, + pub minor: u16, + pub patch: u16, +} + +impl Into for HyVersionInfo { + fn into(self) -> VersionInfo { + VersionInfo { + major: self.major, + minor: self.minor, + patch: self.patch, + } + } +} + +// Constants mirroring hycore::base::api::ModuleSourceType +// pub const HY_MODULE_SOURCE_ASSEMBLY: u32 = hycore::base::api::ModuleSourceType::Assembly as u32; + +/// cbindgen:rename-all=CamelCase +#[repr(C)] +pub struct HyApplicationInfo { + pub s_type: HyStructureType, + pub application_version: HyVersionInfo, + pub p_application_name: *const c_char, + pub engine_version: HyVersionInfo, + pub p_engine_name: *const c_char, +} + +/// cbindgen:rename-all=CamelCase +#[repr(C)] +pub struct HyInstanceCreateInfo { + pub s_type: HyStructureType, + pub p_application_info: *const HyApplicationInfo, + pub pp_enabled_extensions: *const *const c_char, + pub enabled_extensions_count: u32, + pub node_id: u32, + pub p_next: *mut c_void, // opaque, must 
be null for now +} + +/// cbindgen:rename-all=CamelCase +#[repr(C)] +pub struct HyLogMessageEXT { + pub level: HyLogLevelEXT, + pub time_stamp: i64, // Unix timestamp + pub message: *const c_char, + pub module: *const c_char, + pub file: *const c_char, + pub line: u32, + pub thread_name: *const c_char, + pub p_next: *mut c_void, // opaque, must be null for now +} + +#[allow(non_camel_case_types)] +pub type HyLogCallback_PFN = extern "C" fn(message: *mut HyLogMessageEXT); + +/// cbindgen:rename-all=ScreamingSnakeCase +#[repr(u32)] +#[derive(Clone, Copy, PartialEq, Eq, FromRepr)] +pub enum HyModuleSourceType { + HyModuleSourceTypeAssembly, +} + +impl Into for HyModuleSourceType { + fn into(self) -> ModuleSourceType { + match self { + HyModuleSourceType::HyModuleSourceTypeAssembly => ModuleSourceType::Assembly, + } + } +} + +/// cbindgen:rename-all=CamelCase +#[repr(C)] +pub struct HyModuleSourceInfo { + pub s_type: HyStructureType, + pub source_type: HyModuleSourceType, + pub filename: *const c_char, // nullable + pub data: *const u8, +} + +/// cbindgen:rename-all=CamelCase +#[repr(C)] +pub struct HyModuleCompileInfo { + pub s_type: HyStructureType, + pub pp_sources: *const *const HyModuleSourceInfo, + pub sources_count: u32, +} + +/// cbindgen:rename-all=CamelCase +#[repr(C)] +pub struct HyLogCreateInfoEXT { + pub s_type: HyStructureType, + pub level: HyLogLevelEXT, + pub callback: HyLogCallback_PFN, + pub p_next: *mut c_void, // opaque, must be null for now +} + +pub unsafe fn verify_structure_type(element: *const T, expected: HyStructureType) -> bool { + if element.is_null() { + return false; + } + let s_type_ptr = element as *const HyStructureType; + let s_type = unsafe { *s_type_ptr }; + s_type == expected +} + +pub unsafe fn convert_opaque_list_from_next( + mut p_next: *const c_void, +) -> Result { + let mut list = vec![]; + + while p_next != std::ptr::null() { + // Read p_next sType + let s_type = unsafe { + let s_type_ptr = p_next as *const HyStructureType; + 
*s_type_ptr + }; + + match s_type { + HyStructureType::HyStructureTypeLogCreateInfoEXT => { + let log_create_info = unsafe { + let ptr = p_next as *const HyLogCreateInfoEXT; + &*ptr + }; + + let level: LogLevelEXT = log_create_info.level.into(); + let callback = log_create_info.callback; + + let create_info = hycore::ext::hylog::LogCreateInfoEXT { + level, + callback: hycore::ext::hylog::LogCallbackEXT(Box::new(move |msg| { + let message = CString::new(msg.message.clone()).unwrap_or_default(); + let module = CString::new(msg.module.clone()).unwrap_or_default(); + let file = + CString::new(msg.file.clone().unwrap_or_default()).unwrap_or_default(); + let thread_name = CString::new(msg.thread_name.clone().unwrap_or_default()) + .unwrap_or_default(); + + let mut message = HyLogMessageEXT { + level: msg.level.into(), + time_stamp: msg.timepoint.and_utc().timestamp(), + message: message.as_ptr() as *const c_char, + module: module.as_ptr() as *const c_char, + file: file.as_ptr() as *const c_char, + line: msg.line.unwrap_or(0), + thread_name: thread_name.as_ptr() as *const c_char, + p_next: std::ptr::null_mut(), + }; + let message_ptr: *mut HyLogMessageEXT = &mut message; + callback(message_ptr); + })), + }; + list.push(Box::new(create_info) as Box); + p_next = log_create_info.p_next; + } + _ => { + return Err(HyResult::HyResultStructureTypeMismatch); + } + } + } + + Ok(OpaqueList(list)) +} + +/// Retrieves information about the version of the Hycore library. +/// +/// # Safety +/// - The `pVersionInfo` pointer must be a valid, non-null pointer to a `HyVersionInfo` struct. 
+///cbindgen:rename-all=CamelCase
+#[no_mangle]
+pub extern "C" fn hyGetVersionInfo(p_version_info: *mut HyVersionInfo) {
+    // A null output pointer is silently ignored.
+    if p_version_info.is_null() {
+        return;
+    }
+    let version = semver::Version::parse(env!("CARGO_PKG_VERSION"))
+        .expect("CARGO_PKG_VERSION is always a valid semantic version");
+
+    // `semver` stores components as `u64`; saturate instead of silently wrapping
+    // if a component ever exceeds what the C struct can represent.
+    let clamp = |component: u64| u16::try_from(component).unwrap_or(u16::MAX);
+
+    // SAFETY: `p_version_info` is non-null (checked above) and the caller
+    // guarantees it points to writable storage for a `HyVersionInfo`.
+    unsafe {
+        p_version_info.write(HyVersionInfo {
+            major: clamp(version.major),
+            minor: clamp(version.minor),
+            patch: clamp(version.patch),
+        });
+    }
+}
+
+/// Converts a possibly-null C string into an owned `String`.
+///
+/// A null pointer yields an empty string; invalid UTF-8 is replaced lossily.
+///
+/// # Safety
+/// - `ptr` must be null or point to a valid NUL-terminated string.
+unsafe fn c_str_to_string_or_empty(ptr: *const c_char) -> String {
+    if ptr.is_null() {
+        String::new()
+    } else {
+        // SAFETY: `ptr` is non-null and NUL-terminated per this function's contract.
+        unsafe { std::ffi::CStr::from_ptr(ptr) }
+            .to_string_lossy()
+            .into_owned()
+    }
+}
+
+/// Create an instance of the Hycore library.
+///
+/// On success `*pInstance` receives a handle that must later be released with
+/// `hyDestroyInstance`. On any failure `*pInstance` is set to null.
+///
+/// # Errors
+/// - `HY_RESULT_INVALID_POINTER` if `pInstanceCreateInfo`, `pInstance` or
+///   `pApplicationInfo` is null.
+/// - `HY_RESULT_STRUCTURE_TYPE_MISMATCH` if an `sType` tag does not match the
+///   expected structure, or if the `pNext` chain contains an unsupported structure.
+/// - Any error reported by the core library while creating the instance.
+///
+/// # Safety
+/// - The `pInstanceCreateInfo` pointer must be a valid, non-null pointer to a `HyInstanceCreateInfo` struct.
+/// - The `pInstance` pointer must be a valid, non-null pointer to a pointer to `HyInstance`.
+///cbindgen:rename-all=CamelCase
+#[no_mangle]
+pub extern "C" fn hyCreateInstance(
+    p_instance_create_info: *const HyInstanceCreateInfo,
+    p_instance: *mut *mut HyInstance,
+) -> HyResult {
+    if p_instance_create_info.is_null() || p_instance.is_null() {
+        return HyResult::HyResultInvalidPointer;
+    }
+
+    // Never leave the caller's handle indeterminate on a failure path.
+    // SAFETY: `p_instance` is non-null (checked above) and writable per the contract.
+    unsafe { p_instance.write(std::ptr::null_mut()) };
+
+    // SAFETY: the pointer is non-null; the structure begins with its `sType` tag.
+    let create_info_tag_ok = unsafe {
+        verify_structure_type(
+            p_instance_create_info,
+            HyStructureType::HyStructureTypeInstanceCreateInfo,
+        )
+    };
+    if !create_info_tag_ok {
+        return HyResult::HyResultStructureTypeMismatch;
+    }
+
+    // Convert and validate input info
+    // SAFETY: non-null and tagged as a `HyInstanceCreateInfo` (verified above).
+    let info_ref = unsafe { &*p_instance_create_info };
+    if info_ref.p_application_info.is_null() {
+        return HyResult::HyResultInvalidPointer;
+    }
+    // SAFETY: the pointer is non-null; the structure begins with its `sType` tag.
+    let app_info_tag_ok = unsafe {
+        verify_structure_type(
+            info_ref.p_application_info,
+            HyStructureType::HyStructureTypeApplicationInfo,
+        )
+    };
+    if !app_info_tag_ok {
+        return HyResult::HyResultStructureTypeMismatch;
+    }
+    // SAFETY: non-null and tagged as a `HyApplicationInfo` (verified above).
+    let p_application_info = unsafe { &*info_ref.p_application_info };
+
+    // SAFETY: both names are either null or NUL-terminated strings owned by the caller.
+    let (application_name, engine_name) = unsafe {
+        (
+            c_str_to_string_or_empty(p_application_info.p_application_name),
+            c_str_to_string_or_empty(p_application_info.p_engine_name),
+        )
+    };
+
+    let application_info = hycore::base::api::ApplicationInfo {
+        application_version: p_application_info.application_version.into(),
+        application_name,
+        engine_version: p_application_info.engine_version.into(),
+        engine_name,
+    };
+
+    // Convert and validate enabled extensions (null entries become empty names)
+    let enabled_extensions =
+        if info_ref.pp_enabled_extensions.is_null() || info_ref.enabled_extensions_count == 0 {
+            Vec::new()
+        } else {
+            // SAFETY: the array pointer is non-null and the caller guarantees it
+            // holds `enabledExtensionsCount` entries.
+            let names = unsafe {
+                std::slice::from_raw_parts(
+                    info_ref.pp_enabled_extensions,
+                    info_ref.enabled_extensions_count as usize,
+                )
+            };
+            names
+                .iter()
+                // SAFETY: each entry is either null or a NUL-terminated string.
+                .map(|&name| unsafe { c_str_to_string_or_empty(name) })
+                .collect()
+        };
+
+    // Convert opaque list from pNext
+    let opaque_list = match unsafe { convert_opaque_list_from_next(info_ref.p_next) } {
+        Ok(list) => list,
+        Err(err) => return err,
+    };
+
+    let create_info = hycore::base::api::InstanceCreateInfo {
+        application_info,
+        enabled_extensions,
+        node_id: info_ref.node_id,
+        ext: opaque_list,
+    };
+    match hycore::base::api::create_instance(create_info) {
+        Ok(ctx) => {
+            // Ownership moves to the caller; `hyDestroyInstance` reclaims it.
+            let boxed = Box::new(HyInstance(ctx));
+            // SAFETY: `p_instance` is non-null (checked above) and writable.
+            unsafe { p_instance.write(Box::into_raw(boxed)) };
+            HyResult::HyResultSuccess
+        }
+        Err(err) => err.into(),
+    }
+}
+
+/// Destroys an instance created by `hyCreateInstance`.
+///
+/// Passing a null `instance` is a no-op.
+///
+/// # Safety
+/// - The `instance` pointer must be null or a pointer returned by `hyCreateInstance` that has not been destroyed yet.
+///cbindgen:rename-all=CamelCase +#[no_mangle] +pub extern "C" fn hyDestroyInstance(instance: *mut HyInstance) { + if instance.is_null() { + return; + } + unsafe { + let instance = Box::from_raw(instance); + + /* Check if there are any modules to remove */ + let instance_ref = &instance.0; + let module_keys: Vec = instance_ref.modules.read().keys().collect(); + if module_keys.len() > 0 { + hywarn!( + instance_ref, + "Some modules were not destroyed before instance destruction. Did you forget to call hyDestroyModule on them? Those modules are still loaded {:?}", + module_keys + ); + } + + drop(instance); + } +} + +/// Compile module sources into a binary format. +/// +/// # Safety +/// - The `instance` pointer must be a valid, non-null pointer to a `HyInstance`. +/// - The `pModuleCompileInfo` pointer must be a valid, non-null pointer to a `HyModuleCompileInfo`. +/// - The `ppDataPtr` and `pDataLen` pointers must be valid, non-null pointers to receive the output data. The caller is responsible for freeing the allocated data using `libc::free`. 
+/// +///cbindgen:rename-all=CamelCase +#[no_mangle] +pub extern "C" fn hyCompileModule( + instance: *const HyInstance, + p_module_compile_info: *const HyModuleCompileInfo, + pp_data_ptr: *mut *mut u8, + p_data_len: *mut u32, +) -> HyResult { + if instance.is_null() + || p_module_compile_info.is_null() + || pp_data_ptr.is_null() + || p_data_len.is_null() + { + return HyResult::HyResultInvalidPointer; + } + + // Convert and validate input info + let inst = unsafe { &*instance }; + + // Convert compile info + if !unsafe { + verify_structure_type( + p_module_compile_info, + HyStructureType::HyStructureTypeModuleCompileInfo, + ) + } { + return HyResult::HyResultStructureTypeMismatch; + } + let info_ref = unsafe { &*p_module_compile_info }; + + // Convert sources + let sources = if info_ref.pp_sources.is_null() || info_ref.sources_count == 0 { + Vec::new() + } else { + let slice = unsafe { + std::slice::from_raw_parts(info_ref.pp_sources, info_ref.sources_count as usize) + }; + let mut sources_vec = Vec::with_capacity(slice.len()); + for &source_ptr in slice { + if source_ptr.is_null() { + return HyResult::HyResultInvalidPointer; + } + if !unsafe { + verify_structure_type(source_ptr, HyStructureType::HyStructureTypeModuleSourceInfo) + } { + return HyResult::HyResultStructureTypeMismatch; + } + let source_ref = unsafe { &*source_ptr }; + let filename = if source_ref.filename.is_null() { + None + } else { + Some(unsafe { + std::ffi::CStr::from_ptr(source_ref.filename) + .to_string_lossy() + .into_owned() + }) + }; + let data = if source_ref.data.is_null() { + String::new() + } else { + // For simplicity, assume data is null-terminated string + unsafe { + std::ffi::CStr::from_ptr(source_ref.data as *const c_char) + .to_string_lossy() + .into_owned() + } + }; + let source_type: ModuleSourceType = source_ref.source_type.into(); + sources_vec.push(ModuleSourceInfo { + source_type, + filename, + data, + }); + } + sources_vec + }; + + // Create compile info + let compile_info = 
hycore::base::api::ModuleCompileInfo { sources }; + + // Compile sources + match hycore::base::api::compile_sources(&inst.0, compile_info) { + Ok(buf) => { + let len = buf.len() as usize; + if len >= u32::MAX as usize { + return HyResult::HyResultOutOfMemory; + } + + unsafe { + let ptr = libc::malloc(len) as *mut u8; + if ptr.is_null() { + return HyResult::HyResultOutOfMemory; + } + std::ptr::copy_nonoverlapping(buf.as_ptr(), ptr, len); + *pp_data_ptr = ptr; + *p_data_len = len as u32; + } + + HyResult::HyResultSuccess + } + Err(err) => err.into(), + } +} + +/// Loads a compiled module from binary data. +/// +/// # Safety +/// - The `instance` pointer must be a valid, non-null pointer to a `HyInstance`. +/// - The `pDataPtr` pointer must be a valid, non-null pointer to the compiled module data. +/// - The `dataLen` must be the correct length of the compiled module data. +/// - The `pModule` pointer must be a valid, non-null pointer to receive the created `HyModule`. +///cbindgen:rename-all=CamelCase +#[no_mangle] +pub extern "C" fn hyLoadModule( + instance: *const HyInstance, + p_data_ptr: *const u8, + data_len: u32, + p_module: *mut *mut HyModule, +) -> HyResult { + if instance.is_null() || p_data_ptr.is_null() || p_module.is_null() { + return HyResult::HyResultInvalidPointer; + } + + let inst = unsafe { &*instance }; + let data = unsafe { std::slice::from_raw_parts(p_data_ptr, data_len as usize) }; + match hycore::base::api::load_module(&inst.0, data) { + Ok(module_key) => { + let boxed = Box::new(HyModule(module_key, Arc::downgrade(&inst.0))); + unsafe { + *p_module = Box::into_raw(boxed); + } + HyResult::HyResultSuccess + } + Err(err) => err.into(), + } +} + +/// Destroys a module loaded by `hyLoadModule`. +/// +/// # Safety +/// - The `module` pointer must be a valid, non-null pointer to a `HyModule`. 
+///cbindgen:rename-all=CamelCase +#[no_mangle] +pub extern "C" fn hyDestroyModule(module: *mut HyModule) { + if module.is_null() { + return; + } + unsafe { + let boxed_module = Box::from_raw(module); + if let Some(instance) = boxed_module.1.upgrade() { + let _ = instance.remove_module_by_key(boxed_module.0); + } + drop(boxed_module); + } +} diff --git a/docs/Introduction.md b/docs/Introduction.md deleted file mode 100644 index 9d01bb1..0000000 --- a/docs/Introduction.md +++ /dev/null @@ -1,98 +0,0 @@ -# Introduction - -This document provides an introduction to the internal terminology and concepts used within the Hyperion framework. It aims to clarify the nomenclature and design decisions that underpin the system, facilitating a better understanding for developers and users alike. - -## Program and instructions - -Hyperion operates on a logical intermediate representation (IR) of programs that closely matches the [llvm IR](https://llvm.org/docs/LangRef.html). Notable features of this IR include: - - **SSA Form**: Single Static Assignment form, where each variable is assigned exactly once, simplifying data flow analysis. - - **Typed Instructions**: Each instruction in the IR is associated with a specific type, ensuring type safety during transformations and optimizations. - - **Blocks and Control Flow**: The IR is structured into basic blocks, with explicit control flow between them, allowing for clear representation of program logic. - -An instruction $\mathcal{I}$ in hyperion is represented as a tuple: -$$\mathcal{I} = (op, dst, src_1, src_2, ..., src_n)$$ -where: - - $op$ is the operation code (opcode) defining the operation to be performed. - - $dst$ is the destination operand where the result of the operation is stored. - - $src_1, src_2, ..., src_n$ are the source operands that provide the input values for the operation. 
- -A terminator $\mathcal{T}$ is not an instruction but a special kind of operation that marks the end of a basic block and defines the control flow to subsequent blocks. Examples of terminators include branches, returns, and jumps. - -A block $\mathcal{B}$ is a sequence of instructions followed by a terminator. It can be represented as: -$$\mathcal{B} = \{\mathcal{I}_1, \mathcal{I}_2, \cdots, \mathcal{I}_m, \mathcal{T}\}$$ -where $\mathcal{I}_1, \mathcal{I}_2, ..., \mathcal{I}_m$ are the instructions in the block, and $\mathcal{T}$ is the terminator. - -A function $\mathcal{F}$ is a collection of blocks that together define a complete unit of computation. It can be represented as: -$$\mathcal{F} = \{\mathcal{B}_1, \mathcal{B}_2, \cdots, \mathcal{B}_n\}$$ -where $\mathcal{B}_1, \mathcal{B}_2, ..., \mathcal{B}_n$ are the blocks in the function. - -A program $\mathcal{P}$ is a collection of functions, each consisting of multiple blocks. It can be represented as: -$$\mathcal{P} = \{\mathcal{F}_1, \mathcal{F}_2, \cdots, \mathcal{F}_k\}$$ -where $\mathcal{F}_1, \mathcal{F}_2, ..., \mathcal{F}_k$ are the functions in the program. - -## Function equivalence - -Consider two functions $f, g \in \mathcal{F}$, and a program $\mathcal{P}$ such that -1. $f$ and $g$ have the same type signature, i.e., they accept the same number and types of arguments and return the same type. -2. $\mathcal{P}\{f \rightarrow g\}$ is observably equivalent to $\mathcal{P}$ in that for all states $s$, executing $\mathcal{P}$ from $s$ yields the same observable behavior as executing $\mathcal{P}\{f \rightarrow g\}$ from $s$. - -Where $\mathcal{P}\{f \rightarrow g\}$ denotes the program $\mathcal{P}$ with all calls to function $f$ replaced by calls to function $g$. - -If the above conditions hold, we say that functions $f$ and $g$ are *equivalent* in the context of program $\mathcal{P}$. We note this as $f \leftrightsquigarrow g$. 
- -Similarly, we say that $f$ and $g$ are *equivalent under $\mathcal{C}$* for some set of preconditions $\mathcal{C}$ if $f$ and $g'$ are equivalent where $g'$ is defined as: -$$g'(x) = \begin{cases} -g(x) & \text{if } \mathcal{C}(x) \text{ holds} \\ -f(x) & \text{otherwise} -\end{cases}$$ -We note this as $f \leftrightsquigarrow_{\mathcal{C}} g$. - -## Postconditions and sufficient equivalence postconditions - -A *postcondition* is a logical assertion that describes the expected state of the program after the execution of a function. Formally a postcondition $P$ for a function $f$ and a precondition $C$ is a predicate over the program state $s$ such that -$$ -\forall s. C(s) \implies P(\Gamma_f(s)) -$$ -where $\Gamma_f(s)$ denotes the state of the program after executing function $f$ from state $s$. - -A set of postconditions $\{P_1, P_2, \ldots, P_n\}$ is said to be *sufficient for equivalence* under a precondition $C$ if -$$ -\begin{aligned} - \forall s.&\; C(s) \land (P_1(\Gamma_f(s)) \land P_2(\Gamma_f(s)) \land \ldots \land P_n(\Gamma_f(s))) \\ - &\land (P_1(\Gamma_g(s)) \land P_2(\Gamma_g(s)) \land \ldots \land P_n(\Gamma_g(s))) \\ - \implies& f \leftrightsquigarrow_{\{C\}} g -\end{aligned} -$$ - -## A note on proof and axiomatic reasoning. - -In the *hyperion* framework, we write all proof as program that check the validity of certain conditions. For instance, if we want to make -an argument about a function $f = \{ \mathcal{B}_1, \mathcal{B}_2, \ldots, \mathcal{B}_n \}$, we write a series of *meta-instructions* that -add assertions about `f`'s behavior at various points in its execution. - -- We introduce an `Assert` meta-instruction which is a `no-op` at runtime, but is used to ensure that a `i1` value is **ALWAYS** true. We then allow -to add new condition and checks at different points. For instance loop-invariants can be seen as a condition `assert %cond` in the body of a loop. 
-- We also introduce the notion of `free-variable` for reasoning about preconditions/postconditions. For instance, a precondition that check a list is sorted -```ll -; Defined above %list_ptr, %n: i32 - -%i = free_variable i32 -%ii = add i32 %i, 1 -%cond = icmp slt i32 %ii, %n -assume %cond -%index = getelementptr i32, i32* %list_ptr, i32 %i -%val1 = load i32, i32* %index ; Only load if %cond is true -%val2 = load i32, i32* %index ; Only load if %cond is true -%is_sorted = icmp sle i32 %val1, %val2 -assert i1 %is_sorted -``` - -The difference between `assume` and `assert` is that `assume` tells the proof engine to only consider paths where the condition is true. It should in theory not be used -directly when doing axiom derivation unless for free-variables. Can be used to "hide" complex proofs. If $A$ is true due to proof $P$, then `assume A` can be used for compression. - -## A note on genericity and abstractions. - -When optimizing code and proving equivalence, it is often useful to `abstract` away other functions. For instance, consider a hashmap implementation that uses -a hash function `hash_func`. When reasoning about the hashmap operations, we may not care about the actual implementation of `hash_func`, but only about its -properties. -The property of a hash function is VERY hard to prove. Furthermore, in many cases it is possible to construct hash collisions. diff --git a/docs/PluginSystem.md b/docs/PluginSystem.md deleted file mode 100644 index 676be65..0000000 --- a/docs/PluginSystem.md +++ /dev/null @@ -1,156 +0,0 @@ -# Hyperion Plugin Runtime - -The modern Hyperion plugin surface is centered on the `hycore::ext` module. It exposes the traits, -registration helpers, and inventory lookups that let downstream crates declare extensions without -writing any `dlopen` boilerplate. - -This document complements the inline Rustdoc comments and walks through how to author, register, and -consume extensions in the new architecture. 
- -## Runtime primitives - -`hycore/src/ext/mod.rs` contains the host-facing pieces: - -- **Traits** - - `StaticPluginEXT` encodes compile-time metadata (`UUID`, `NAME`, `DESCRIPTION`) and provides the - `new(&mut OpaqueList)` constructor used when an extension is instantiated. - - `PluginEXT` describes runtime hooks (`attach_to`, `initialize`, `teardown`) that the host calls as - an instance is built and destroyed. - - `DynPluginEXT` is the object-safe supertrait used to store heterogeneous plugins inside an - `InstanceContext`. -- **Registry** - - `PluginRegistry` is an inventory entry describing how to construct a concrete plugin. - - `define_plugin!(MyPlugin)` expands to a static `PluginRegistry` submission. It automatically wires - the loader closure so the host can ask for the plugin by name. -- **Loader** - - `load_plugin_by_name(name, ext_list)` is the single entrypoint `InstanceContext::create` uses to - turn user-supplied strings into boxed plugin values. -- **Opaque configuration** - - `utils::opaque::OpaqueList` is how extensions receive typed configuration at startup. Each plugin - extracts the structs it understands and leaves the rest untouched. - -The legacy dynamic loading code (`hycore::base::ext`) has been removed. Inventory submissions make -plugins feel like first-class Rust components while still supporting dynamic composition. - -## Instance lifecycle - -1. **Create info**. Callers assemble `api::InstanceCreateInfo`, filling the application metadata, - enabled extension names, and optional opaque config objects. Python bindings expose the same type - via `hypi.api.InstanceCreateInfo`. -2. **Plugin instantiation**. `InstanceContext::create` iterates over the requested extension names and - calls `load_plugin_by_name`. The loader finds the matching `PluginRegistry` entry and runs its - constructor. -3. **Attachment**. 
Once all plugins exist, Hyperion builds the shared `Arc` and calls - `PluginEXT::attach_to` so extensions can store a `Weak` pointer. -4. **Initialization**. With the `Arc` finalized, each plugin receives `initialize()`. This is where - per-instance state (log handlers, callbacks, caches) should be registered. -5. **Steady state**. Extensions run side-by-side with core modules. They can call back into the host - using the instance pointer provided earlier. -6. **Teardown**. Dropping the `InstanceContext` drains the `extensions` map and invokes - `PluginEXT::teardown` in reverse order of insertion. Use this hook to release resources or detach - callbacks. - -## Building a plugin - -Below is a minimal example showing how to implement and register a plugin crate. - -```rust -use hycore::{ - define_plugin, - ext::{PluginEXT, StaticPluginEXT}, - utils::{error::HyResult, opaque::OpaqueList}, -}; -use uuid::{Uuid, uuid}; -use std::sync::Weak; - -pub struct FooPlugin { - instance: Option>, -} - -define_plugin!(FooPlugin); - -impl StaticPluginEXT for FooPlugin { - const UUID: Uuid = uuid!("cdb726aa-8656-486f-a5b5-ff09f37a83fb"); - const NAME: &'static str = "__EXT_FOO"; - const DESCRIPTION: &'static str = "Provides foo-related features"; - - fn new(_ext: &mut OpaqueList) -> Self { - Self { instance: None } - } -} - -impl PluginEXT for FooPlugin { - fn attach_to(&mut self, instance: Weak) { - self.instance = Some(instance); - } - - fn initialize(&self) -> HyResult<()> { - // Instance has been fully constructed at this point. - Ok(()) - } - - fn teardown(&mut self) { - // Clean up global state if needed. - } -} -``` - -Any crate that links `hycore` can submit a plugin like this. Once compiled, the plugin simply needs to -be included in the binary or dependency graph; the inventory table ensures `load_plugin_by_name` -locates it. 
- -## Logger extension case study - -The built-in logger under `hycore::ext::hylog` demonstrates the full flow: - -- `LogCreateInfoEXT` is an opaque configuration object (also exposed to Python) that carries the log - level and callback. -- `LogPluginEXT` implements the traits and registers itself via `define_plugin!(LogPluginEXT)`. -- During `initialize`, the plugin installs `log_message` into `InstanceStateEXT::log_callback` so all - `hytrace!`, `hyinfo!`, etc. invocations reach the user callback. - -Refer to `hycore/src/ext/hylog/impl.rs` for a comprehensive reference implementation. - -## Python bindings - -`python/python/hypi/api` mirrors the Rust structs with Pydantic dataclasses, keeping the experience -consistent across languages. The `create_instance` helper converts those dataclasses back into the C -ABI structs that `hycore::base::api` understands. - -Extensions that expect opaque configuration types should call the `define_py_opaque_object_loaders!` -macro to register Python loaders. The logger plugin already registers `LogCreateInfoEXT` so scripts can -write: - -```python -from hypi.api import ( - ApplicationInfo, - InstanceCreateInfo, - InstanceEXT, - Version, - create_instance, -) -from hypi.api.ext.hylog import LogCreateInfoEXT, LogLevelEXT - -create_info = InstanceCreateInfo( - application_info=ApplicationInfo( - application_name="Notebook", - application_version=Version.parse("0.1.0"), - engine_name="Hyperion", - engine_version=Version.parse("0.1.1"), - ), - enabled_extensions=[InstanceEXT.LOGGER.value], - ext=[LogCreateInfoEXT(level=LogLevelEXT.DEBUG, callback=lambda msg: print(msg.message))], -) - -instance = create_instance(create_info) -``` - -## Best practices - -- **Stabilize UUIDs**: Treat UUID changes as breaking; hosts rely on them as stable identifiers. -- **Fail fast**: Prefer returning `HyResult` errors over panicking. The host surfaces these failures to - callers. 
-- **Use `OpaqueList` judiciously**: Remove configuration objects once consumed to avoid accidental - reuse by other extensions. -- **Keep `initialize` lightweight**: Heavy setup should be deferred or cached lazily to minimize - startup time. diff --git a/docs/Roadmap.md b/docs/Roadmap.md deleted file mode 100644 index 91cb44f..0000000 --- a/docs/Roadmap.md +++ /dev/null @@ -1,17 +0,0 @@ -# Roadmap - -## Immediate Next Steps - -- [x] Extend the specification language to support `meta-instructions` for assumptions and assertions. -- [x] Implement parsing and serialization of specifications to/from a human-readable format (e.g., JSON or YAML). -- [x] Add meta-instructions for complexity analysis (probabilistic time and space complexity). -- [ ] Rework complexity analysis to support multi-function and complex complexity (like `O(n) call to f` inside a loop). -- [ ] Implement instruction for insertion/extraction of (1) structured data and (2) arrays and vectors. -- [ ] Implement the `meta-behavior` instruction family to check whether a function call - terminates, crashes, or loops based on the `HaltingBehavior` specification. - -## Mid-Term Goals - -- [ ] Develop derivers for simple specifications (find loop invariants, preconditions, postconditions). -- [ ] Implement a verification engine that can check function equivalence based on provided specifications. -- [ ] Implement searching of target conditions for equivalence using SMT solvers. 
diff --git a/docs/report/data/hyperion-icon.svg b/docs/report/data/hyperion-icon.svg new file mode 100644 index 0000000..dab01fb --- /dev/null +++ b/docs/report/data/hyperion-icon.svg @@ -0,0 +1,13 @@ + + + + + Layer 1 + + + + + + + + \ No newline at end of file diff --git a/docs/report/main.typ b/docs/report/main.typ new file mode 100644 index 0000000..f4b5aa2 --- /dev/null +++ b/docs/report/main.typ @@ -0,0 +1,139 @@ +#import "@preview/codly:1.3.0": * +#import "@preview/codly-languages:0.1.1": * +#import "@preview/cetz:0.4.2": * +#show: codly-init.with() + +#let report_title = "Hyperion: Technical Report" +#let report_subtitle = "Building a Framework for High-Level Optimizations, for Massively Scalable Programs" +#let report_author = "Guillaume Boyé" +#let report_date = datetime.today() + +#set page( + margin: (top: 22mm, bottom: 22mm, left: 20mm, right: 20mm), +) + +#set text( + font: "Libertinus Serif", + ligatures: true, + size: 10.5pt, +) + +#set par( + justify: true, +) + +#let calc-margin(margin, shape) = if margin == auto { + 2.5 / 21 * calc.min(..shape) +} else { + margin +} + +#show heading.where(level: 1): it => { + pagebreak(weak: true) + set text( + size: 1.3em, + weight: "regular", + ) + set align(right) + block(above: 20pt, below: 50pt, context { + let title-content = { + if heading.numbering != none { + smallcaps([Chapter #counter(heading).display(heading.numbering)]) + linebreak() + } + + text(size: 1.3em)[*#it.body*] + } + title-content + place(dx: calc-margin(page.margin.right, (page.width, page.height)), horizon + right, rect( + fill: black, + height: measure(title-content).height, + )) + }) +} + +// #set heading(numbering: "1.1.") +#show heading.where(level: 2): it => { + set text(size: 1.5em, weight: "regular") + set align(left) + block( + above: 30pt, + below: 15pt, + // fill: red, + context { + let title-content = { + smallcaps[*Section #counter(heading).display(heading.numbering) *] + h(2mm) + } + + title-content + it.body + }, + ) +} + 
+#set heading(numbering: "1.") + +#show raw: it => text(it, font: "JetBrainsMono NF") +#show raw.where(block: false): it => highlight( + it, + fill: luma(247), + radius: 0.5pt, + extent: 1pt, + top-edge: 1em, +) +#codly(zebra-fill: luma(248)) + +#set document( + title: report_title, + author: report_author, + date: report_date, +) + +#let title_page() = context [ + #align(center)[ + #v(20mm) + #text(size: 26pt, weight: "bold")[#report_title] + #v(5mm) + #text(size: 12pt, fill: luma(80))[#report_subtitle] + #v(12mm) + + #image( + "data/hyperion-icon.svg", + width: 120mm, + ) + + #v(1fr) + + #grid( + columns: 1fr, + row-gutter: 4mm, + [#text(size: 11pt)[#report_author]], + [#text(size: 10pt, fill: luma(90))[#report_date.display()]], + ) + + #v(20mm) + ] +] + +#show: document => [ + #title_page() + #pagebreak() + #outline(depth: 2) + #pagebreak() + #document +] + +#include "sections/00-introduction.typ" +#pagebreak() + +#include "sections/01-roadmap.typ" +#pagebreak() + +#include "sections/02-codebase-overview.typ" +#pagebreak() + +#include "sections/03-ir-spec.typ" +#pagebreak() + +#include "sections/04-theorem-derivation.typ" diff --git a/docs/report/sections/00-introduction.typ b/docs/report/sections/00-introduction.typ new file mode 100644 index 0000000..cc29741 --- /dev/null +++ b/docs/report/sections/00-introduction.typ @@ -0,0 +1,10 @@ += Introduction + +Hyperion is a framework designed to enable scalable, *high-performance applications* in a *platform-agnostic way*. Its goal is to let developers write an algorithm once and then obtain efficient execution across a wide range of targets, including single-core `CPU`s, multicore `CPU`s, multi-node clusters, `GPU`s, `TPU`s, and `FPGA`s. + +Today, achieving high performance typically requires rewriting the same program several times. A common workflow starts with a simple draft used for testing, followed by a multithreaded version for multicore CPUs. 
If performance is still insufficient, the program may be rewritten again for distributed execution across multiple CPU nodes, and later rewritten yet again to target accelerators such as GPUs. In embedded contexts, further rewrites are often needed to fit the constraints and programming model of FPGAs or specialized hardware. This process is time consuming, difficult to maintain, and error prone, and it makes it hard to keep a single codebase working reliably across all contexts. Addressing this problem is the core objective of Hyperion. + +Hyperion aims to achieve this through four main components. First, it relies on a generic intermediate representation, inspired by LLVM IR, to express programs in a way that is independent of any single architecture. Second, it introduces optimization methods that go beyond traditional compiler passes by using formal verification and automatic theorem derivation to discover sound program transformations. These transformations can enable non-obvious changes, including changes that reduce the asymptotic complexity of an algorithm without changing its behavior. Third, Hyperion uses the derived theorems to automate parallelization, generating parallel implementations when correctness can be justified by the underlying proofs. Fourth, Hyperion targets heterogeneous execution, with the goal of running and scaling programs across mixed clusters of devices. In particular, it aims to make cross-vendor accelerator scaling practical, such as running across both NVIDIA and AMD GPUs, which is still difficult or unsupported in many existing frameworks. + +To support different deployment scenarios, Hyperion is planned to operate in two complementary modes. In an ahead-of-time compilation mode, it should be able to generate compiled binaries and standalone programs suitable for deployment, including on embedded systems. 
In a just-in-time mode, it is intended to compile and optimize at runtime, enabling a tradeoff between compilation cost and execution speed. This would allow kernels to be specialized based on observed usage and input characteristics, and would enable dynamic scheduling decisions when previously unseen bottlenecks arise during execution. + diff --git a/docs/report/sections/01-roadmap.typ b/docs/report/sections/01-roadmap.typ new file mode 100644 index 0000000..2fa1d6e --- /dev/null +++ b/docs/report/sections/01-roadmap.typ @@ -0,0 +1,54 @@ +#import "@preview/cheq:0.3.0": checklist + +#show: checklist.with(fill: luma(95%), radius: .2em, stroke: blue) + += Roadmap + +This section outlines the planned roadmap for the development and enhancement of the Hyperion system. The roadmap is divided into short-term, mid-term, and long-term goals, each focusing on different aspects of the system's capabilities and performance. + +== Short-Term Goals + +- [x] Add `Instance` to represent a library instance. +- [x] Add an extension mechanism based on a plugin system. +- [x] Add basic Python bindings with `pyo3` and `maturin`. +- [x] Implement automated CI for the Python bindings. +- [x] Add basic C bindings with `cbindgen` and a compatibility layer. +- [x] Add a parser for simple logical expressions that can also parse basic assembly in the custom IR syntax. +- [x] Add `fmt` support for core data structures. +- [x] Add compilation API endpoints and compile to the internal representation (a serialized format of the AST). +- [x] Allow loading from serialized IR. +- [x] Add zstd to compress serialized IR. +- [ ] Add `Device` to represent the different hardware devices that can be targeted (e.g., CPU, GPU, TPU). +- [?] Add `Network` to represent a network between devices (needs to be designed). +- [ ] Add `DeviceCluster` to represent a cluster/group of such devices. +- [ ] Add `Executor` trait to represent execution strategies on devices or clusters. +- [ ] Implement first `Executor`, a simple single-threaded evaluator. 
+- [x] Construct core IR data structures +- [x] Build a type system to support typed IR +- [!] Add type checker for the IR +- [ ] Implement basic IR theorem-derivation and proof system +- [ ] Implement theorem derivation strategies +- [ ] Build equivalent functions from theorems +- [ ] Figure out memory schema and management within the formal system + +== Mid-Term Goals + +- [ ] Construct a plugin to transpile from the custom IR to LLVM IR +- [ ] Build an executor to run compiled code using the LLVM JIT +- [ ] Integrate basic codegen: x86, ARM +- [ ] Integrate codegen on GPU: CUDA, ROCm. Integrate with their respective drivers. +- [ ] Implement algorithm reuse and caching with pre-built proofs +- [ ] Add basic complexity analysis for IR functions +- [ ] Figure out how to represent multi-threaded concurrent programs, and how to reason about and build proofs about them and their execution. +- [ ] Start massively parallel execution strategies on clusters of devices. + +== Long-Term Goals + +- [ ] Attempt compiling Python to our IR using a custom frontend +- [ ] Do the same for low-level languages that have an LLVM IR frontend (e.g., Rust, C, C++). Probably write bindings and variants to allow for easier interop. 
+- [ ] Conceptualize a language frontend for generating IR +- [ ] Implement this frontend +- [ ] Build standard library of optimized routines +- [ ] Figure out shared filesystem + + diff --git a/docs/report/sections/02-codebase-overview.typ b/docs/report/sections/02-codebase-overview.typ new file mode 100644 index 0000000..d939ae6 --- /dev/null +++ b/docs/report/sections/02-codebase-overview.typ @@ -0,0 +1,156 @@ +#import "@preview/cetz:0.4.2" + +#let vc_add = (coordinate, delta) => (coordinate.at(0) + delta.at(0), coordinate.at(1) + delta.at(1)) +#let vc_sub = (coordinate, delta) => (coordinate.at(0) - delta.at(0), coordinate.at(1) - delta.at(1)) +#let vc_scale = (coordinate, factor) => (coordinate.at(0) * factor, coordinate.at(1) * factor) +#let vc_midpoint = (coord_a, coord_b, pos: 50%) => { + let t = pos / 100% + ( + coord_a.at(0) + t * (coord_b.at(0) - coord_a.at(0)), + coord_a.at(1) + t * (coord_b.at(1) - coord_a.at(1)), + ) +} +#let vc_distance = (coord_a, coord_b) => { + let dx = coord_b.at(0) - coord_a.at(0) + let dy = coord_b.at(1) - coord_a.at(1) + calc.sqrt(dx * dx + dy * dy) +} + +#let blob = (_content, coord_a, size: (3, 1), color: red, padding: 1, text_padding: 0.3, text_anchor: "center") => { + import cetz.draw: * + + let coord_a = vc_scale(coord_a, 1 + padding) + let coord_b = vc_add(coord_a, size) + + let coord_c = vc_midpoint(coord_a, coord_b) + + if text_anchor.starts-with("top-") { + coord_c = (coord_c.at(0), coord_b.at(1) - text_padding) + } else if text_anchor.starts-with("bottom-") { + coord_c = (coord_c.at(0), coord_a.at(1) + text_padding) + } + if text_anchor.ends-with("-left") { + coord_c = (coord_a.at(0) + text_padding, coord_c.at(1)) + } else if text_anchor.ends-with("-right") { + coord_c = (coord_b.at(0) - text_padding, coord_c.at(1)) + } + + rect( + coord_a, + coord_b, + fill: color.lighten(60%), + stroke: color.darken(30%) + 0.5mm, + anchor: "center", + radius: 2mm, + ) + content(coord_c, text(fill: color.darken(60%), size: 1.1em, 
font: "Open Sans", _content)) +} + +#let arrow = (from, to, color: black, width: 0.5mm, side: 3, head-size: 0.1, padding: 1, horizontal: false) => { + import cetz.draw: * + let from = vc_scale(from, 1 + padding) + let to = vc_scale(to, 1 + padding) + + let mid_a = (0, 0) + let mid_b = (0, 0) + if (horizontal) { + mid_a = (to.at(0), from.at(1)) + mid_b = (from.at(0), to.at(1)) + } else { + mid_a = (from.at(0), to.at(1)) + mid_b = (to.at(0), from.at(1)) + } + + let direction = vc_scale(vc_sub(to, mid_b), 1 / (vc_distance(mid_b, to) + 1e-3)) + let angle = calc.atan2(direction.at(0), direction.at(1)) + let to = vc_sub(to, vc_scale(direction, head-size)) + + // Project mid on either the + // line(from, mid_a, mid_b, to, stroke: blue + width) + bezier( + from, + to, + mid_a, + mid_b, + stroke: color + width, + ) + polygon( + to, + side, + angle: angle + 0deg, + radius: head-size, + fill: color, + ) +} + += Codebase overview + +Hyperion's codebase is structured to promote a clear separation between the core IR language, the runtime/engine that consumes it, and the integration surfaces (bindings and examples) that exercise stable abstractions. This section provides an architectural overview of these components and their interactions. 
+ +== Architecture at a glance + +#figure( + cetz.canvas( + { + import cetz.draw: * + // Public API + blob([Public API], (1.8, -1.4), size: (3.8, 4.2), color: yellow.lighten(60%), text_anchor: "bottom-center") + blob([Instance], (2, 0), size: (3, 1), color: yellow) + blob([Module], (2, -1), size: (3, 1), color: yellow) + + // IR components + blob([IR], (3.8, -3.4), size: (7.8, 8.2), color: red.lighten(60%), text_anchor: "bottom-center") + blob([Module], (5, 0), size: (3, 1), color: red) + blob([Function], (5, -1), size: (3, 1), color: red) + blob([BasicBlock], (5, -2), size: (3, 1), color: red) + blob([Instruction], (4, -3), size: (3, 1), color: red) + blob([Terminator], (6, -3), size: (3, 1), color: red) + + // Optimizer components + blob([Theorem Library], (8, 0), size: (4, 1), color: blue) + blob([Theorem], (8.25, -1), size: (3, 1), color: blue) + blob([State], (8.25, -2), size: (3, 1), color: color.navy) + + arrow((2.75, 0), (2.75, -0.48), side: 4) + + arrow((5.75, -.01), (5.75, -0.48), side: 4) + arrow((5.75, -1.01), (5.75, -1.48), side: 4) + arrow((5.75, -2.01), (4.75, -2.48), side: 4) + arrow((5.75, -2.01), (6.75, -2.48), side: 3) + }, + padding: (5mm, 0), + ), + caption: "High-level architecture of Hyperion's codebase, showing the core IR components (red), public API (yellow), and optimizer components (blue).", +) + + +== API surface + +To simplify the use of `Hyperion`, the framework exposes a facade API that hides internal complexity. This facade serves as the interface between user code and the underlying internal components. It enables (1) standardized interaction patterns, (2) easier maintenance and evolution of internal components while preserving stability and backward and forward compatibility, and (3) multi-language bindings, currently `Python` and `C`. + +Current and planned facade API features include: +- A library `Instance` that owns configuration and extension state. This is the main entry point for users. 
+- An extension mechanism for plugins, enabling optional features to be registered and discovered. +- Compilation from the textual IR into an internal representation. +- Loading of compiled IR into a `Module` from file or memory. + +Planned milestones extend this core compile-and-load workflow into an execution and optimization platform: + +- An `OptimizerPipeline` abstraction constructed from optimization passes. +- Explicit data and execution abstractions: `Buffer`/`BufferView`, `Device`, and higher-level `DeviceCluster` plus `Network` for inter-device communication. +- An `Executor` abstraction that executes functions on a device cluster with buffer-based I/O. +- A progression from low-level optimizations to “smart” pipelining and on-the-fly optimization. + +== Internal architecture overview + +At the core of Hyperion is a single *intermediate representation* (IR) that acts as the "shared language" between compilation, reasoning, optimization, and execution. Unlike conventional compiler IRs that only model programs, Hyperion's IR is intended to represent both the *program being executed* and the *proof artifacts* (proof obligations, derived lemmas, and theorems) about that program. In practice, this means the same structural vocabulary (modules, functions, basic blocks, and instructions) is used to describe executable computations and equivalence-preserving transformations. See @ir-section for a detailed specification of the IR. + +Typical usage of Hyperion involves the following steps: + +- *Represent*: ingest user code (via the facade API) and lower it to IR, producing a `Module` of `Function`s. +- *Extract*: analyze each function to identify semantic invariants and candidate rewrite opportunities, expressed as theorem statements over IR fragments. +- *Prove and catalog*: discharge proof obligations and store derived theorems as reusable transformation rules. 
+- *Synthesize*: when a theorem establishes that an alternative implementation is behaviorally equivalent (or equivalent under explicit preconditions), build a new function that realizes the proven transformation. When possible, these synthesized functions can expose additional structure, such as parallelism, vectorization opportunities, or improved asymptotic behavior. +- *Execute*: select an implementation (original or synthesized) and run it through the execution subsystem, targeting the available devices. + +This architecture makes theorem derivation a first-class optimization mechanism: rather than relying only on local, pattern-based compiler passes, Hyperion seeks to *discover* semantic facts about functions and then use those facts to construct new implementations with the same observable behavior. In the long term, this is what enables the framework to move from "optimizing a given function" to "finding another function that behaves the same way, but is cheaper to run", and then executing that replacement. diff --git a/docs/report/sections/03-ir-spec.typ b/docs/report/sections/03-ir-spec.typ new file mode 100644 index 0000000..95cf6a5 --- /dev/null +++ b/docs/report/sections/03-ir-spec.typ @@ -0,0 +1,389 @@ += Hyperion IR + +This section specifies the *Hyperion IR* as a user-facing language. The goal is to define syntax and semantics precisely enough that (1) derived theorems can rely on a stable model and (2) equivalence-preserving rewrites can be stated as transformations over IR fragments. + +The IR is inspired by LLVM IR#footnote[ + LLVM IR is described in detail in the LLVM Language Reference Manual: #link("https://llvm.org/docs/LangRef.html")[https://llvm.org/docs/LangRef.html]. +], but diverges where Hyperion needs explicit semantics (notably integer overflow) and a proof layer (meta-level artifacts). 
+ +== Conceptual model + +Hyperion IR is a typed, block-structured, control-flow graph (CFG) representation organized as *modules* containing *functions*. A function consists of basic blocks; each block contains a sequence of instructions followed by a terminator. + +Key properties: + +- *Static typing*: every value has a type (integers, floats, pointers, structured aggregates). +- *Explicit control flow*: control flow is represented with explicit terminators; edges are not implicit. +- *Semantics-first design*: operations are split not only by “what they compute” but also by “how they compute it” (e.g., explicit overflow modes). + +== Syntax (surface form) + +This document uses a textual syntax close to the parser syntax used throughout the repository. + +A module is a sequence of function declarations/definitions: + +```llvm +; Module defined with following external/internal functions +declare (*) +define (*) { * } +``` + +A function definition contains labeled blocks: + +```llvm +define (*) { +