Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 18 additions & 18 deletions .github/workflows/build-and-run.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,11 @@ jobs:
echo "Detected TornadoVM SDK: $FULL_SDK"

# Export for current shell session
export TORNADO_SDK="$FULL_SDK"
export TORNADOVM_HOME="$FULL_SDK"
export PATH="$FULL_SDK/bin:$JAVA_HOME/bin:$PATH"

# Save for subsequent steps
echo "TORNADO_SDK=$FULL_SDK" >> $GITHUB_ENV
echo "TORNADOVM_HOME=$FULL_SDK" >> $GITHUB_ENV
echo "PATH=$PATH" >> $GITHUB_ENV

echo "=== Checking tornado CLI ==="
Expand All @@ -89,105 +89,105 @@ jobs:
- name: Build GPULlama3.java
run: |
cd ${{ github.workspace }}
echo "Using TORNADO_SDK=$TORNADO_SDK"
export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
echo "Using TORNADOVM_HOME=$TORNADOVM_HOME"
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
tornado --version
./mvnw clean package -DskipTests
- name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf
run: |
cd ${{ github.workspace }}
export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
--prompt "Say hello"
- name: FP16 - Run Qwen3-4B-f16.gguf
run: |
cd ${{ github.workspace }}
export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model $MODELS_DIR/Qwen3-4B-f16.gguf \
--prompt "Say hello"
- name: FP16 - Run Mistral-7B-Instruct-v0.3.fp16.gguf
run: |
cd ${{ github.workspace }}
export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model $MODELS_DIR/Mistral-7B-Instruct-v0.3.fp16.gguf \
--prompt "Say hello"
- name: FP16 - Run Qwen2.5-1.5b-instruct-fp16.gguf
run: |
cd ${{ github.workspace }}
export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model $MODELS_DIR/qwen2.5-1.5b-instruct-fp16.gguf \
--prompt "Say hello"
- name: FP16 - Run Phi-3-mini-4k-instruct-fp16.gguf
run: |
cd ${{ github.workspace }}
export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model /$MODELS_DIR/Phi-3-mini-4k-instruct-fp16.gguf \
--prompt "Say hello"
- name: FP16 - Run Granite-3.2-2b-instruct-f16.gguf
run: |
cd ${{ github.workspace }}
export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model /$MODELS_DIR/granite-3.2-2b-instruct-f16.gguf \
--prompt "Say hello"
- name: FP16 - Run Granite-4.0-1b-F16.gguf
run: |
cd ${{ github.workspace }}
export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model /$MODELS_DIR/granite-4.0-1b-F16.gguf \
--prompt "Say hello"
- name: Q8 - Run Llama-3.2-1B-Instruct-Q8_0.gguf
run: |
cd ${{ github.workspace }}
export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model $MODELS_DIR/Llama-3.2-1B-Instruct-Q8_0.gguf \
--prompt "Say hello"
- name: Q8 - Run Qwen3-0.6B-Q8_0.gguf
run: |
cd ${{ github.workspace }}
export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model $MODELS_DIR/Qwen3-0.6B-Q8_0.gguf \
--prompt "Say hello"
- name: Q8 - Run Phi-3-mini-4k-instruct-Q8_0.gguf
run: |
cd ${{ github.workspace }}
export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model $MODELS_DIR/Phi-3-mini-4k-instruct-Q8_0.gguf \
--prompt "Say hello"
- name: Q8 - Run Qwen2.5-1.5b-instruct-q8_0.gguf
run: |
cd ${{ github.workspace }}
export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model $MODELS_DIR/qwen2.5-1.5b-instruct-q8_0.gguf \
--prompt "Say hello"
- name: Q8 - Mistral-7B-Instruct-v0.3.Q8_0.gguf
run: |
cd ${{ github.workspace }}
export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model $MODELS_DIR/Mistral-7B-Instruct-v0.3.Q8_0.gguf \
--prompt "Say hello"
- name: Q8 - Run Granite-3.2-2b-instruct-Q8.gguf
run: |
cd ${{ github.workspace }}
export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model /$MODELS_DIR/granite-3.2-2b-instruct-Q8_0.gguf \
--prompt "Say hello"
- name: Q8 - Run Granite-4.0-1b-Q8_0.gguf
run: |
cd ${{ github.workspace }}
export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model /$MODELS_DIR/granite-4.0-1b-Q8_0.gguf \
--prompt "Say hello"
Expand Down
3 changes: 1 addition & 2 deletions .github/workflows/deploy-maven-central.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ name: Deploy to Maven Central
on:
push:
tags:
- 'v*'
- '[0-9]+.[0-9]+.[0-9]+*'
- 'v[0-9]+.[0-9]+.[0-9]+'
workflow_run:
workflows: ["Finalize GPULlama3 Release"]
types: [completed]
Expand Down
4 changes: 2 additions & 2 deletions LlamaTornadoCli.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
//JAVA 21
//PREVIEW
//DEPS io.github.beehive-lab:gpu-llama3:0.3.2-dev
//DEPS io.github.beehive-lab:tornado-api:2.1.0
//DEPS io.github.beehive-lab:tornado-runtime:2.1.0
//DEPS io.github.beehive-lab:tornado-api:2.2.0
//DEPS io.github.beehive-lab:tornado-runtime:2.2.0

//SOURCES TornadoFlags.java
// === Set to not get annoying warnings about annotation processing
Expand Down
60 changes: 12 additions & 48 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,6 @@ Ensure you have the following installed and configured:

### Install, Build, and Run

When cloning this repository, use the `--recursive` flag to ensure that TornadoVM is properly included as submodule:

```bash
# Clone the repository
git clone https://github.com/beehive-lab/GPULlama3.java.git
Expand All @@ -80,66 +78,32 @@ git clone https://github.com/beehive-lab/GPULlama3.java.git
#### Install the TornadoVM SDK on Linux or macOS

Ensure that your JAVA_HOME points to a supported JDK before using the SDK. Download an SDK package matching your OS, architecture, and accelerator backend (opencl, ptx).
All pre-built SDKs are available on the TornadoVM [Releases Page](https://github.com/beehive-lab/TornadoVM/releases).
#After extracting the SDK, add its bin/ directory to your PATH so the `tornado` command becomes available.
TornadoVM is distributed through our [**official website**](https://www.tornadovm.org/downloads) and **SDKMAN!**. Install a version that matches your OS, architecture, and accelerator backend.

##### Linux (x86_64)
All TornadoVM SDKs are available on the [SDKMAN! TornadoVM page](https://sdkman.io/sdks/tornadovm/).

```bash
wget https://github.com/beehive-lab/TornadoVM/releases/download/v2.1.0/tornadovm-2.1.0-opencl-linux-amd64.zip
unzip tornadovm-2.1.0-opencl-linux-amd64.zip
# Replace <path-to-sdk> manually with the absolute path of the extracted folder
export TORNADOVM_HOME="<path-to-sdk>/tornadovm-2.1.0-opencl"
export PATH=$TORNADO_SDK/bin:$PATH
#### SDKMAN! Installation (Recommended)

tornado --devices
tornado --version
##### Install SDKMAN! if not installed already
```bash
curl -s "https://get.sdkman.io" | bash
source "$HOME/.sdkman/bin/sdkman-init.sh"
sdk version
```

##### macOS (Apple Silicon)

##### Install TornadoVM via SDKMAN!
```bash
wget https://github.com/beehive-lab/TornadoVM/releases/download/v2.1.0/tornadovm-2.1.0-opencl-mac-aarch64.zip
unzip tornadovm-2.1.0-opencl-mac-aarch64.zip
# Replace <path-to-sdk> manually with the absolute path of the extracted folder
export TORNADOVM_HOME="<path-to-sdk>/tornadovm-2.1.0-opencl"
export PATH=$TORNADO_SDK/bin:$PATH

tornado --devices
tornado --version
sdk install tornadovm
```

#### Build the GPULlama3.java

#### Verify TornadoVM is Installed Correctly
```bash
# Navigate to the project directory
cd GPULlama3.java

# Source the project-specific environment paths -> this will ensure the correct paths are set for the project and the TornadoVM SDK
# Expect to see: [INFO] Environment configured for Llama3 with TornadoVM at: $TORNADO_SDK
source set_paths

# Build the project using Maven (skip tests for faster build)
# mvn clean package -DskipTests or just make
make

# Run the model (make sure you have downloaded the model file first - see below)
./llama-tornado --gpu --verbose-init --opencl --model beehive-llama-3.2-1b-instruct-fp16.gguf --prompt "tell me a joke"
tornado --devices
```


----------

### TornadoVM-Accelerated Inference Performance and Optimization Status

We are at the early stages of Java entering the AI world, with features added to the JVM that enable faster execution, such as GPU acceleration, vector acceleration, high-performance access to off-heap memory, and others.
<br><br>This repository provides the first Java-native implementation of Llama3 that automatically compiles and executes Java code on GPUs via TornadoVM.
The baseline numbers presented below provide a solid starting point for achieving more competitive performance compared to llama.cpp or native CUDA implementations.
[Our roadmap](https://github.com/beehive-lab/GPULlama3.java/blob/main/docs/GPULlama3_ROADMAP.md) provides the upcoming set of features that will dramatically improve the numbers below with the clear target being to achieve performance parity with the fastest implementations.
<br><br>
If you obtain additional performance data points (e.g., on new hardware or platforms), please let us know so that we can add them below.
<br><br>
In addition, if you are interested in learning more about the challenges of managed programming languages and GPU acceleration, you can read [our book](https://link.springer.com/book/10.1007/978-3-031-49559-5) or consult the [TornadoVM educational pages](https://www.tornadovm.org/resources).


| Vendor / Backend | Hardware | Llama-3.2-1B-Instruct | Llama-3.2-3B-Instruct | Optimizations |
Expand Down
6 changes: 3 additions & 3 deletions TornadoFlags.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
//JAVA_OPTIONS -XX:+UseParallelGC

// === Native library path ===
//JAVA_OPTIONS -Djava.library.path=${env.TORNADO_SDK}/lib
//JAVA_OPTIONS -Djava.library.path=${env.TORNADOVM_HOME}/lib

// === Tornado runtime classes ===
//JAVA_OPTIONS -Dtornado.load.api.implementation=uk.ac.manchester.tornado.runtime.tasks.TornadoTaskGraph
Expand All @@ -23,8 +23,8 @@
//JAVA_OPTIONS -Dtornado.load.annotation.parallel=uk.ac.manchester.tornado.api.annotations.Parallel

// === Module system ===
//JAVA_OPTIONS --module-path ${env.TORNADO_SDK}/share/java/tornado
//JAVA_OPTIONS --upgrade-module-path ${env.TORNADO_SDK}/share/java/graalJars
//JAVA_OPTIONS --module-path ${env.TORNADOVM_HOME}/share/java/tornado
//JAVA_OPTIONS --upgrade-module-path ${env.TORNADOVM_HOME}/share/java/graalJars
//JAVA_OPTIONS --add-modules ALL-SYSTEM,tornado.runtime,tornado.annotation,tornado.drivers.common,tornado.drivers.opencl

// === Common exports ===
Expand Down
6 changes: 3 additions & 3 deletions llama-tornado
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,20 @@ class LlamaRunner:

def __init__(self):
self.java_home = os.environ.get("JAVA_HOME")
self.tornado_sdk = os.environ.get("TORNADO_SDK")
self.tornado_sdk = os.environ.get("TORNADOVM_HOME")
self.llama_root = os.environ.get("LLAMA_ROOT")

if not all([self.java_home, self.tornado_sdk, self.llama_root]):
print("Error: Required environment variables not set")
print("Please ensure JAVA_HOME, TORNADO_SDK, and LLAMA_ROOT are defined")
print("Please ensure JAVA_HOME, TORNADOVM_HOME, and LLAMA_ROOT are defined")
print("Note: check set_path in root dir -> source set_path")
sys.exit(1)

def _validate_paths(self):
"""Validate that required paths exist."""
paths_to_check = {
"JAVA_HOME": self.java_home,
"TORNADO_SDK": self.tornado_sdk,
"TORNADOVM_HOME": self.tornado_sdk,
"LLAMA_ROOT": self.llama_root,
}

Expand Down
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@
<dependency>
<groupId>io.github.beehive-lab</groupId>
<artifactId>tornado-api</artifactId>
<version>2.1.0</version>
<version>2.2.0</version>
</dependency>
<dependency>
<groupId>io.github.beehive-lab</groupId>
<artifactId>tornado-runtime</artifactId>
<version>2.1.0</version>
<version>2.2.0</version>
</dependency>
</dependencies>

Expand Down
4 changes: 2 additions & 2 deletions set_paths
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@
export LLAMA_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Add TornadoVM and LLaMA bin directories to PATH
export PATH="${PATH}:${TORNADO_SDK}/bin:${LLAMA_ROOT}"
export PATH="${PATH}:${TORNADOVM_HOME}/bin:${LLAMA_ROOT}"

# Optional: Set JAVA_HOME if required
# export JAVA_HOME=/path/to/graalvm
# export PATH="${JAVA_HOME}/bin:${PATH}"

echo "[INFO] Environment configured for LLaMA3 with TornadoVM at: $TORNADO_SDK"
echo "[INFO] Environment configured for LLaMA3 with TornadoVM at: $TORNADOVM_HOME"
# ===== Notes =====
# After sourcing this script:
# 1. TornadoVM will be available for GPU computation
Expand Down
6 changes: 1 addition & 5 deletions set_paths.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,8 @@ REM Resolve the absolute path to this script's directory
set "LLAMA_ROOT=%~dp0"
set "LLAMA_ROOT=%LLAMA_ROOT:~0,-1%"

REM Set TornadoVM root and SDK paths
set "TORNADO_ROOT=%LLAMA_ROOT%\external\tornadovm"
set "TORNADO_SDK=%TORNADO_ROOT%\bin\sdk"

REM Add TornadoVM SDK and LLaMA3 bin to PATH
set "PATH=%TORNADO_SDK%;%LLAMA_ROOT%;%PATH%"
set "PATH=%TORNADOVM_HOME%;%LLAMA_ROOT%;%PATH%"

REM Optional: Set JAVA_HOME if needed
REM set "JAVA_HOME=C:\Path\To\GraalVM"
Expand Down
Loading