Getting Started

Getting Started with OaML

From zero to a trained model on Vulkan GPU compute. No CUDA, no Python, no framework dependencies.

Requirements

  • oa library — via vcpkg or CMAKE_PREFIX_PATH
  • Vulkan 1.3+ driver — any discrete or integrated GPU
  • slangc — included via vcpkg in the oa build

Full Quickstart

Fashion-MNIST end-to-end. The tutorial in Tutorial/Ml/TutorialMnistClassifier.cpp verifies 83.2% test accuracy at 244K samples/s on an RTX 5090 Laptop GPU.

Getting_started.cpp

#include <Oa/Oa.h>
// Define your model — subclass OaModule, register layers
class OaMnistClassifier : public OaModule {
public:
OaMnistClassifier() {
Fc1_ = OaMakeSharedPtr<OaLinear>(784, 128);
Fc2_ = OaMakeSharedPtr<OaLinear>(128, 10);
RegisterModule("fc1", Fc1_);
RegisterModule("fc2", Fc2_);
}
OaDeviceMatrix Forward(const OaDeviceMatrix& x) override {
auto h = OaFnMatrix::Scale(x, 1.0f / 255.0f);
h = OaFnMatrix::Relu(Fc1_->Forward(h));
return Fc2_->Forward(h);
}
private:
OaSharedPtr<OaLinear> Fc1_, Fc2_;
};
int main() {
// 1. Create runtime
auto rt = OaEngine::Create({.AppName = "Train"}).Unwrap();
// 2. Instantiate model and optimizer
OaMnistClassifier model;
OaAdamW opt(model.AllParameterPtrs(), 0.001f);
OaFnGrad::SetMode(OaGradMode::Dynamic);
// 3. Training loop
for (OaI32 step = 0; step < 2000; ++step) {
sampler.NextBatch(batchX, batchY);
auto logits = model.Forward(batchX);
auto loss = OaFnMatrix::CrossEntropyLoss(logits, batchY);
OaFnGrad::Backward(loss);
opt.Step();
opt.ZeroGrad();
}
// 4. Save checkpoint
model.Save("mnist.oam");
}

Next Steps