How to Run YOLOv8 Inference Directly in Golang (with ONNX)
And mark the detected objects in the output image

This is a focused how-to article; I assume you already know what YOLO is and have basic Golang knowledge.
Notes:
- the inference example is based on the yalue examples
- the code in this article targets macOS with the ARM architecture; for other OSes and architectures, you will need the libs available here, and an example of how to use them is located here (a quick smoke test for the shared library is sketched right after these notes)
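Before wiring up the full pipeline, you can check that the library you downloaded actually loads. This is a minimal sketch, assuming a hypothetical library location; point the path at wherever you put the onnxruntime shared library for your platform:

package main

import (
    "fmt"

    ort "github.com/yalue/onnxruntime_go"
)

func main() {
    // Assumed location of the onnxruntime shared library; adjust for your
    // platform (.dylib on macOS, .so on Linux, .dll on Windows).
    ort.SetSharedLibraryPath("./third_party/onnxruntime_arm64.dylib")
    if err := ort.InitializeEnvironment(); err != nil {
        fmt.Printf("onnxruntime failed to load: %s\n", err)
        return
    }
    defer ort.DestroyEnvironment()
    fmt.Println("onnxruntime loaded successfully")
}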
Step 1: Convert YOLO to ONNX
First we need to convert the YOLOv8 model to the ONNX format. To do this, we'll install the ultralytics package with pip and use the yolo export command.
The image size here (640) is important: it has to match the size we'll use later in our code.
mkdir yolov8 && cd yolov8
# Create and activate a virtual env.
python3.10 -m venv env
source env/bin/activate
pip install ultralytics
yolo export model=yolov8n.pt format=onnx imgsz=640
# Deactivate the virtual env.
deactivate
The output should be similar to:
(...)
ONNX: starting export with onnx 1.17.0 opset 17...
ONNX: slimming with onnxslim 0.1.61...
ONNX: export success ✅ 19.8s, saved as 'yolov8n.onnx' (12.2 MB)
Export complete (20.5s)
Results saved to /(...)/yolov8
Predict: yolo predict task=detect model=yolov8n.onnx imgsz=640
Validate: yolo val task=detect model=yolov8n.onnx imgsz=640 data=coco.yaml
Visualize: https://netron.app
💡 Learn more at https://docs.ultralytics.com/modes/export
This will create a yolov8n.onnx file containing the ONNX version of the YOLOv8 model. Be sure to copy the model into your project directory.
I've already included a converted model file in the article's full code example.
Step 2: Load Image File
pic, e := loadImageFile(imagePath)
if e != nil {
    fmt.Printf("error loading input image: %s\n", e)
    return 1
}
(...)
func loadImageFile(filePath string) (image.Image, error) {
    f, e := os.Open(filePath)
    if e != nil {
        return nil, fmt.Errorf("error opening %s: %w", filePath, e)
    }
    defer func(f *os.File) {
        err := f.Close()
        if err != nil {
            fmt.Printf("error closing %s: %v\n", filePath, err)
        }
    }(f)
    pic, _, e := image.Decode(f)
    if e != nil {
        return nil, fmt.Errorf("error decoding %s: %w", filePath, e)
    }
    return pic, nil
}
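Note that image.Decode only recognizes image formats whose decoders have been registered. A minimal import block for this helper could look like the sketch below; the blank imports are what make JPEG and PNG files decodable:

import (
    "fmt"
    "image"
    _ "image/jpeg" // registers the JPEG decoder for image.Decode
    _ "image/png"  // registers the PNG decoder for image.Decode
    "os"
)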
Step 3: Init ONNX Session
modelSession, e := initSession()
if e != nil {
    fmt.Printf("Error creating session and tensors: %s\n", e)
    return 1
}
defer modelSession.Destroy()
(...)
func initSession() (*ModelSession, error) {
    ort.SetSharedLibraryPath(sharedLibPath)
    err := ort.InitializeEnvironment()
    if err != nil {
        return nil, fmt.Errorf("error initializing ORT environment: %w", err)
    }

    inputShape := ort.NewShape(1, 3, 640, 640)
    inputTensor, err := ort.NewEmptyTensor[float32](inputShape)
    if err != nil {
        return nil, fmt.Errorf("error creating input tensor: %w", err)
    }

    outputShape := ort.NewShape(1, 84, 8400)
    outputTensor, err := ort.NewEmptyTensor[float32](outputShape)
    if err != nil {
        inputTensor.Destroy()
        return nil, fmt.Errorf("error creating output tensor: %w", err)
    }

    options, err := ort.NewSessionOptions()
    if err != nil {
        inputTensor.Destroy()
        outputTensor.Destroy()
        return nil, fmt.Errorf("error creating ORT session options: %w", err)
    }
    defer options.Destroy()

    session, err := ort.NewAdvancedSession(modelPath,
        []string{"images"}, []string{"output0"},
        []ort.ArbitraryTensor{inputTensor},
        []ort.ArbitraryTensor{outputTensor},
        options)
    if err != nil {
        inputTensor.Destroy()
        outputTensor.Destroy()
        return nil, fmt.Errorf("error creating ORT session: %w", err)
    }

    return &ModelSession{
        Session: session,
        Input:   inputTensor,
        Output:  outputTensor,
    }, nil
}
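The ModelSession type and the modelPath/sharedLibPath constants used above aren't shown in the snippets. Here's a minimal sketch consistent with the calls above; the two paths are assumptions, so adjust them to your setup:

const (
    // Assumed paths: point these at your converted model and at the
    // onnxruntime shared library for your OS and architecture.
    modelPath     = "./yolov8n.onnx"
    sharedLibPath = "./third_party/onnxruntime_arm64.dylib"
)

// ModelSession bundles the ORT session with its pre-allocated input and
// output tensors so they can be reused across runs.
type ModelSession struct {
    Session *ort.AdvancedSession
    Input   *ort.Tensor[float32]
    Output  *ort.Tensor[float32]
}

// Destroy releases the session and both tensors.
func (m *ModelSession) Destroy() {
    m.Session.Destroy()
    m.Input.Destroy()
    m.Output.Destroy()
}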
Step 4: Prepare Input
This is where we take the pixel data from the image and use it to fill the YOLO input tensor:
e = prepareInput(pic, modelSession.Input)
if e != nil {
    fmt.Printf("Error converting image to network input: %s\n", e)
    return 1
}
(...)
// Populates a YOLOv8n input tensor with the contents of the given image.
func prepareInput(pic image.Image, dst *ort.Tensor[float32]) error {
    data := dst.GetData()
    channelSize := 640 * 640
    if len(data) < (channelSize * 3) {
        return fmt.Errorf("destination tensor only holds %d floats, needs %d (make sure it's the right shape!)", len(data), channelSize*3)
    }
    redChannel := data[0:channelSize]
    greenChannel := data[channelSize : channelSize*2]
    blueChannel := data[channelSize*2 : channelSize*3]

    // Resize the image to 640x640 using the Lanczos3 algorithm. Note that
    // this stretches the image rather than letterboxing it; processOutput
    // later rescales the boxes with the same factors, so they still map
    // back onto the original image.
    pic = resize.Resize(640, 640, pic, resize.Lanczos3)

    i := 0
    for y := 0; y < 640; y++ {
        for x := 0; x < 640; x++ {
            // RGBA() returns 16-bit channel values; shift right by 8 to
            // get 8-bit values, then normalize to [0, 1].
            r, g, b, _ := pic.At(x, y).RGBA()
            redChannel[i] = float32(r>>8) / 255.0
            greenChannel[i] = float32(g>>8) / 255.0
            blueChannel[i] = float32(b>>8) / 255.0
            i++
        }
    }
    return nil
}
Step 5: Run Session
Run the inference:
e = modelSession.Session.Run()
if e != nil {
    fmt.Printf("Error running ORT session: %s\n", e)
    return 1
}
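Because the advanced session is bound to its pre-allocated input and output tensors, you can reuse it across images: refill the input tensor and call Run() again. A hypothetical sketch (the file names are placeholders):

// Hypothetical batch loop: the session and its tensors are created once
// and reused; only the input tensor's contents change per image.
for _, path := range []string{"cat.jpg", "street.jpg"} {
    pic, err := loadImageFile(path)
    if err != nil {
        fmt.Printf("skipping %s: %s\n", path, err)
        continue
    }
    if err := prepareInput(pic, modelSession.Input); err != nil {
        fmt.Printf("skipping %s: %s\n", path, err)
        continue
    }
    if err := modelSession.Session.Run(); err != nil {
        fmt.Printf("inference failed for %s: %s\n", path, err)
        continue
    }
    // modelSession.Output.GetData() now holds this image's raw detections;
    // hand it to processOutput as shown in the next step.
}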
Step 6: Process Output
Now we need to process the inference results and prepare the output in a human-readable format. The model's output0 tensor has shape (1, 84, 8400): 8400 candidate boxes (the 80×80 + 40×40 + 20×20 detection grids), each described by 84 values stored row-major, i.e. 4 rows of box coordinates (center x, center y, width, height) followed by 80 rows of class scores:
boxes := processOutput(modelSession.Output.GetData(), originalWidth,
    originalHeight)
for i, box := range boxes {
    fmt.Printf("Box %d: %s\n", i, &box)
}
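originalWidth and originalHeight are the pixel dimensions of the input picture, used to scale the 640x640 boxes back onto the original image. A short sketch, assuming the pic value from Step 2:

// Dimensions of the original image, used to rescale the boxes.
bounds := pic.Bounds()
originalWidth := bounds.Dx()
originalHeight := bounds.Dy()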
(...)
func processOutput(output []float32, originalWidth,
    originalHeight int) []boundingBox {
    boundingBoxes := make([]boundingBox, 0, 8400)
    var classID int
    var probability float32

    // Iterate through the 8400 candidate boxes.
    for idx := 0; idx < 8400; idx++ {
        // Iterate through the 80 classes and find the class with the
        // highest probability.
        probability = -1e9
        for col := 0; col < 80; col++ {
            currentProb := output[8400*(col+4)+idx]
            if currentProb > probability {
                probability = currentProb
                classID = col
            }
        }
        // If the probability is less than 0.5, continue to the next index.
        if probability < 0.5 {
            continue
        }

        // Extract the coordinates and dimensions of the bounding box and
        // scale them back to the original image size.
        xc, yc := output[idx], output[8400+idx]
        w, h := output[2*8400+idx], output[3*8400+idx]
        x1 := (xc - w/2) / 640 * float32(originalWidth)
        y1 := (yc - h/2) / 640 * float32(originalHeight)
        x2 := (xc + w/2) / 640 * float32(originalWidth)
        y2 := (yc + h/2) / 640 * float32(originalHeight)

        // Append the bounding box to the result.
        boundingBoxes = append(boundingBoxes, boundingBox{
            label:      yoloClasses[classID],
            confidence: probability,
            x1:         x1,
            y1:         y1,
            x2:         x2,
            y2:         y2,
        })
    }

    // Sort the bounding boxes by confidence, highest first, so that the
    // non-maximum suppression below keeps the most confident box from each
    // cluster of overlapping detections.
    sort.Slice(boundingBoxes, func(i, j int) bool {
        return boundingBoxes[i].confidence > boundingBoxes[j].confidence
    })

    // Iterate through the sorted boxes, dropping any candidate that
    // overlaps an already-kept box too much (IoU above 0.7).
    mergedResults := make([]boundingBox, 0, len(boundingBoxes))
    for _, candidateBox := range boundingBoxes {
        overlapsExistingBox := false
        for _, existingBox := range mergedResults {
            if (&candidateBox).iou(&existingBox) > 0.7 {
                overlapsExistingBox = true
                break
            }
        }
        if !overlapsExistingBox {
            mergedResults = append(mergedResults, candidateBox)
        }
    }

    // This will still be in sorted order by confidence.
    return mergedResults
}
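processOutput relies on a boundingBox type with an iou method that the snippets above don't show. Here's a minimal sketch consistent with the calls above; the String format matches the console output below, the min/max builtins need Go 1.21+, and yoloClasses is the standard 80-entry COCO class-name list from the full code example:

type boundingBox struct {
    label          string
    confidence     float32
    x1, y1, x2, y2 float32
}

// String formats a box the way the console output below shows it.
func (b *boundingBox) String() string {
    return fmt.Sprintf("Object %s (confidence %f): (%f, %f), (%f, %f)",
        b.label, b.confidence, b.x1, b.y1, b.x2, b.y2)
}

// area returns the area of the box.
func (b *boundingBox) area() float32 {
    return (b.x2 - b.x1) * (b.y2 - b.y1)
}

// intersection returns the area shared by two boxes, or 0 if they
// don't overlap.
func (b *boundingBox) intersection(other *boundingBox) float32 {
    x1 := max(b.x1, other.x1)
    y1 := max(b.y1, other.y1)
    x2 := min(b.x2, other.x2)
    y2 := min(b.y2, other.y2)
    if x2 <= x1 || y2 <= y1 {
        return 0
    }
    return (x2 - x1) * (y2 - y1)
}

// iou returns intersection over union, the overlap metric used for
// non-maximum suppression in processOutput.
func (b *boundingBox) iou(other *boundingBox) float32 {
    inter := b.intersection(other)
    return inter / (b.area() + other.area() - inter)
}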
With all the pieces in place, running the program produces something similar to this:
go run main.go
Box 0: Object parking meter (confidence 0.578624): (406.172058, 50.842918), (565.424744, 231.428116)
Box 1: Object cup (confidence 0.563491): (433.477356, 257.403839), (571.929077, 355.463074)
Box 2: Object laptop (confidence 0.524439): (213.599579, 243.196198), (419.911469, 350.581512)
The inference on the Apple M1 Pro (2020) takes about 10 seconds.
Step 7: Draw Output Image with Boxes
In this step we create an output image based on the input image, with boxes marking the detected objects.
Note:
- for the labels you need to provide the correct path to the font you want to use (see the fontPath const at the top of the program)
const (
    outputImagePath = "./output.jpg"
    (...)
    fontPath = "/Library/Fonts/Arial Unicode.ttf"
)
(...)
err := drawBoxes(imagePath, outputImagePath, boxes)
if err != nil {
    fmt.Printf("error drawing boxes: %s\n", err)
    return 1
}
(...)
// Draws bounding boxes with labels onto the image and saves the result.
func drawBoxes(inputPath string, outputPath string, boxes []boundingBox) error {
    // Open and decode the input image.
    f, err := os.Open(inputPath)
    if err != nil {
        return fmt.Errorf("error opening input image: %w", err)
    }
    defer f.Close()
    img, _, err := image.Decode(f)
    if err != nil {
        return fmt.Errorf("error decoding image: %w", err)
    }

    dc := gg.NewContextForImage(img)
    dc.SetLineWidth(1)
    fontLoaded := false
    if err := dc.LoadFontFace(fontPath, 14); err == nil {
        fontLoaded = true
    }

    for _, box := range boxes {
        // Draw rectangle
        dc.SetRGB(1, 0, 0) // red
        dc.DrawRectangle(float64(box.x1), float64(box.y1), float64(box.x2-box.x1), float64(box.y2-box.y1))
        dc.Stroke()
        // Draw label
        if fontLoaded {
            label := fmt.Sprintf("%s (%.2f)", box.label, box.confidence)
            dc.SetRGB(0, 0, 1)
            dc.DrawStringAnchored(label, float64(box.x1)+4, float64(box.y1)-4, 0, 1)
        }
    }

    // Save the result
    out, err := os.Create(outputPath)
    if err != nil {
        return fmt.Errorf("error creating output file: %w", err)
    }
    defer out.Close()
    return jpeg.Encode(out, dc.Image(), &jpeg.Options{Quality: 90})
}
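For reference, the imports drawBoxes relies on; gg is the github.com/fogleman/gg drawing library:

import (
    "fmt"
    "image"
    "image/jpeg" // also registers the JPEG decoder for image.Decode
    "os"

    "github.com/fogleman/gg"
)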
The output should look like this:

I hope you liked it! Happy hacking!
Sources
ONNX Runtime: https://github.com/yalue/onnxruntime_go
Image detection: https://github.com/yalue/onnxruntime_go_examples/tree/master/image_object_detect
Sample images: https://www.kaggle.com/datasets/kkhandekar/object-detection-sample-images
Virtual envs: https://docs.python.org/3/library/venv.html
Article Golang code repository: https://github.com/flashlabs/kiss-samples/tree/main/yolo-in-go-with-onnx




