Skip to main content

Documentation Index

Fetch the complete documentation index at: https://mintlify.com/microsoft/onnxruntime/llms.txt

Use this file to discover all available pages before exploring further.

The SessionOptions class provides configuration options for creating and running inference sessions.

Namespace

Microsoft.ML.OnnxRuntime

Class Declaration

public class SessionOptions : SafeHandle

Constructor

SessionOptions()

Creates a new SessionOptions instance with default settings.
public SessionOptions()
Example:
var options = new SessionOptions();

Static Factory Methods

MakeSessionOptionWithCudaProvider

Creates SessionOptions configured for CUDA execution.
public static SessionOptions MakeSessionOptionWithCudaProvider(int deviceId = 0)
Parameters:
  • deviceId (int): CUDA device ID (default: 0)
Example:
var options = SessionOptions.MakeSessionOptionWithCudaProvider(0);
var session = new InferenceSession("model.onnx", options);

MakeSessionOptionWithCudaProvider (Advanced)

Creates SessionOptions with detailed CUDA provider options.
public static SessionOptions MakeSessionOptionWithCudaProvider(
    OrtCUDAProviderOptions cudaProviderOptions)
Example:
// OrtCUDAProviderOptions is configured with string key/value pairs via UpdateOptions;
// it wraps a native handle, so dispose it when it is no longer needed.
using var cudaOptions = new OrtCUDAProviderOptions();
cudaOptions.UpdateOptions(new System.Collections.Generic.Dictionary<string, string>
{
    ["device_id"] = "0",
    ["arena_extend_strategy"] = "kSameAsRequested",
});

var options = SessionOptions.MakeSessionOptionWithCudaProvider(cudaOptions);

MakeSessionOptionWithTensorrtProvider

Creates SessionOptions configured for TensorRT execution.
public static SessionOptions MakeSessionOptionWithTensorrtProvider(int deviceId = 0)

Properties and Methods

Graph Optimization Level

Controls the level of graph optimizations applied.
public GraphOptimizationLevel GraphOptimizationLevel { get; set; }
Values:
  • ORT_DISABLE_ALL - No optimizations
  • ORT_ENABLE_BASIC - Basic optimizations (constant folding, redundant node elimination)
  • ORT_ENABLE_EXTENDED - Extended optimizations (operator fusion)
  • ORT_ENABLE_LAYOUT - Layout transformations
  • ORT_ENABLE_ALL - All available optimizations
Example:
var options = new SessionOptions();
options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;

Execution Mode

Controls sequential vs parallel execution.
public ExecutionMode ExecutionMode { get; set; }
Values:
  • ORT_SEQUENTIAL - Execute operators sequentially
  • ORT_PARALLEL - Execute operators in parallel when possible
Example:
options.ExecutionMode = ExecutionMode.ORT_PARALLEL;

Thread Configuration

IntraOpNumThreads

Sets the number of threads used to parallelize execution within nodes.
public int IntraOpNumThreads { get; set; }
Example:
options.IntraOpNumThreads = 4;

InterOpNumThreads

Sets the number of threads used to parallelize execution across independent nodes of the graph. This setting only takes effect when ExecutionMode is ORT_PARALLEL.
public int InterOpNumThreads { get; set; }
Example:
options.InterOpNumThreads = 2;

Memory Optimization

EnableCpuMemArena

Enables the CPU memory arena allocator.
public bool EnableCpuMemArena { get; set; }
Example:
options.EnableCpuMemArena = true;

EnableMemPattern

Enables memory pattern optimization.
public bool EnableMemPattern { get; set; }
Example:
options.EnableMemPattern = true;

Profiling

EnableProfiling

Enables profiling for the session.
public bool EnableProfiling { get; set; }

ProfileOutputPathPrefix

Sets the output path prefix for profiling data.
public string ProfileOutputPathPrefix { get; set; }
Example:
options.EnableProfiling = true;
options.ProfileOutputPathPrefix = "profile_";

Logging

LogId

Sets the logger ID for the session.
public string LogId { get; set; }

LogSeverityLevel

Sets the minimum log severity level.
public OrtLoggingLevel LogSeverityLevel { get; set; }
Example:
options.LogId = "MyModel";
options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_WARNING;

Optimized Model Output

OptimizedModelFilePath

Path to save the optimized model.
public string OptimizedModelFilePath { get; set; }
Example:
options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;
options.OptimizedModelFilePath = "model_optimized.onnx";

Execution Provider Configuration

AppendExecutionProvider_CPU

Adds CPU execution provider.
public void AppendExecutionProvider_CPU(int useArena = 1)

AppendExecutionProvider_CUDA

Adds CUDA execution provider.
public void AppendExecutionProvider_CUDA(int deviceId = 0)
Example:
var options = new SessionOptions();
options.AppendExecutionProvider_CUDA(0);
options.AppendExecutionProvider_CPU(); // Fallback

AppendExecutionProvider_CUDA (Advanced)

Adds CUDA provider with detailed options.
public void AppendExecutionProvider_CUDA(OrtCUDAProviderOptions cudaOptions)

AppendExecutionProvider_DML

Adds DirectML execution provider (Windows only).
public void AppendExecutionProvider_DML(int deviceId = 0)

AppendExecutionProvider_Tensorrt

Adds TensorRT execution provider.
public void AppendExecutionProvider_Tensorrt(int deviceId = 0)

AppendExecutionProvider_OpenVINO

Adds OpenVINO execution provider.
public void AppendExecutionProvider_OpenVINO(string deviceType = "CPU")
Example:
options.AppendExecutionProvider_OpenVINO("GPU");

Advanced Configuration

AddConfigEntry

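Adds a configuration entry as a string key/value pair. Note: current C# releases expose this functionality as AddSessionConfigEntry (described below); confirm that AddConfigEntry is available in your package version before relying on it.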
public void AddConfigEntry(string key, string value)
Example:
options.AddConfigEntry("session.disable_prepacking", "1");

AddSessionConfigEntry

Adds a session configuration entry.
public void AddSessionConfigEntry(string key, string value)

AddInitializer

Adds an initializer to the session.
public void AddInitializer(string name, OrtValue ortValue)

RegisterCustomOpLibrary

Registers a custom operator library.
public void RegisterCustomOpLibrary(string libraryPath)
Example:
options.RegisterCustomOpLibrary("custom_ops.dll");

Complete Examples

Basic Configuration

using Microsoft.ML.OnnxRuntime;

// SessionOptions and InferenceSession both own native resources; the using
// declarations release them when the enclosing scope ends.
using var options = new SessionOptions();
options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;
options.ExecutionMode = ExecutionMode.ORT_PARALLEL;
options.IntraOpNumThreads = 4;
options.EnableCpuMemArena = true;
options.EnableMemPattern = true;

using var session = new InferenceSession("model.onnx", options);

GPU Acceleration (CUDA)

// Dispose the options and the session when the scope ends; both hold native resources.
using var options = new SessionOptions();
options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;

try
{
    // Prefer the GPU on device 0.
    options.AppendExecutionProvider_CUDA(0);
    Console.WriteLine("Using CUDA GPU");
}
catch (Exception ex)
{
    // Registering CUDA failed: report why and run on the CPU instead.
    Console.WriteLine($"CUDA not available: {ex.Message}");
    options.AppendExecutionProvider_CPU();
}

using var session = new InferenceSession("model.onnx", options);

Production Configuration

/// <summary>
/// Builds a <see cref="SessionOptions"/> instance preconfigured for production inference:
/// all graph optimizations, parallel execution mode, one intra-op thread per processor,
/// CPU memory arena and memory-pattern optimization enabled, and warning-level logging
/// under the log id "ProductionModel".
/// </summary>
/// <returns>
/// The configured options with the CUDA provider (device 0) registered, or with the CPU
/// provider registered when adding CUDA fails. The caller owns the returned instance and
/// is responsible for disposing it.
/// </returns>
public static SessionOptions CreateProductionOptions()
{
    var options = new SessionOptions();
    try
    {
        // Optimization
        options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;
        options.ExecutionMode = ExecutionMode.ORT_PARALLEL;

        // Threading
        // NOTE(review): ORT_PARALLEL is combined with a single inter-op thread here;
        // confirm this pairing is intended, since it leaves little room for
        // node-level parallelism.
        options.IntraOpNumThreads = Environment.ProcessorCount;
        options.InterOpNumThreads = 1;

        // Memory
        options.EnableCpuMemArena = true;
        options.EnableMemPattern = true;

        // Logging
        options.LogId = "ProductionModel";
        options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_WARNING;

        // Execution providers
        try
        {
            options.AppendExecutionProvider_CUDA(0);
        }
        catch (Exception ex)
        {
            // Deliberate best effort: any failure to register CUDA falls back to the
            // CPU provider, but the reason is recorded instead of silently dropped.
            System.Diagnostics.Trace.TraceWarning(
                $"CUDA execution provider unavailable, falling back to CPU: {ex.Message}");
            options.AppendExecutionProvider_CPU();
        }

        return options;
    }
    catch
    {
        // Configuration failed part-way: release the native handle before propagating.
        options.Dispose();
        throw;
    }
}

Model Optimization and Export

// The options own a native handle just like the session does, so dispose both.
using var options = new SessionOptions();
options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;
options.OptimizedModelFilePath = "model_optimized.ort";

// This will save the optimized model when creating the session
using (var session = new InferenceSession("model.onnx", options))
{
    Console.WriteLine("Optimized model saved to model_optimized.ort");
}

Custom Operators

// Dispose the options and the session when the scope ends; both hold native resources.
using var options = new SessionOptions();

// Register custom operator library
options.RegisterCustomOpLibrary("my_custom_ops.dll");

using var session = new InferenceSession("model_with_custom_ops.onnx", options);

Advanced CUDA Configuration

using System.Collections.Generic;

// OrtCUDAProviderOptions is configured with string key/value pairs via UpdateOptions.
// All three objects below own native resources, hence the using declarations.
using var cudaOptions = new OrtCUDAProviderOptions();
cudaOptions.UpdateOptions(new Dictionary<string, string>
{
    ["device_id"] = "0",
    ["cudnn_conv_algo_search"] = "DEFAULT",
    // 2 GB in bytes, written out as a string. (The int expression
    // 2 * 1024 * 1024 * 1024 overflows and does not compile.)
    ["gpu_mem_limit"] = "2147483648",
    ["arena_extend_strategy"] = "kSameAsRequested",
});

using var options = new SessionOptions();
options.AppendExecutionProvider_CUDA(cudaOptions);

using var session = new InferenceSession("model.onnx", options);

Performance Tuning Guide

CPU Optimization

// CPU tuning: full graph optimization, parallel execution, one intra-op thread
// per processor, and both memory optimizations switched on.
var options = new SessionOptions
{
    GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL,
    ExecutionMode = ExecutionMode.ORT_PARALLEL,
    IntraOpNumThreads = Environment.ProcessorCount,
    EnableCpuMemArena = true,
    EnableMemPattern = true,
};

GPU Optimization

// Start from options that already target CUDA device 0, then run operators
// sequentially with extended graph optimizations.
SessionOptions options = SessionOptions.MakeSessionOptionWithCudaProvider(deviceId: 0);
options.ExecutionMode = ExecutionMode.ORT_SEQUENTIAL;
options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_EXTENDED;

Memory-Constrained Environments

var options = new SessionOptions
{
    // Disable arena for lower memory
    EnableCpuMemArena = false,
    EnableMemPattern = false,
    // Limit threads
    IntraOpNumThreads = 2,
};

Best Practices

  1. Set appropriate optimization level: Use ORT_ENABLE_ALL for production
  2. Configure threading: Match your workload and hardware
  3. Enable memory optimizations: Unless memory-constrained
  4. Use GPU when available: Significant performance gains
  5. Profile your model: Enable profiling to identify bottlenecks
  6. Reuse SessionOptions: Create once, use for multiple sessions
  7. Always dispose: SessionOptions and InferenceSession implement IDisposable; create them in using statements so their native resources are released

See Also