PySymGym
diff --git a/VSharp.API/VSharp.cs b/VSharp.API/VSharp.cs
Lines changed: 3 additions & 3 deletions
diff --git a/VSharp.API/VSharpOptions.cs b/VSharp.API/VSharpOptions.cs
Lines changed: 4 additions & 4 deletions
diff --git a/VSharp.Explorer/AISearcher.fs b/VSharp.Explorer/AISearcher.fs
Lines changed: 112 additions & 54 deletions
@@ -191,10 +191,10 @@ private static Statistics StartExploration(
                     stopOnCoverageAchieved: 100,
                     randomSeed: options.RandomSeed,
                     stepsLimit: options.StepsLimit,
-                    aiAgentTrainingOptions: options.AIAgentTrainingOptions == null ? FSharpOption<AIAgentTrainingOptions>.None : FSharpOption<AIAgentTrainingOptions>.Some(options.AIAgentTrainingOptions),
+                    aiOptions: options.AIOptions == null ? FSharpOption<AIOptions>.None : FSharpOption<AIOptions>.Some(options.AIOptions),
                     pathToModel: options.PathToModel == null ? FSharpOption<string>.None : FSharpOption<string>.Some(options.PathToModel),
-                    useGPU: options.UseGPU == null ? FSharpOption<bool>.None : FSharpOption<bool>.Some(options.UseGPU),
-                    optimize: options.Optimize == null ? FSharpOption<bool>.None : FSharpOption<bool>.Some(options.Optimize)
+                    useGPU: options.UseGPU,
+                    optimize: options.Optimize
                     );
 
             var fuzzerOptions =
 
@@ -113,7 +113,7 @@ public readonly record struct VSharpOptions
     public readonly bool ReleaseBranches = DefaultReleaseBranches;
     public readonly int RandomSeed = DefaultRandomSeed;
     public readonly uint StepsLimit = DefaultStepsLimit;
-    public readonly AIAgentTrainingOptions AIAgentTrainingOptions = null;
+    public readonly AIOptions? AIOptions = null;
     public readonly string PathToModel = DefaultPathToModel;
     public readonly bool UseGPU = false;
     public readonly bool Optimize = false;
@@ -133,7 +133,7 @@ public readonly record struct VSharpOptions
     /// <param name="releaseBranches">If true and timeout is specified, a part of allotted time in the end is given to execute remaining states without branching.</param>
     /// <param name="randomSeed">Fixed seed for random operations. Used if greater than or equal to zero.</param>
     /// <param name="stepsLimit">Number of symbolic machine steps to stop execution after. Zero value means no limit.</param>
-    /// <param name="aiAgentTrainingOptions">Settings for AI searcher training.</param>
+    /// <param name="aiOptions">Settings for AI searcher training.</param>
     /// <param name="pathToModel">Path to ONNX file with model to use in AI searcher.</param>
     /// <param name="useGPU">Specifies whether the ONNX execution session should use a CUDA-enabled GPU.</param>
     /// <param name="optimize">Enabling options like parallel execution and various graph transformations to enhance performance of ONNX.</param>
@@ -150,7 +150,7 @@ public VSharpOptions(
         bool releaseBranches = DefaultReleaseBranches,
         int randomSeed = DefaultRandomSeed,
         uint stepsLimit = DefaultStepsLimit,
-        AIAgentTrainingOptions aiAgentTrainingOptions = null,
+        AIOptions? aiOptions = null,
         string pathToModel = DefaultPathToModel,
         bool useGPU = false,
         bool optimize = false)
@@ -167,7 +167,7 @@ public VSharpOptions(
         ReleaseBranches = releaseBranches;
         RandomSeed = randomSeed;
         StepsLimit = stepsLimit;
-        AIAgentTrainingOptions = aiAgentTrainingOptions;
+        AIOptions = aiOptions;
         PathToModel = pathToModel;
         UseGPU = useGPU;
         Optimize = optimize;
 
@@ -2,48 +2,22 @@ namespace VSharp.Explorer
 
 open System.Collections.Generic
 open Microsoft.ML.OnnxRuntime
+open System
+open System.Text
+open System.Text.Json
 open VSharp
 open VSharp.IL.Serializer
 open VSharp.ML.GameServer.Messages
 
-type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentTrainingOptions>) =
-    let stepsToSwitchToAI =
-        match aiAgentTrainingOptions with
-        | None -> 0u<step>
-        | Some options -> options.stepsToSwitchToAI
-
-    let stepsToPlay =
-        match aiAgentTrainingOptions with
-        | None -> 0u<step>
-        | Some options -> options.stepsToPlay
-
-    let mutable lastCollectedStatistics = Statistics()
-    let mutable defaultSearcherSteps = 0u<step>
-    let mutable (gameState: Option<GameState>) = None
-    let mutable useDefaultSearcher = stepsToSwitchToAI > 0u<step>
-    let mutable afterFirstAIPeek = false
-    let mutable incorrectPredictedStateId = false
-
-    let defaultSearcher =
-        match aiAgentTrainingOptions with
-        | None -> BFSSearcher() :> IForwardSearcher
-        | Some options ->
-            match options.defaultSearchStrategy with
-            | BFSMode -> BFSSearcher() :> IForwardSearcher
-            | DFSMode -> DFSSearcher() :> IForwardSearcher
-            | x -> failwithf $"Unexpected default searcher {x}. DFS and BFS supported for now."
-
-    let mutable stepsPlayed = 0u<step>
-
-    let isInAIMode () =
-        (not useDefaultSearcher) && afterFirstAIPeek
-
-    let q = ResizeArray<_>()
-    let availableStates = HashSet<_>()
+type AIMode = // How the AI searcher is being driven; AISearcher derives this tag from its optional training mode.
+    | Runner // no training mode was supplied
+    | TrainingSendModel // training mode is SendModel
+    | TrainingSendEachStep // training mode is SendEachStep
 
-    let updateGameState (delta: GameState) =
+module GameUtils =
+    let updateGameState (delta: GameState) (gameState: Option<GameState>) =
         match gameState with
-        | None -> gameState <- Some delta
+        | None -> Some delta
         | Some s ->
             let updatedBasicBlocks = delta.GraphVertices |> Array.map (fun b -> b.Id) |> HashSet
             let updatedStates = delta.States |> Array.map (fun s -> s.Id) |> HashSet
@@ -86,14 +60,56 @@ type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentT
                         s.Children |> Array.filter activeStates.Contains
                     ))
 
-            let pathConditionVertices =
-                ResizeArray<PathConditionVertex> s.PathConditionVertices
+            let pathConditionVertices = ResizeArray<PathConditionVertex> s.PathConditionVertices
 
             pathConditionVertices.AddRange delta.PathConditionVertices
 
-            gameState <-
-                Some
-                <| GameState(vertices.ToArray(), states, pathConditionVertices.ToArray(), edges.ToArray())
+            Some <| GameState(vertices.ToArray(), states, pathConditionVertices.ToArray(), edges.ToArray())
+
+    let convertOutputToJson (output: IDisposableReadOnlyCollection<OrtValue>) = // NOTE(review): despite the name, no JSON is produced here; the result is a sequence of float32 arrays, one per output value.
+        seq { 0 .. output.Count - 1 } // lazy sequence: `output` is only read when the result is enumerated, so it must still be usable at that point
+        |> Seq.map (fun i -> output[i].GetTensorDataAsSpan<float32>().ToArray()) // copies the i-th output's tensor data into a fresh array
+
+type internal AISearcher(oracle: Oracle, aiAgentTrainingMode: Option<AIAgentTrainingMode>) =
+    let stepsToSwitchToAI = // Read from the training options of either training mode; a value above zero makes useDefaultSearcher start as true.
+        match aiAgentTrainingMode with
+        | None -> 0u<step> // no training mode: useDefaultSearcher starts as false
+        | Some(SendModel options) -> options.aiAgentTrainingOptions.stepsToSwitchToAI
+        | Some(SendEachStep options) -> options.aiAgentTrainingOptions.stepsToSwitchToAI
+
+    let stepsToPlay = // Read from the training options of either training mode; `pick` compares it with stepsPlayed.
+        match aiAgentTrainingMode with
+        | None -> 0u<step> // no training mode: the value is zero
+        | Some(SendModel options) -> options.aiAgentTrainingOptions.stepsToPlay
+        | Some(SendEachStep options) -> options.aiAgentTrainingOptions.stepsToPlay
+
+    let mutable lastCollectedStatistics = Statistics()
+    let mutable defaultSearcherSteps = 0u<step>
+    let mutable (gameState: Option<GameState>) = None
+    let mutable useDefaultSearcher = stepsToSwitchToAI > 0u<step>
+    let mutable afterFirstAIPeek = false
+    let mutable incorrectPredictedStateId = false
+
+    let defaultSearcher = // Non-AI searcher instance: BFS when no training mode is given, otherwise chosen by the configured defaultSearchStrategy.
+        let pickSearcher = // maps a search strategy to a fresh searcher instance
+            function
+            | BFSMode -> BFSSearcher() :> IForwardSearcher
+            | DFSMode -> DFSSearcher() :> IForwardSearcher
+            | x -> failwithf $"Unexpected default searcher {x}. DFS and BFS supported for now." // any other strategy raises
+
+        match aiAgentTrainingMode with
+        | None -> BFSSearcher() :> IForwardSearcher
+        | Some(SendModel options) -> pickSearcher options.aiAgentTrainingOptions.aiBaseOptions.defaultSearchStrategy
+        | Some(SendEachStep options) -> pickSearcher options.aiAgentTrainingOptions.aiBaseOptions.defaultSearchStrategy
+
+    let mutable stepsPlayed = 0u<step>
+
+    let isInAIMode () = // True when the default searcher is not in use and afterFirstAIPeek has been set (it is set right after an oracle prediction in `pick`).
+        (not useDefaultSearcher) && afterFirstAIPeek
+
+    let q = ResizeArray<_>()
+    let availableStates = HashSet<_>()
+
 
 
     let init states =
@@ -128,15 +144,19 @@ type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentT
         for bb in state._history do
             bb.Key.AssociatedStates.Remove state |> ignore
 
-    let inTrainMode = aiAgentTrainingOptions.IsSome
+    let aiMode = // AIMode tag matching the supplied training mode; consulted by `pick` when choosing what to send to the oracle.
+        match aiAgentTrainingMode with
+        | Some(SendEachStep _) -> TrainingSendEachStep
+        | Some(SendModel _) -> TrainingSendModel
+        | None -> Runner // no training mode supplied
 
     let pick selector = // Chooses the next state: counts a default-searcher step while useDefaultSearcher is set, otherwise asks the oracle for a state id. Lines elided by the diff hunks are not described here.
         if useDefaultSearcher then
             defaultSearcherSteps <- defaultSearcherSteps + 1u<step>
 
             if Seq.length availableStates > 0 then
                 let gameStateDelta = collectGameStateDelta ()
-                updateGameState gameStateDelta
+                gameState <- GameUtils.updateGameState gameStateDelta gameState
                 let statistics = computeStatistics gameState.Value
                 Application.applicationGraphDelta.Clear()
                 lastCollectedStatistics <- statistics
@@ -149,7 +169,7 @@ type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentT
             Some(Seq.head availableStates)
         else
             let gameStateDelta = collectGameStateDelta ()
-            updateGameState gameStateDelta
+            gameState <- GameUtils.updateGameState gameStateDelta gameState
             let statistics = computeStatistics gameState.Value
 
             if isInAIMode () then
@@ -158,14 +178,18 @@ type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentT
 
             Application.applicationGraphDelta.Clear()
 
-            if inTrainMode && stepsToPlay = stepsPlayed then
+            if aiMode <> Runner && stepsToPlay = stepsPlayed then // The step budget applies to training only: without a training mode stepsToPlay is 0u<step> and stepsPlayed starts at 0u<step>, so an unguarded check returns None before Runner ever queries the oracle.
                 None
             else
                 let toPredict =
-                    if inTrainMode && stepsPlayed > 0u<step> then
-                        gameStateDelta
-                    else
-                        gameState.Value
+                    match aiMode with
+                    | TrainingSendEachStep
+                    | TrainingSendModel -> // training: full game state on the first prediction, only the delta afterwards
+                        if stepsPlayed > 0u<step> then
+                            gameStateDelta
+                        else
+                            gameState.Value
+                    | Runner -> gameState.Value // Runner always sends the full accumulated game state
 
                 let stateId = oracle.Predict toPredict
                 afterFirstAIPeek <- true
@@ -180,12 +204,19 @@ type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentT
                     oracle.Feedback(Feedback.IncorrectPredictedStateId stateId)
                     None
 
-    new(pathToONNX: string, useGPU: bool, optimize: bool) =
+    new
+        (
+            pathToONNX: string,
+            useGPU: bool,
+            optimize: bool,
+            aiAgentTrainingModelOptions: Option<AIAgentTrainingModelOptions>
+        ) =
         let numOfVertexAttributes = 7
         let numOfStateAttributes = 7
         let numOfHistoryEdgeAttributes = 2
 
-        let createOracle (pathToONNX: string) =
+
+        let createOracleRunner (pathToONNX: string, aiAgentTrainingModelOptions: Option<AIAgentTrainingModelOptions>) =
             let sessionOptions =
                 if useGPU then
                     SessionOptions.MakeSessionOptionWithCudaProvider(0)
@@ -199,10 +230,21 @@ type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentT
                 sessionOptions.GraphOptimizationLevel <- GraphOptimizationLevel.ORT_ENABLE_BASIC
 
             let session = new InferenceSession(pathToONNX, sessionOptions)
+
             let runOptions = new RunOptions()
             let feedback (x: Feedback) = ()
 
-            let predict (gameState: GameState) =
+            let mutable stepsPlayed = 0
+            let mutable currentGameState = None
+
+            let predict (gameStateOrDelta: GameState) =
+                let _ =
+                    match aiAgentTrainingModelOptions with
+                    | Some _ when not (stepsPlayed = 0) ->
+                        currentGameState <- GameUtils.updateGameState gameStateOrDelta currentGameState
+                    | _ -> currentGameState <- Some gameStateOrDelta
+
+                let gameState = currentGameState.Value
                 let stateIds = Dictionary<uint<stateId>, int>()
                 let verticesIds = Dictionary<uint<basicBlockGlobalId>, int>()
 
@@ -243,7 +285,7 @@ type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentT
                             let j = i * numOfStateAttributes
                             attributes.[j] <- float32 v.Position
                             // TODO: Support path condition
-                            // attributes.[j + 1] <- float32 v.PathConditionSize 
+                            // attributes.[j + 1] <- float32 v.PathConditionSize
                             attributes.[j + 2] <- float32 v.VisitedAgainVertices
                             attributes.[j + 3] <- float32 v.VisitedNotCoveredVerticesInZone
                             attributes.[j + 4] <- float32 v.VisitedNotCoveredVerticesOutOfZone
@@ -350,14 +392,30 @@ type internal AISearcher(oracle: Oracle, aiAgentTrainingOptions: Option<AIAgentT
                     res
 
                 let output = session.Run(runOptions, networkInput, session.OutputNames)
+
+                let _ =
+                    match aiAgentTrainingModelOptions with
+                    | Some aiAgentOptions ->
+                        aiAgentOptions.stepSaver (
+                            AIGameStep(gameState = gameStateOrDelta, output = GameUtils.convertOutputToJson output)
+                        )
+                    | None -> ()
+
+                stepsPlayed <- stepsPlayed + 1
+
                 let weighedStates = output[0].GetTensorDataAsSpan<float32>().ToArray()
 
                 let id = weighedStates |> Array.mapi (fun i v -> i, v) |> Array.maxBy snd |> fst
                 stateIds |> Seq.find (fun kvp -> kvp.Value = id) |> (fun x -> x.Key)
 
             Oracle(predict, feedback)
 
-        AISearcher(createOracle pathToONNX, None)
+        let aiAgentTrainingOptions =
+            match aiAgentTrainingModelOptions with
+            | Some aiAgentTrainingModelOptions -> Some(SendModel aiAgentTrainingModelOptions)
+            | None -> None
+
+        AISearcher(createOracleRunner (pathToONNX, aiAgentTrainingModelOptions), aiAgentTrainingOptions)
 
     interface IForwardSearcher with
         override x.Init states = init states