Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
using System.ClientModel.Primitives;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Text.Json.Serialization.Metadata;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Shared.Diagnostics;
Expand Down Expand Up @@ -46,6 +48,16 @@ private static readonly Func<ResponsesClient, GetResponseOptions, RequestOptions
null, [typeof(GetResponseOptions), typeof(RequestOptions)], null)
?.CreateDelegate(typeof(Func<ResponsesClient, GetResponseOptions, RequestOptions, AsyncCollectionResult<StreamingResponseUpdate>>));

// Workaround for https://github.com/openai/openai-dotnet/pull/874.
// Reflection handle for a public instance property named "ImageUrl" on ResponseContentPart. The lookup is
// restricted to public members, so this field is null whenever the referenced OpenAI library does not expose
// such a property; ToAIContents then falls back to GetInputImageUrlFromAdditionalRawData.
// Replace this with a direct property access once the library exposes it publicly.
// NOTE(review): the original comment referred to the property as "InputImageUrl" (e.g., part.InputImageUrl),
// while the name looked up here is "ImageUrl" - confirm which name the library ultimately ships.
private static readonly PropertyInfo? _inputImageUrlProperty =
typeof(ResponseContentPart).GetProperty("ImageUrl", BindingFlags.Public | BindingFlags.Instance);
Comment thread
stephentoub marked this conversation as resolved.
Outdated

// Fallback used when the image URL is not available through a public ImageUrl property.
// Reflection handle for the instance property named "SerializedAdditionalRawData" on ResponseContentPart,
// looked up across both public and non-public members. The field is null when the type declares no such
// property, in which case GetInputImageUrlFromAdditionalRawData skips this source.
private static readonly PropertyInfo? _serializedAdditionalRawDataProperty =
    typeof(ResponseContentPart).GetProperty("SerializedAdditionalRawData", BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance);

/// <summary>Metadata about the client.</summary>
private readonly ChatClientMetadata _metadata;

Expand Down Expand Up @@ -1196,7 +1208,9 @@ private static List<AIContent> ToAIContents(IEnumerable<ResponseContentPart> con
!string.IsNullOrWhiteSpace(part.InputImageFileId) ? new HostedFileContent(part.InputImageFileId) { MediaType = "image/*" } :
!string.IsNullOrWhiteSpace(part.InputFileId) ? new HostedFileContent(part.InputFileId) { Name = part.InputFilename } :
part.InputFileBytes is not null ? new DataContent(part.InputFileBytes, part.InputFileBytesMediaType ?? "application/octet-stream") { Name = part.InputFilename } :
null;
_inputImageUrlProperty?.GetValue(part) is string inputImageUrl && !string.IsNullOrWhiteSpace(inputImageUrl) ?
new UriContent(new Uri(inputImageUrl), "image/*") :
Comment thread
stephentoub marked this conversation as resolved.
Outdated
GetInputImageUrlFromAdditionalRawData(part);
break;

case ResponseContentPartKind.Refusal:
Expand All @@ -1221,6 +1235,48 @@ private static List<AIContent> ToAIContents(IEnumerable<ResponseContentPart> con
return results;
}

/// <summary>
/// Attempts to extract the input image URL from a <see cref="ResponseContentPart"/>.
/// This is a workaround for https://github.com/openai/openai-dotnet/pull/874 until the property is publicly exposed.
/// </summary>
/// <remarks>
/// Two sources are probed in order: the <c>image_url</c> entry of the part's additional raw data (reached via
/// reflection), and then the <c>image_url</c> property of the part's own JSON serialization. A source is ignored
/// when its value is not a JSON string, is empty or whitespace, or cannot be parsed as an absolute URI, so that
/// an unusable value degrades to "no content" rather than failing the conversion of the whole response.
/// </remarks>
/// <param name="part">The content part to inspect.</param>
/// <returns>
/// A <see cref="UriContent"/> with media type <c>image/*</c> wrapping the URL, or <see langword="null"/> if no
/// usable URL was found.
/// </returns>
private static UriContent? GetInputImageUrlFromAdditionalRawData(ResponseContentPart part)
{
    // Preferred source: the raw JSON value stored under "image_url" in the additional-data dictionary.
    if (_serializedAdditionalRawDataProperty?.GetValue(part) is IDictionary<string, BinaryData> additionalData &&
        additionalData.TryGetValue("image_url", out var imageUrlData) &&
        imageUrlData is not null)
    {
        // Inspect the value as JSON instead of deserializing straight to string, so that a non-string
        // value is skipped rather than surfacing as a JsonException.
        using JsonDocument rawValue = JsonDocument.Parse(imageUrlData.ToMemory());
        if (ToImageUriContent(rawValue.RootElement) is { } fromRawData)
        {
            return fromRawData;
        }
    }

    // Fallback: serialize the part back to JSON and extract the image_url property.
    if (part is IJsonModel<ResponseContentPart> jsonModel)
    {
        using var stream = new MemoryStream();
        using (var writer = new Utf8JsonWriter(stream))
        {
            // The writer is disposed (and therefore flushed) before the stream is read back.
            jsonModel.Write(writer, ModelReaderWriterOptions.Json);
        }

        using JsonDocument doc = JsonDocument.Parse(stream.ToArray());

        // TryGetProperty throws for non-object elements, so check the kind first.
        if (doc.RootElement.ValueKind == JsonValueKind.Object &&
            doc.RootElement.TryGetProperty("image_url", out JsonElement imageUrlElement))
        {
            return ToImageUriContent(imageUrlElement);
        }
    }

    return null;

    // Maps a JSON string holding an absolute URI to image content; yields null for anything else.
    // Uri.TryCreate is used instead of the Uri constructor so unparsable values do not throw.
    static UriContent? ToImageUriContent(JsonElement element) =>
        element.ValueKind == JsonValueKind.String &&
        element.GetString() is string imageUrl &&
        !string.IsNullOrWhiteSpace(imageUrl) &&
        Uri.TryCreate(imageUrl, UriKind.Absolute, out Uri? imageUri)
            ? new UriContent(imageUri, "image/*")
            : null;
}

/// <summary>Converts any annotations from <paramref name="source"/> and stores them in <paramref name="destination"/>.</summary>
private static void PopulateAnnotations(ResponseContentPart source, AIContent destination)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5198,6 +5198,72 @@ public async Task ResponseWithRefusalContent_ParsesCorrectly()
Assert.Equal("Refusal", errorContent.ErrorCode);
}

[Fact]
public async Task ResponseWithInputImageUrl_ParsesCorrectly()
{
    // Request body the handler expects the client to send.
    const string RequestJson = """
        {
        "model":"gpt-4o-mini",
        "input":[{"type":"message","role":"user","content":[{"type":"input_text","text":"What is in this image?"}]}]
        }
        """;

    // Canned response: a user message echoing an input_image part that carries an image_url,
    // followed by the assistant's textual answer.
    const string ResponseJson = """
        {
        "id":"resp_001",
        "object":"response",
        "created_at":1741892091,
        "status":"completed",
        "model":"gpt-4o-mini",
        "output":[
        {
        "type":"message",
        "id":"msg_001",
        "status":"completed",
        "role":"user",
        "content":[
        {"type":"input_image","image_url":"https://example.com/image.png"}
        ]
        },
        {
        "type":"message",
        "id":"msg_002",
        "status":"completed",
        "role":"assistant",
        "content":[
        {"type":"output_text","text":"This is a cat.","annotations":[]}
        ]
        }
        ]
        }
        """;

    using var handler = new VerbatimHttpHandler(RequestJson, ResponseJson);
    using var httpClient = new HttpClient(handler);
    using IChatClient client = CreateResponseClient(httpClient, "gpt-4o-mini");

    var response = await client.GetResponseAsync("What is in this image?");
    Assert.NotNull(response);

    // The echoed user message must be present in the parsed response.
    var userMessage = response.Messages.FirstOrDefault(message => message.Role == ChatRole.User);
    Assert.NotNull(userMessage);

    // Its input_image part is expected to surface as UriContent carrying the URL and a generic image media type.
    var imageContent = userMessage.Contents.OfType<UriContent>().FirstOrDefault();
    Assert.NotNull(imageContent);
    Assert.Equal("https://example.com/image.png", imageContent.Uri.ToString());
    Assert.Equal("image/*", imageContent.MediaType);

    // The assistant's reply must still be parsed alongside the image content.
    var assistantMessage = response.Messages.LastOrDefault(message => message.Role == ChatRole.Assistant);
    Assert.NotNull(assistantMessage);
    Assert.Equal("This is a cat.", assistantMessage.Text);
}

[Fact]
public async Task HostedImageGenerationTool_NonStreaming()
{
Expand Down
Loading