diff --git a/resources_servers/structured_outputs/README.md b/resources_servers/structured_outputs/README.md index d1319f0d1..7e02e7b08 100644 --- a/resources_servers/structured_outputs/README.md +++ b/resources_servers/structured_outputs/README.md @@ -15,14 +15,14 @@ We recommend formatting the dataset to test the model's ability to follow instru 1. Different Instruction Locations 1. The instruction can be in the system or user message, and can be before or after the question. 2. Difficulty of Instructions - 1. The instruction can be simple, or detailed + 1. The instruction can be simple, or detailed: 1. e.g. simple: `Schema: {schema}` 2. e.g. detailed `Please format your answer using the following schema: {schema}. Remember to validate all typing and formatting constraints. Do not format your answer in Markdown,` 3. Difficulty of Question 1. The question exists only to serve as a proxy for eliciting a response worthy of output formatting. To focus the environment towards schema adherence, the question should be easy. 1. e.g. simple: `Please provide a response based on the document and provided schema`. -For the JSON variant, we use the `openapi-schema-validator` library for verification. +For any parsed outputs, we use the `openapi-schema-validator` library for verification. > [!IMPORTANT] > Evaluation is only based on the **schema adherence** of the generated output. @@ -59,6 +59,11 @@ ng_prepare_data "+config_paths=[${config_paths}]" \ +mode=train_preparation +should_download=true ``` +# Testing +``` +ng_test +entrypoint=resources_servers/structured_outputs +``` + # Licensing information Code: Apache 2.0 @@ -67,3 +72,4 @@ Data: CC BY 4.0 Dependencies - nemo_gym: Apache 2.0 - openapi-schema-validator: [BSD-3-Clause license](https://github.com/python-openapi/openapi-schema-validator/blob/master/LICENSE) +- xmltodict: [MIT](https://github.com/martinblech/xmltodict/blob/master/LICENSE) diff --git a/resources_servers/structured_outputs/app.py b/resources_servers/structured_outputs/app.py index 5a7292b8f..cedd4641d 100644 --- a/resources_servers/structured_outputs/app.py +++ b/resources_servers/structured_outputs/app.py @@ -16,6 +16,8 @@ from enum import StrEnum from typing import Any, Dict +import xmltodict +import yaml from fastapi import FastAPI from openapi_schema_validator import validate as validate_against_schema_openapi @@ -28,11 +30,13 @@ class StructuredOutputsResourcesServerConfig(BaseResourcesServerConfig): - pass + xml_coerce_types: bool = True class SchemaType(StrEnum): JSON = "json" + YAML = "yaml" + XML = "xml" class StructuredOutputsVerifyRequest(BaseVerifyRequest): @@ -52,6 +56,9 @@ async def verify(self, body: StructuredOutputsVerifyRequest) -> BaseVerifyRespon schema_type = body.schema_type schema_str = body.schema_str + if schema_type not in list(SchemaType): + raise NotImplementedError(f"SchemaType must be one of {list(SchemaType)}, got {schema_type} !") + # get model generation. assistant_responses = [] for output_item in body.response.output: @@ -65,30 +72,86 @@ async def verify(self, body: StructuredOutputsVerifyRequest) -> BaseVerifyRespon assistant_responses.append(content_item.text) response_text = "".join(assistant_responses) - # verify based on schema type - match schema_type: + reward = self.evaluate_structured_output_response(schema_type, schema_str, response_text) + return BaseVerifyResponse(**body.model_dump(), reward=reward) + + # ----- Helpers ----- # + def parse_content(self, schema_type: SchemaType, content: str): + match schema_type.lower(): case SchemaType.JSON: - reward = self.evaluate_structured_output_response_json(schema_str, response_text) + parsed = json.loads(content) + case SchemaType.YAML: + parsed = yaml.safe_load(content) + case SchemaType.XML: + parsed = xmltodict.parse(content) case _: - raise NotImplementedError(f"SchemaType must be one of {list(SchemaType)}, got {schema_type} !") + parsed = None + return parsed - return BaseVerifyResponse(**body.model_dump(), reward=reward) - - # ----- JSON Helpers ----- # - def strictify_schema_json(self, schema: Dict[str, Any]): + def strictify_schema(self, schema: Dict[str, Any]): """Make a schema strict as per OpenAPI guidelines""" if isinstance(schema, Dict): if "properties" in schema: schema["required"] = list(schema["properties"]) schema["additionalProperties"] = False for k, v in schema.items(): - self.strictify_schema_json(v) - - def evaluate_structured_output_response_json(self, schema_str: str, response_text: str) -> bool: + self.strictify_schema(v) + + def coerce_xml_types(self, data: Any, schema: Dict[str, Any]) -> Any: + """Recursively coerce xmltodict string values to match the JSON schema types. + + xmltodict.parse() returns all leaf values as strings. This method walks the + parsed data alongside the schema and converts values where possible. + On conversion failure the original value is returned so that schema + validation can report the error. + """ + if not isinstance(schema, dict) or "type" not in schema: + return data + + schema_type = schema["type"] + + if schema_type == "object" and isinstance(data, dict): + properties = schema.get("properties", {}) + coerced = {} + for key, value in data.items(): + if key in properties: + coerced[key] = self.coerce_xml_types(value, properties[key]) + else: + coerced[key] = value + return coerced + + if schema_type == "array": + items_schema = schema.get("items", {}) + if not isinstance(data, list): + data = [data] if data is not None else [] + return [self.coerce_xml_types(item, items_schema) for item in data] + + if isinstance(data, str): + try: + if schema_type == "integer": + return int(data) + if schema_type == "number": + return float(data) + if schema_type == "boolean": + lower = data.lower() + if lower in ("true", "1"): + return True + if lower in ("false", "0"): + return False + except (ValueError, AttributeError): + pass + + return data + + def evaluate_structured_output_response( + self, schema_type: SchemaType, schema_str: str, response_text: str + ) -> bool: try: schema = json.loads(schema_str) - self.strictify_schema_json(schema) - response_obj = json.loads(response_text) + self.strictify_schema(schema) + response_obj = self.parse_content(schema_type, response_text) + if schema_type == SchemaType.XML and self.config.xml_coerce_types: + response_obj = self.coerce_xml_types(response_obj, schema) validate_against_schema_openapi(response_obj, schema) return 1.0 except Exception: diff --git a/resources_servers/structured_outputs/data/structured_outputs_260309_nano_v3_sdg_json_yaml_xml_example.jsonl b/resources_servers/structured_outputs/data/structured_outputs_260309_nano_v3_sdg_json_yaml_xml_example.jsonl new file mode 100644 index 000000000..9b0c1dd43 --- /dev/null +++ b/resources_servers/structured_outputs/data/structured_outputs_260309_nano_v3_sdg_json_yaml_xml_example.jsonl @@ -0,0 +1,5 @@ +{"responses_create_params":{"input":[{"content":"Response Format (XML with root ): {\"type\": \"object\", \"properties\": {\"output\": {\"type\": \"object\", \"required\": [\"petDetails\", \"ownerInformation\", \"reportMetadata\"], \"properties\": {\"petDetails\": {\"type\": \"object\", \"required\": [\"species\", \"breed\", \"identifyingFeatures\"], \"additionalProperties\": false, \"properties\": {\"species\": {\"type\": \"string\", \"enum\": [\"Dog\", \"Cat\", \"Bird\", \"Rabbit\", \"Reptile\", \"Other\"]}, \"breed\": {\"type\": \"string\", \"maxLength\": 50}, \"name\": {\"type\": \"string\", \"maxLength\": 30}, \"age\": {\"type\": \"number\", \"minimum\": 0, \"maximum\": 30}, \"weightKg\": {\"type\": \"number\", \"minimum\": 0.1, \"maximum\": 200}, \"identifyingFeatures\": {\"type\": \"object\", \"required\": [\"color\", \"markings\", \"microchip\"], \"additionalProperties\": false, \"properties\": {\"color\": {\"type\": \"string\", \"maxLength\": 30}, \"markings\": {\"type\": \"string\", \"maxLength\": 100}, \"microchip\": {\"type\": \"boolean\"}, \"microchipId\": {\"type\": \"string\", \"pattern\": \"^[A-Z0-9]{10,16}$\", \"maxLength\": 16}, \"distinguishingFeatures\": {\"type\": \"array\", \"items\": {\"type\": \"string\", \"maxLength\": 50}, \"maxItems\": 5}}}}}, \"ownerInformation\": {\"type\": \"object\", \"required\": [\"fullName\", \"contactPhone\", \"lastSeenLocation\"], \"additionalProperties\": false, \"properties\": {\"fullName\": {\"type\": \"string\", \"maxLength\": 100}, \"contactPhone\": {\"type\": \"string\", \"pattern\": \"^\\\\+?[1-9]\\\\d{1,14}$\"}, \"contactEmail\": {\"type\": \"string\", \"format\": \"email\", \"maxLength\": 100}, \"address\": {\"type\": \"object\", \"required\": [\"street\", \"city\", \"postalCode\", \"country\"], \"additionalProperties\": false, \"properties\": {\"street\": {\"type\": \"string\", \"maxLength\": 100}, \"city\": {\"type\": \"string\", \"maxLength\": 50}, \"postalCode\": {\"type\": \"string\", \"maxLength\": 20}, \"country\": {\"type\": \"string\", \"maxLength\": 50}}}, \"emergencyContact\": {\"type\": \"object\", \"required\": [\"name\", \"phone\"], \"additionalProperties\": false, \"properties\": {\"name\": {\"type\": \"string\", \"maxLength\": 100}, \"phone\": {\"type\": \"string\", \"pattern\": \"^\\\\+?[1-9]\\\\d{1,14}$\"}}}}}, \"reportMetadata\": {\"type\": \"object\", \"required\": [\"dateReported\", \"isUrgent\", \"reportStatus\"], \"additionalProperties\": false, \"properties\": {\"dateReported\": {\"type\": \"string\", \"format\": \"date-time\"}, \"lastUpdated\": {\"type\": \"string\", \"format\": \"date-time\"}, \"isUrgent\": {\"type\": \"boolean\"}, \"reportStatus\": {\"type\": \"string\", \"enum\": [\"Open\", \"In Progress\", \"Found\", \"Closed\"]}, \"reportingOfficer\": {\"type\": \"object\", \"required\": [\"id\", \"name\"], \"additionalProperties\": false, \"properties\": {\"id\": {\"type\": \"string\", \"pattern\": \"^[A-Z]{2}\\\\d{4}$\"}, \"name\": {\"type\": \"string\", \"maxLength\": 100}, \"station\": {\"type\": \"string\", \"maxLength\": 50}}}}}}, \"additionalProperties\": false}}, \"required\": [\"output\"], \"additionalProperties\": false}","role":"user"},{"content":"\ud83d\udcc4 Document:\nThe lost animal in this case is a dog, specifically a Golden Retriever, as documented in the pet details section under the 'species' and 'breed' fields. The dog\u2019s name is Max, which is within the 30-character maximum allowed for the name field. Max is 7 years old, a value that falls within the acceptable age range of 0 to 30 years. He weighs 32.5 kilograms, which is above the minimum threshold of 0.1 kg and below the upper limit of 200 kg defined for the weightKg property. The dog is microchipped, with the microchip field set to true, and the microchip ID is ABC123456789\u2014a 13-character alphanumeric string that matches the required pattern of 10 to 16 uppercase letters or digits as specified by the regular expression ^[A-Z0-9]{10,16}$.\n\nMax's primary coat color is golden brown, recorded in the 'color' field with a character count well under the 30-character limit. He has distinctive markings: a white patch on the chest and a small scar above the right eye, both described within the 100-character maximum allowed for the 'markings' field. Additional distinguishing physical features include a cropped tail and a slight limp on the left hind leg. These are listed in the 'distinguishingFeatures' array, which contains two entries, both under 50 characters and within the maximum of five permitted items.\n\nThe owner of the dog is Sarah Thompson, whose full name is stored in the 'fullName' field of the 'ownerInformation' object and is under the 100-character limit. She resides at 123 Maple Avenue, New York, United States, with a postal code of 10001. The street address, city, country, and postal code all comply with their respective character limits: 100 for street, 50 for city, 50 for country, and 20 for postal code. Sarah\u2019s primary contact number is +14155552671, a valid international phone number formatted according to the pattern ^\\\\+?[1-9]\\\\d{1,14}$, and she has provided an email address\u2014sarah.thompson@example.com\u2014that is properly structured and within the 100-character limit.\n\nThe emergency contact listed is James Thompson, believed to be a family member, though the relationship field is optional and not explicitly recorded. His contact number is +14155559876, which also conforms to the required international phone number format. The dog was last seen near Central Park, Manhattan, New York, a required field located within the 'ownerInformation' object despite its relevance to the pet\u2019s location.\n\nThe lost pet report was filed on October 5, 2023, at 08:45:30 UTC, recorded in ISO 8601 format as 2023-10-05T08:45:30Z in the 'dateReported' field. The case has been flagged as urgent, with the 'isUrgent' boolean set to true, indicating elevated priority due to the circumstances of the disappearance. The current status of the report is \"Open,\" one of the valid enum values that also include \"In Progress,\" \"Found,\" and \"Closed.\" The reporting officer assigned to the case is Lieutenant Maria Gonzalez, stationed at the Manhattan Central Precinct. Her officer ID is NY1234, which matches the required pattern of two uppercase letters followed by four digits. Her name is under the 100-character limit, and the station name is within the 50-character maximum allowed for that field.\n\nKey Facts:\n- Species: Dog\n- Breed: Golden Retriever (within 50-character limit)\n- Name: Max (2 characters, under 30-char limit)\n- Age: 7 years (within 0\u201330 range)\n- Weight: 32.5 kg (within 0.1\u2013200 kg range)\n- Microchipped: Yes\n- Microchip ID: ABC123456789 (13-character, matches ^[A-Z0-9]{10,16}$)\n- Color: Golden brown (within 30-char limit)\n- Markings: White patch on chest, small scar above right eye (within 100-char limit)\n- Distinguishing Features:\n - Cropped tail\n - Slightly limps on left hind leg\n - (2 items, both under 50 chars; max allowed: 5)\n- Owner: Sarah Thompson (under 100 chars)\n- Owner Address:\n - Street: 123 Maple Avenue (under 100 chars)\n - City: New York (under 50 chars)\n - Postal Code: 10001 (under 20 chars)\n - Country: United States (under 50 chars)\n- Owner Contact:\n - Phone: +14155552671 (valid international format)\n - Email: sarah.thompson@example.com (valid format, under 100 chars)\n- Emergency Contact:\n - Name: James Thompson\n - Phone: +14155559876 (valid format)\n- Last Seen: Near Central Park, Manhattan, New York\n- Report Filed: 2023-10-05T08:45:30Z (ISO 8601 format)\n- Urgency: Urgent (isUrgent = true)\n- Report Status: Open (valid enum value)\n- Reporting Officer:\n - Name: Lieutenant Maria Gonzalez (under 100 chars)\n - ID: NY1234 (matches pattern: two uppercase letters + four digits)\n - Station: Manhattan Central Precinct (under 50 chars)\n\n\ud83d\udcdd Task: Map the content of this document to the provided data structure.","role":"user"}]},"schema_str":"{\"type\": \"object\", \"properties\": {\"output\": {\"type\": \"object\", \"required\": [\"petDetails\", \"ownerInformation\", \"reportMetadata\"], \"properties\": {\"petDetails\": {\"type\": \"object\", \"required\": [\"species\", \"breed\", \"identifyingFeatures\"], \"additionalProperties\": false, \"properties\": {\"species\": {\"type\": \"string\", \"enum\": [\"Dog\", \"Cat\", \"Bird\", \"Rabbit\", \"Reptile\", \"Other\"]}, \"breed\": {\"type\": \"string\", \"maxLength\": 50}, \"name\": {\"type\": \"string\", \"maxLength\": 30}, \"age\": {\"type\": \"number\", \"minimum\": 0, \"maximum\": 30}, \"weightKg\": {\"type\": \"number\", \"minimum\": 0.1, \"maximum\": 200}, \"identifyingFeatures\": {\"type\": \"object\", \"required\": [\"color\", \"markings\", \"microchip\"], \"additionalProperties\": false, \"properties\": {\"color\": {\"type\": \"string\", \"maxLength\": 30}, \"markings\": {\"type\": \"string\", \"maxLength\": 100}, \"microchip\": {\"type\": \"boolean\"}, \"microchipId\": {\"type\": \"string\", \"pattern\": \"^[A-Z0-9]{10,16}$\", \"maxLength\": 16}, \"distinguishingFeatures\": {\"type\": \"array\", \"items\": {\"type\": \"string\", \"maxLength\": 50}, \"maxItems\": 5}}}}}, \"ownerInformation\": {\"type\": \"object\", \"required\": [\"fullName\", \"contactPhone\", \"lastSeenLocation\"], \"additionalProperties\": false, \"properties\": {\"fullName\": {\"type\": \"string\", \"maxLength\": 100}, \"contactPhone\": {\"type\": \"string\", \"pattern\": \"^\\\\+?[1-9]\\\\d{1,14}$\"}, \"contactEmail\": {\"type\": \"string\", \"format\": \"email\", \"maxLength\": 100}, \"address\": {\"type\": \"object\", \"required\": [\"street\", \"city\", \"postalCode\", \"country\"], \"additionalProperties\": false, \"properties\": {\"street\": {\"type\": \"string\", \"maxLength\": 100}, \"city\": {\"type\": \"string\", \"maxLength\": 50}, \"postalCode\": {\"type\": \"string\", \"maxLength\": 20}, \"country\": {\"type\": \"string\", \"maxLength\": 50}}}, \"emergencyContact\": {\"type\": \"object\", \"required\": [\"name\", \"phone\"], \"additionalProperties\": false, \"properties\": {\"name\": {\"type\": \"string\", \"maxLength\": 100}, \"phone\": {\"type\": \"string\", \"pattern\": \"^\\\\+?[1-9]\\\\d{1,14}$\"}}}}}, \"reportMetadata\": {\"type\": \"object\", \"required\": [\"dateReported\", \"isUrgent\", \"reportStatus\"], \"additionalProperties\": false, \"properties\": {\"dateReported\": {\"type\": \"string\", \"format\": \"date-time\"}, \"lastUpdated\": {\"type\": \"string\", \"format\": \"date-time\"}, \"isUrgent\": {\"type\": \"boolean\"}, \"reportStatus\": {\"type\": \"string\", \"enum\": [\"Open\", \"In Progress\", \"Found\", \"Closed\"]}, \"reportingOfficer\": {\"type\": \"object\", \"required\": [\"id\", \"name\"], \"additionalProperties\": false, \"properties\": {\"id\": {\"type\": \"string\", \"pattern\": \"^[A-Z]{2}\\\\d{4}$\"}, \"name\": {\"type\": \"string\", \"maxLength\": 100}, \"station\": {\"type\": \"string\", \"maxLength\": 50}}}}}}, \"additionalProperties\": false}}, \"required\": [\"output\"], \"additionalProperties\": false}","schema_type":"xml","schema_fields_count":"3"} +{"responses_create_params":{"input":[{"content":"# Instructions:\nGenerate a YAML output that strictly adheres to the specified schema based on the document provided.\n\n# Document:\n**Home Repair Assessment \u2013 Comprehensive Fact Document**\n\nThis document outlines the complete details of a home repair assessment identified by the unique repair ID **RP-123456**. This identifier adheres strictly to the defined alphanumeric schema format: it begins with the prefix \"RP-\" followed by exactly six numerical digits, conforming to the regular expression pattern `^RP-[0-9]{6}$`. This standardized ID ensures traceability across documentation, work orders, and administrative tracking systems throughout the lifecycle of the repair.\n\nThe property associated with this assessment is located at **123 Maple Street, Springfield, Illinois, 62701**. The address is formally structured as a nested JSON object containing the required string fields: `street`, `city`, `state`, and `zipCode`. No additional or optional fields are present, ensuring full compliance with the schema\u2019s structural and data-type requirements. The location is situated in a residential area of Springfield, a mid-sized city in central Illinois, and the zip code 62701 corresponds to the downtown and near-westside neighborhoods.\n\nThe primary issue identified during the inspection falls under the **plumbing** category. According to the schema, the `issueCategory` field is restricted to a predefined enumeration of valid string values: `'plumbing'`, `'electrical'`, `'structural'`, `'roofing'`, `'HVAC'`, `'interior'`, and `'exterior'`. The selection of 'plumbing' indicates that the problem involves components of the home\u2019s water delivery, drainage, waste systems, or plumbing fixtures such as sinks, toilets, or water heaters. This classification helps route the repair to appropriate specialists and determines permitting and inspection requirements.\n\nThe severity of the plumbing issue has been assessed as **urgent**. The `severityLevel` field must be one of the following allowed values: `'low'`, `'medium'`, `'high'`, or `'urgent'`. An 'urgent' classification signifies that the issue presents an immediate risk\u2014such as a burst pipe, major water leak, or sewage backup\u2014that could lead to extensive water damage, mold growth, or unsafe living conditions if not addressed promptly. This designation triggers expedited scheduling and resource allocation.\n\nThe estimated cost for resolving this repair is structured within a dedicated `estimatedCost` object, which includes:\n- **Minimum cost**: $850\n- **Maximum cost**: $1,500\n- **Currency**: USD (U.S. Dollars)\n\nAll values comply with schema requirements: `min` and `max` are non-negative numeric values, with `min` less than or equal to `max`, and `currency` is selected from the allowed set: `['USD', 'EUR', 'GBP']`. The cost range accounts for labor, materials, permits, and potential contingencies such as wall patching or flooring repairs due to water damage. The use of USD aligns with the property\u2019s location and standard billing practices in the United States.\n\nThis repair **requires a licensed professional**, as indicated by the boolean value `true` in the `requiresProfessional` field. This requirement is due to the technical complexity, safety risks, and regulatory codes associated with plumbing work, particularly when involving pressurized water systems, waste lines, or gas-powered water heaters. Only certified plumbers with appropriate state licensure are permitted to perform and sign off on such repairs in Illinois.\n\nThe repair timeline has been formally established with the following details:\n- **Start date**: 2024-06-10 (ISO 8601 format)\n- **End date**: 2024-06-15 (ISO 8601 format)\n- **Priority level**: high\n\nThe five-day window allows for inspection, parts procurement, execution, and final testing. The `priority` level is set to 'high' to reflect the urgent severity, ensuring that scheduling systems and contractors treat this job with immediate attention. Notably, the timeline object does **not** include any contingency plans\u2014such as delays due to material shortages or weather\u2014indicating that the schedule assumes optimal conditions and uninterrupted workflow. If disruptions occur, a revised timeline will need to be issued separately.\n\nIn summary, this repair assessment represents a high-priority, urgent plumbing issue at a residential property in Springfield, IL, requiring professional intervention within a defined six-day window and an estimated expenditure between $850 and $1,500 in U.S. dollars. All data fields strictly adhere to the defined schema in terms of format, value constraints, and structure, ensuring interoperability with automated systems for work order generation, contractor dispatch, and compliance auditing.","role":"user"},{"content":"Structure your response according to the following schema specification: additionalProperties: false\nproperties:\n estimatedCost:\n additionalProperties: false\n properties:\n currency:\n enum:\n - USD\n - EUR\n - GBP\n type: string\n max:\n minimum: 0\n type: number\n min:\n minimum: 0\n type: number\n required:\n - min\n - max\n - currency\n type: object\n issueCategory:\n enum:\n - plumbing\n - electrical\n - structural\n - roofing\n - HVAC\n - interior\n - exterior\n type: string\n propertyAddress:\n additionalProperties: false\n properties:\n city:\n type: string\n state:\n type: string\n street:\n type: string\n zipCode:\n type: string\n required:\n - street\n - city\n - state\n - zipCode\n type: object\n repairId:\n pattern: ^RP-[0-9]{6}$\n type: string\n repairTimeline:\n additionalProperties: false\n properties:\n contingencies:\n items:\n additionalProperties: false\n properties:\n action:\n type: string\n condition:\n type: string\n required:\n - condition\n - action\n type: object\n type: array\n endDate:\n format: date\n type: string\n priority:\n enum:\n - low\n - normal\n - high\n type: string\n startDate:\n format: date\n type: string\n required:\n - startDate\n - endDate\n - priority\n type: object\n requiresProfessional:\n type: boolean\n severityLevel:\n enum:\n - low\n - medium\n - high\n - urgent\n type: string\nrequired:\n- repairId\n- propertyAddress\n- issueCategory\n- severityLevel\n- estimatedCost\n- requiresProfessional\n- repairTimeline\ntype: object\n. Return only the YAML output.","role":"user"}]},"schema_str":"{\"type\": \"object\", \"required\": [\"repairId\", \"propertyAddress\", \"issueCategory\", \"severityLevel\", \"estimatedCost\", \"requiresProfessional\", \"repairTimeline\"], \"properties\": {\"repairId\": {\"type\": \"string\", \"pattern\": \"^RP-[0-9]{6}$\"}, \"propertyAddress\": {\"type\": \"object\", \"required\": [\"street\", \"city\", \"state\", \"zipCode\"], \"properties\": {\"street\": {\"type\": \"string\"}, \"city\": {\"type\": \"string\"}, \"state\": {\"type\": \"string\"}, \"zipCode\": {\"type\": \"string\"}}, \"additionalProperties\": false}, \"issueCategory\": {\"type\": \"string\", \"enum\": [\"plumbing\", \"electrical\", \"structural\", \"roofing\", \"HVAC\", \"interior\", \"exterior\"]}, \"severityLevel\": {\"type\": \"string\", \"enum\": [\"low\", \"medium\", \"high\", \"urgent\"]}, \"estimatedCost\": {\"type\": \"object\", \"required\": [\"min\", \"max\", \"currency\"], \"properties\": {\"min\": {\"type\": \"number\", \"minimum\": 0}, \"max\": {\"type\": \"number\", \"minimum\": 0}, \"currency\": {\"type\": \"string\", \"enum\": [\"USD\", \"EUR\", \"GBP\"]}}, \"additionalProperties\": false}, \"requiresProfessional\": {\"type\": \"boolean\"}, \"repairTimeline\": {\"type\": \"object\", \"required\": [\"startDate\", \"endDate\", \"priority\"], \"properties\": {\"startDate\": {\"type\": \"string\", \"format\": \"date\"}, \"endDate\": {\"type\": \"string\", \"format\": \"date\"}, \"priority\": {\"type\": \"string\", \"enum\": [\"low\", \"normal\", \"high\"]}, \"contingencies\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"required\": [\"condition\", \"action\"], \"properties\": {\"condition\": {\"type\": \"string\"}, \"action\": {\"type\": \"string\"}}, \"additionalProperties\": false}}}, \"additionalProperties\": false}}, \"additionalProperties\": false}","schema_type":"yaml","schema_fields_count":"7"} +{"responses_create_params":{"input":[{"content":"Given the following text:\n\nWhen pairing wine with grilled salmon, a medium-bodied white wine such as Chardonnay or a Pinot Noir is ideal. These wines offer balanced acidity that complements the natural richness of the fish, while their flavor profiles\u2014ranging from crisp citrus and green apple notes in Chardonnay to subtle red berry undertones in Pinot Noir\u2014enhance the dish without overpowering it. The acidity also helps refresh the palate between bites, ensuring a harmonious dining experience.\n\nAcidity in wine plays a critical role in food pairing, particularly with fatty foods. It acts as a palate cleanser, cutting through the mouth-coating richness of fats and preventing a heavy or greasy sensation. This crisp, refreshing quality not only restores balance but also increases the perception of freshness and clarity in each bite, making the meal feel lighter and more enjoyable.\n\nFor a cheese platter featuring blue cheeses\u2014such as Roquefort, Gorgonzola, or Stilton\u2014a sweet dessert wine is highly recommended. Wines like Sauternes from Bordeaux or a late-harvest Riesling from Germany are excellent choices. Their high residual sugar content effectively counteracts the intense saltiness and pungent aroma characteristic of blue cheeses, creating a harmonious interplay of sweet and savory. The wine\u2019s complexity and slight honeyed richness also mirror the creamy, bold textures of the cheese, elevating the overall tasting experience.\n\nTannins in red wines, especially those found in full-bodied varieties like Cabernet Sauvignon, Merlot, or Nebbiolo, interact dynamically with red meat. These polyphenolic compounds bind to proteins in the meat, softening the wine\u2019s astringency and rounding out its structure. This interaction not only enhances the perception of the wine's smoothness but also amplifies the meat\u2019s natural tenderness and depth of flavor, creating a synergistic effect that makes both the wine and the food more satisfying.\n\nWhen pairing wine with spicy food\u2014such as Indian curries, Thai dishes, or Mexican salsas\u2014it is essential to avoid wines high in alcohol or tannins, as these components can amplify the heat and lead to an unpleasant burn. Instead, wines with a touch of residual sugar, such as off-dry Gew\u00fcrztraminer or a slightly chilled Ros\u00e9, are ideal. The gentle sweetness in these wines tempers the spiciness, while their aromatic profiles (floral, lychee, rose petal in Gew\u00fcrztraminer; ripe strawberry, cherry in Ros\u00e9) provide a refreshing contrast. Their moderate acidity and lower alcohol content also contribute to a cooling sensation on the palate, balancing the heat without overwhelming the dish.\n\nParse the document and populate the following data model.\nI'd like you to format your response as a JSON object matching the provided schema: {'title': 'WineAndFoodPairingRecommendation', 'type': 'object', 'additionalProperties': False, 'required': ['dishName', 'wineType', 'flavorProfile', 'acidityLevel', 'tanninLevel', 'servingTemperature', 'isRecommended'], 'properties': {'dishName': {'type': 'string', 'description': 'The name of the dish being paired with wine.'}, 'wineType': {'type': 'string', 'enum': ['Red', 'White', 'Ros\u00e9', 'Sparkling', 'Dessert'], 'description': 'The type of wine recommended for pairing.'}, 'flavorProfile': {'type': 'object', 'additionalProperties': False, 'required': ['primary', 'secondary'], 'properties': {'primary': {'type': 'string', 'description': 'The dominant flavor note in the wine.'}, 'secondary': {'type': 'string', 'description': 'A secondary flavor note in the wine.'}}}, 'acidityLevel': {'type': 'string', 'enum': ['Low', 'Medium', 'High'], 'description': 'The level of acidity in the wine.'}, 'tanninLevel': {'type': 'string', 'enum': ['Low', 'Medium', 'High'], 'description': 'The level of tannins in the wine.'}, 'servingTemperature': {'type': 'number', 'minimum': 0, 'maximum': 30, 'description': 'Serving temperature in degrees Celsius.'}, 'isRecommended': {'type': 'boolean', 'description': 'Whether the pairing is recommended based on flavor compatibility.'}}}","role":"user"}]},"schema_str":"{\"title\": \"WineAndFoodPairingRecommendation\", \"type\": \"object\", \"additionalProperties\": false, \"required\": [\"dishName\", \"wineType\", \"flavorProfile\", \"acidityLevel\", \"tanninLevel\", \"servingTemperature\", \"isRecommended\"], \"properties\": {\"dishName\": {\"type\": \"string\", \"description\": \"The name of the dish being paired with wine.\"}, \"wineType\": {\"type\": \"string\", \"enum\": [\"Red\", \"White\", \"Ros\\u00e9\", \"Sparkling\", \"Dessert\"], \"description\": \"The type of wine recommended for pairing.\"}, \"flavorProfile\": {\"type\": \"object\", \"additionalProperties\": false, \"required\": [\"primary\", \"secondary\"], \"properties\": {\"primary\": {\"type\": \"string\", \"description\": \"The dominant flavor note in the wine.\"}, \"secondary\": {\"type\": \"string\", \"description\": \"A secondary flavor note in the wine.\"}}}, \"acidityLevel\": {\"type\": \"string\", \"enum\": [\"Low\", \"Medium\", \"High\"], \"description\": \"The level of acidity in the wine.\"}, \"tanninLevel\": {\"type\": \"string\", \"enum\": [\"Low\", \"Medium\", \"High\"], \"description\": \"The level of tannins in the wine.\"}, \"servingTemperature\": {\"type\": \"number\", \"minimum\": 0, \"maximum\": 30, \"description\": \"Serving temperature in degrees Celsius.\"}, \"isRecommended\": {\"type\": \"boolean\", \"description\": \"Whether the pairing is recommended based on flavor compatibility.\"}}}","schema_type":"json","schema_fields_count":"7"} +{"responses_create_params":{"input":[{"content":"Extract the information from the text and format it as YAML matching this schema.\n\nDocument:\nScience fairs serve as vital educational platforms within schools and academic institutions, designed to immerse students in authentic scientific inquiry and foster deep engagement with STEM (science, technology, engineering, and mathematics) disciplines. The primary purpose of a science fair is to offer students a structured yet creative environment in which they can formulate hypotheses, design and execute experiments, collect and analyze data, and communicate their findings through visual displays and oral presentations. This process not only reinforces the scientific method but also cultivates essential 21st-century skills such as critical thinking, problem-solving, independent research, and effective communication. By presenting their projects to peers, teachers, and expert judges, students gain valuable experience in defending their work, receiving constructive feedback, and learning how to articulate complex ideas clearly and logically.\n\nEvaluation of student projects at science fairs follows a systematic and standardized process conducted by trained judges using detailed rubrics. These rubrics typically assess multiple dimensions of the project, including the clarity and feasibility of the hypothesis, the scientific rigor of the experimental design, the thoroughness and accuracy of data collection and analysis, the originality and novelty of the research question, the coherence and professionalism of the project display, and the quality of the student's oral presentation and ability to answer technical questions. Judges, often drawn from local universities, research institutions, industries, or professional STEM organizations, evaluate projects through a combination of visual inspection, review of written research reports, and face-to-face interviews with students, allowing them to gauge both the student\u2019s technical understanding and their capacity to think critically under scrutiny.\n\nCommonly exhibited projects at science fairs span a broad range of scientific and technological fields, reflecting the interdisciplinary nature of modern science. These include, but are not limited to, biology experiments (such as studying plant growth under different light conditions or analyzing microbial cultures), chemistry investigations (like exploring reaction rates or synthesizing compounds), physics demonstrations (covering motion, energy transfer, or wave properties), engineering projects (such as designing sustainable models, robotic systems, or structural prototypes), environmental science studies (focusing on pollution levels, climate change impacts, or conservation strategies), and computer science initiatives (ranging from algorithm development and data analysis to mobile app creation and artificial intelligence applications). Many projects incorporate the use of sensors, data loggers, statistical software, or survey instruments, and often aim to address tangible real-world problems such as water purification, energy efficiency, or public health awareness, emphasizing the practical relevance of scientific knowledge.\n\nJudges play a central role in ensuring the integrity and educational value of the science fair experience. Comprising professionals with advanced degrees and real-world experience in STEM fields\u2014such as research scientists, practicing engineers, medical professionals, university faculty, and industry experts\u2014judges are responsible for impartially assessing projects based on predefined criteria. Their responsibilities include reviewing project boards and accompanying documentation, analyzing data presentation techniques, and engaging students in probing questioning during live interviews. This interaction helps determine whether the student truly comprehends their project\u2019s underlying principles, can distinguish between correlation and causation, understands potential sources of error, and has considered alternative explanations. The feedback provided by judges often serves as a powerful learning tool, encouraging students to reflect on their work, identify areas for improvement, and deepen their scientific reasoning\u2014thus extending the educational impact of the science fair beyond the event itself.\nStructure your response according to the following schema specification: {'projectTitle': {'type': 'string'}, 'studentName': {'type': 'string'}, 'gradeLevel': {'type': 'string'}, 'schoolName': {'type': 'string'}, 'category': {'type': 'string', 'enum': ['Physics', 'Chemistry', 'Biology', 'Computer Science', 'Engineering', 'Environmental Science']}, 'abstract': {'type': 'string'}, 'hypothesis': {'type': 'string'}, 'methodology': {'type': 'object', 'required': ['experimentalDesign', 'materials', 'procedure'], 'properties': {'experimentalDesign': {'type': 'string'}, 'materials': {'type': 'array', 'items': {'type': 'string'}}, 'procedure': {'type': 'array', 'items': {'type': 'string'}}}, 'additionalProperties': False}, 'results': {'type': 'object', 'required': ['dataSummary', 'visualizations', 'conclusions'], 'properties': {'dataSummary': {'type': 'string'}, 'visualizations': {'type': 'array', 'items': {'type': 'object', 'required': ['type', 'fileReference'], 'properties': {'type': {'type': 'string', 'enum': ['chart', 'graph', 'diagram', 'image']}, 'fileReference': {'type': 'string'}}, 'additionalProperties': False}}, 'conclusions': {'type': 'string'}}, 'additionalProperties': False}, 'isOriginalResearch': {'type': 'boolean'}}. Return only the YAML output.","role":"user"}]},"schema_str":"{\"title\": \"ScienceFairProjectSubmission\", \"type\": \"object\", \"properties\": {\"projectTitle\": {\"type\": \"string\"}, \"studentName\": {\"type\": \"string\"}, \"gradeLevel\": {\"type\": \"string\"}, \"schoolName\": {\"type\": \"string\"}, \"category\": {\"type\": \"string\", \"enum\": [\"Physics\", \"Chemistry\", \"Biology\", \"Computer Science\", \"Engineering\", \"Environmental Science\"]}, \"abstract\": {\"type\": \"string\"}, \"hypothesis\": {\"type\": \"string\"}, \"methodology\": {\"type\": \"object\", \"required\": [\"experimentalDesign\", \"materials\", \"procedure\"], \"properties\": {\"experimentalDesign\": {\"type\": \"string\"}, \"materials\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}}, \"procedure\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}}}, \"additionalProperties\": false}, \"results\": {\"type\": \"object\", \"required\": [\"dataSummary\", \"visualizations\", \"conclusions\"], \"properties\": {\"dataSummary\": {\"type\": \"string\"}, \"visualizations\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"required\": [\"type\", \"fileReference\"], \"properties\": {\"type\": {\"type\": \"string\", \"enum\": [\"chart\", \"graph\", \"diagram\", \"image\"]}, \"fileReference\": {\"type\": \"string\"}}, \"additionalProperties\": false}}, \"conclusions\": {\"type\": \"string\"}}, \"additionalProperties\": false}, \"isOriginalResearch\": {\"type\": \"boolean\"}}, \"required\": [\"projectTitle\", \"studentName\", \"gradeLevel\", \"schoolName\", \"category\", \"abstract\", \"hypothesis\", \"methodology\", \"results\", \"isOriginalResearch\"], \"additionalProperties\": false}","schema_type":"yaml","schema_fields_count":"10"} +{"responses_create_params":{"input":[{"content":"Format your response as a JSON object adhering to:\n- Schema structure: {'patientId': {'type': 'string', 'description': 'Unique identifier for the patient'}, 'petType': {'type': 'string', 'enum': ['cat', 'dog', 'bird', 'rabbit', 'reptile', 'fish', 'rodent', 'other'], 'description': 'Type of pet the patient is exposed to'}, 'allergyConfirmed': {'type': 'boolean', 'description': 'Whether the pet allergy has been clinically confirmed'}, 'symptoms': {'type': 'array', 'items': {'type': 'string', 'enum': ['sneezing', 'itchy_eyes', 'nasal_congestion', 'coughing', 'wheezing', 'skin_rash', 'shortness_of_breath', 'none'], 'description': 'List of allergic symptoms experienced'}, 'minItems': 0, 'maxItems': 8}, 'exposureHistory': {'type': 'object', 'additionalProperties': False, 'required': ['durationOfExposure', 'frequencyOfContact', 'proximityLevel'], 'properties': {'durationOfExposure': {'type': 'string', 'enum': ['less_than_1_hour', '1-3_hours', '4-8_hours', 'more_than_8_hours', 'continuous'], 'description': 'How long the patient is exposed to the pet'}, 'frequencyOfContact': {'type': 'string', 'enum': ['rare', 'occasional', 'weekly', 'daily', 'constant'], 'description': 'How often contact occurs'}, 'proximityLevel': {'type': 'string', 'enum': ['distant', 'nearby', 'in_room', 'on_person', 'direct_contact'], 'description': 'Physical distance during exposure'}}}, 'diagnosticTests': {'type': 'array', 'items': {'type': 'object', 'additionalProperties': False, 'required': ['testType', 'result', 'datePerformed'], 'properties': {'testType': {'type': 'string', 'enum': ['skin_prick', 'blood_serum', 'spirometry', 'allergen_specific_ige'], 'description': 'Type of allergy diagnostic test'}, 'result': {'type': 'string', 'enum': ['positive', 'negative', 'indeterminate'], 'description': 'Result of the test'}, 'datePerformed': {'type': 'string', 'format': 'date', 'description': 'Date the test was performed'}}}, 'minItems': 0, 'maxItems': 4}, 'triggerAvoidancePlan': {'type': 'object', 'additionalProperties': False, 'required': ['homePrecautions', 'workPrecautions', 'travelTips'], 'properties': {'homePrecautions': {'type': 'array', 'items': {'type': 'string', 'enum': ['keep pet out of bedroom', 'use HEPA filters', 'frequent vacuuming', 'wet mopping', 'wash hands after contact', 'no pet grooming indoors', 'use air purifiers'], 'description': 'Measures to reduce exposure at home'}, 'minItems': 1}, 'workPrecautions': {'type': 'array', 'items': {'type': 'string', 'enum': ['avoid pet in office', 'use personal air filter', 'request pet-free environment', 'no indoor pet visits', 'inform colleagues'], 'description': 'Measures to reduce exposure at work'}, 'minItems': 1}, 'travelTips': {'type': 'array', 'items': {'type': 'string', 'enum': ['check pet policies', 'stay in pet-free accommodations', 'avoid homes with pets', 'carry emergency medication', 'have a backup plan'], 'description': 'Travel-related avoidance strategies'}, 'minItems': 1}}}, 'medicationPlan': {'type': 'object', 'additionalProperties': False, 'required': ['medications', 'dosage', 'frequency', 'duration'], 'properties': {'medications': {'type': 'array', 'items': {'type': 'string', 'enum': ['antihistamines', 'nasal corticosteroids', 'leukotriene_receptor_antagonists', 'mast_cell_stabilizers', 'epinephrine_injector', 'none'], 'description': 'List of medications prescribed'}, 'minItems': 0}, 'dosage': {'type': 'string', 'description': 'Dosage instructions'}, 'frequency': {'type': 'string', 'enum': ['once_daily', 'twice_daily', 'three_times_daily', 'as_needed', 'every_6_hours', 'every_12_hours'], 'description': 'How often medication is taken'}, 'duration': {'type': 'string', 'description': 'Length of treatment period'}}}, 'emergencyProtocol': {'type': 'object', 'additionalProperties': False, 'required': ['triggerSymptoms', 'stepsToTake', 'emergencyContact'], 'properties': {'triggerSymptoms': {'type': 'array', 'items': {'type': 'string', 'enum': ['severe_wheezing', 'trouble_breathing', 'swelling_of_face_lips', 'rapid_pulse', 'loss_of_consciousness', 'chest_pain'], 'description': 'Symptoms indicating emergency'}, 'minItems': 1}, 'stepsToTake': {'type': 'array', 'items': {'type': 'string', 'description': 'Action steps during an allergic emergency'}, 'minItems': 1}, 'emergencyContact': {'type': 'string', 'description': 'Name and number for emergency contact'}}}, 'familyHistory': {'type': 'object', 'additionalProperties': False, 'required': ['hasFamilyAllergy', 'allergyDetails'], 'properties': {'hasFamilyAllergy': {'type': 'boolean', 'description': 'Whether any family members have allergies'}, 'allergyDetails': {'type': 'string', 'description': \"Description of family members' allergies, if known\"}}}, 'environmentalFactors': {'type': 'object', 'additionalProperties': False, 'required': ['indoorAirQuality', 'humidityLevel', 'exposureToPollen'], 'properties': {'indoorAirQuality': {'type': 'string', 'enum': ['excellent', 'good', 'moderate', 'poor', 'very_poor'], 'description': 'General quality of indoor air'}, 'humidityLevel': {'type': 'string', 'enum': ['low', 'optimal', 'high', 'very_high'], 'description': 'Level of indoor humidity'}, 'exposureToPollen': {'type': 'boolean', 'description': 'Whether patient is also exposed to pollen, which may worsen pet allergies'}}}, 'followUpSchedule': {'type': 'object', 'additionalProperties': False, 'required': ['nextAppointment', 'testReevaluation', 'reviewDate'], 'properties': {'nextAppointment': {'type': 'string', 'format': 'date', 'description': 'Date of the next clinical follow-up'}, 'testReevaluation': {'type': 'string', 'enum': ['in_3_months', 'in_6_months', 'in_1_year', 'as_needed'], 'description': 'When to repeat diagnostic tests'}, 'reviewDate': {'type': 'string', 'format': 'date', 'description': 'Date to review the entire allergy management plan'}}}}\n- Validation rules:\n * All strings must be properly escaped\n * All data types are verified\n * There are no unnecessary fields added\n * Must pass JSON schema validation\n * Must not be in Markdown format: i.e. not in ```json``` format.\nEnsure compliance with all specifications before responding.","role":"system"},{"content":"# Information\nPet allergies in humans are primarily caused by proteins found in the dander, saliva, and urine of animals, with Fel d 1\u2014a protein present in cat saliva\u2014being one of the most common and potent allergens. These allergens can become airborne as microscopic skin flakes known as dander, which are lightweight and remain suspended in the air for extended periods. When inhaled by sensitive individuals, these particles trigger immune responses leading to symptoms such as sneezing, itching, nasal congestion, and in severe cases, asthma exacerbation. Cat allergies are more prevalent than dog allergies due to the grooming habits of cats, which transfer allergenic proteins like Fel d 1 from their saliva onto their fur, where they persist for months. This widespread persistence increases the likelihood of exposure even in homes where cats are no longer present. Importantly, pet allergies can develop at any age; while some people experience allergic reactions in childhood, others may develop sensitivities later in life, sometimes after years of living with a pet without issues. Contrary to popular belief, the length of a pet\u2019s fur does not significantly influence allergy severity\u2014since the primary allergens are present in dander, saliva, and urine, not in the fur itself, both short-haired and long-haired pets can trigger allergic reactions equally. To reduce allergen exposure, individuals with pet allergies can adopt several mitigation strategies: using vacuum cleaners equipped with HEPA filters to trap allergens, operating air purifiers with HEPA filtration to clean indoor air, bathing pets regularly (as this reduces the amount of allergens on their skin and fur), restricting pets from bedrooms to limit allergen accumulation in sleeping areas, and thoroughly washing hands after handling animals. While certain breeds such as poodles (which shed less) or hairless breeds like the Sphynx cat are often marketed as \u201chypoallergenic,\u201d this classification is misleading\u2014no animal is completely allergen-free, as all mammals produce allergens in their dander, saliva, or urine. Therefore, individuals with allergies should consider individual sensitivities and spend time around specific animals before adopting, as allergic reactions vary significantly between people and even between different animals of the same species.\n\n# Problem: Ensure your output validates against the given JSON schema.","role":"user"}]},"schema_str":"{\"type\": \"object\", \"additionalProperties\": false, \"required\": [\"patientId\", \"petType\", \"allergyConfirmed\", \"symptoms\", \"exposureHistory\", \"diagnosticTests\", \"triggerAvoidancePlan\", \"medicationPlan\", \"emergencyProtocol\", \"familyHistory\", \"environmentalFactors\", \"followUpSchedule\"], \"properties\": {\"patientId\": {\"type\": \"string\", \"description\": \"Unique identifier for the patient\"}, \"petType\": {\"type\": \"string\", \"enum\": [\"cat\", \"dog\", \"bird\", \"rabbit\", \"reptile\", \"fish\", \"rodent\", \"other\"], \"description\": \"Type of pet the patient is exposed to\"}, \"allergyConfirmed\": {\"type\": \"boolean\", \"description\": \"Whether the pet allergy has been clinically confirmed\"}, \"symptoms\": {\"type\": \"array\", \"items\": {\"type\": \"string\", \"enum\": [\"sneezing\", \"itchy_eyes\", \"nasal_congestion\", \"coughing\", \"wheezing\", \"skin_rash\", \"shortness_of_breath\", \"none\"], \"description\": \"List of allergic symptoms experienced\"}, \"minItems\": 0, \"maxItems\": 8}, \"exposureHistory\": {\"type\": \"object\", \"additionalProperties\": false, \"required\": [\"durationOfExposure\", \"frequencyOfContact\", \"proximityLevel\"], \"properties\": {\"durationOfExposure\": {\"type\": \"string\", \"enum\": [\"less_than_1_hour\", \"1-3_hours\", \"4-8_hours\", \"more_than_8_hours\", \"continuous\"], \"description\": \"How long the patient is exposed to the pet\"}, \"frequencyOfContact\": {\"type\": \"string\", \"enum\": [\"rare\", \"occasional\", \"weekly\", \"daily\", \"constant\"], \"description\": \"How often contact occurs\"}, \"proximityLevel\": {\"type\": \"string\", \"enum\": [\"distant\", \"nearby\", \"in_room\", \"on_person\", \"direct_contact\"], \"description\": \"Physical distance during exposure\"}}}, \"diagnosticTests\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"additionalProperties\": false, \"required\": [\"testType\", \"result\", \"datePerformed\"], \"properties\": {\"testType\": {\"type\": \"string\", \"enum\": [\"skin_prick\", \"blood_serum\", \"spirometry\", \"allergen_specific_ige\"], \"description\": \"Type of allergy diagnostic test\"}, \"result\": {\"type\": \"string\", \"enum\": [\"positive\", \"negative\", \"indeterminate\"], \"description\": \"Result of the test\"}, \"datePerformed\": {\"type\": \"string\", \"format\": \"date\", \"description\": \"Date the test was performed\"}}}, \"minItems\": 0, \"maxItems\": 4}, \"triggerAvoidancePlan\": {\"type\": \"object\", \"additionalProperties\": false, \"required\": [\"homePrecautions\", \"workPrecautions\", \"travelTips\"], \"properties\": {\"homePrecautions\": {\"type\": \"array\", \"items\": {\"type\": \"string\", \"enum\": [\"keep pet out of bedroom\", \"use HEPA filters\", \"frequent vacuuming\", \"wet mopping\", \"wash hands after contact\", \"no pet grooming indoors\", \"use air purifiers\"], \"description\": \"Measures to reduce exposure at home\"}, \"minItems\": 1}, \"workPrecautions\": {\"type\": \"array\", \"items\": {\"type\": \"string\", \"enum\": [\"avoid pet in office\", \"use personal air filter\", \"request pet-free environment\", \"no indoor pet visits\", \"inform colleagues\"], \"description\": \"Measures to reduce exposure at work\"}, \"minItems\": 1}, \"travelTips\": {\"type\": \"array\", \"items\": {\"type\": \"string\", \"enum\": [\"check pet policies\", \"stay in pet-free accommodations\", \"avoid homes with pets\", \"carry emergency medication\", \"have a backup plan\"], \"description\": \"Travel-related avoidance strategies\"}, \"minItems\": 1}}}, \"medicationPlan\": {\"type\": \"object\", \"additionalProperties\": false, \"required\": [\"medications\", \"dosage\", \"frequency\", \"duration\"], \"properties\": {\"medications\": {\"type\": \"array\", \"items\": {\"type\": \"string\", \"enum\": [\"antihistamines\", \"nasal corticosteroids\", \"leukotriene_receptor_antagonists\", \"mast_cell_stabilizers\", \"epinephrine_injector\", \"none\"], \"description\": \"List of medications prescribed\"}, \"minItems\": 0}, \"dosage\": {\"type\": \"string\", \"description\": \"Dosage instructions\"}, \"frequency\": {\"type\": \"string\", \"enum\": [\"once_daily\", \"twice_daily\", \"three_times_daily\", \"as_needed\", \"every_6_hours\", \"every_12_hours\"], \"description\": \"How often medication is taken\"}, \"duration\": {\"type\": \"string\", \"description\": \"Length of treatment period\"}}}, \"emergencyProtocol\": {\"type\": \"object\", \"additionalProperties\": false, \"required\": [\"triggerSymptoms\", \"stepsToTake\", \"emergencyContact\"], \"properties\": {\"triggerSymptoms\": {\"type\": \"array\", \"items\": {\"type\": \"string\", \"enum\": [\"severe_wheezing\", \"trouble_breathing\", \"swelling_of_face_lips\", \"rapid_pulse\", \"loss_of_consciousness\", \"chest_pain\"], \"description\": \"Symptoms indicating emergency\"}, \"minItems\": 1}, \"stepsToTake\": {\"type\": \"array\", \"items\": {\"type\": \"string\", \"description\": \"Action steps during an allergic emergency\"}, \"minItems\": 1}, \"emergencyContact\": {\"type\": \"string\", \"description\": \"Name and number for emergency contact\"}}}, \"familyHistory\": {\"type\": \"object\", \"additionalProperties\": false, \"required\": [\"hasFamilyAllergy\", \"allergyDetails\"], \"properties\": {\"hasFamilyAllergy\": {\"type\": \"boolean\", \"description\": \"Whether any family members have allergies\"}, \"allergyDetails\": {\"type\": \"string\", \"description\": \"Description of family members' allergies, if known\"}}}, \"environmentalFactors\": {\"type\": \"object\", \"additionalProperties\": false, \"required\": [\"indoorAirQuality\", \"humidityLevel\", \"exposureToPollen\"], \"properties\": {\"indoorAirQuality\": {\"type\": \"string\", \"enum\": [\"excellent\", \"good\", \"moderate\", \"poor\", \"very_poor\"], \"description\": \"General quality of indoor air\"}, \"humidityLevel\": {\"type\": \"string\", \"enum\": [\"low\", \"optimal\", \"high\", \"very_high\"], \"description\": \"Level of indoor humidity\"}, \"exposureToPollen\": {\"type\": \"boolean\", \"description\": \"Whether patient is also exposed to pollen, which may worsen pet allergies\"}}}, \"followUpSchedule\": {\"type\": \"object\", \"additionalProperties\": false, \"required\": [\"nextAppointment\", \"testReevaluation\", \"reviewDate\"], \"properties\": {\"nextAppointment\": {\"type\": \"string\", \"format\": \"date\", \"description\": \"Date of the next clinical follow-up\"}, \"testReevaluation\": {\"type\": \"string\", \"enum\": [\"in_3_months\", \"in_6_months\", \"in_1_year\", \"as_needed\"], \"description\": \"When to repeat diagnostic tests\"}, \"reviewDate\": {\"type\": \"string\", \"format\": \"date\", \"description\": \"Date to review the entire allergy management plan\"}}}}}","schema_type":"json","schema_fields_count":"12"} diff --git a/resources_servers/structured_outputs/data/structured_outputs_260309_nano_v3_sdg_json_yaml_xml_example_metrics.json b/resources_servers/structured_outputs/data/structured_outputs_260309_nano_v3_sdg_json_yaml_xml_example_metrics.json new file mode 100644 index 000000000..18c0f1374 --- /dev/null +++ b/resources_servers/structured_outputs/data/structured_outputs_260309_nano_v3_sdg_json_yaml_xml_example_metrics.json @@ -0,0 +1,50 @@ +{ + "name": "example", + "type": "example", + "jsonl_fpath": "resources_servers/structured_outputs/data/structured_outputs_260309_nano_v3_sdg_json_yaml_xml_example.jsonl", + "num_repeats": 1, + "gitlab_identifier": null, + "huggingface_identifier": null, + "license": null, + "Number of examples": 5, + "Number of tools": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Standard deviation": 0.0 + }, + "Json-dumped number of words (proxy for token count)": { + "Total # non-null values": 5, + "Average": 793.8, + "Min": 551.0, + "Max": 963.0, + "Standard deviation": 182.12 + }, + "Number of turns": { + "Total # non-null values": 5, + "Average": 1.4, + "Min": 1.0, + "Max": 2.0, + "Standard deviation": 0.548 + }, + "Temperature": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Standard deviation": 0.0 + }, + "schema_str": { + "unique_count": 5, + "total_count": 5 + }, + "schema_type": { + "unique_count": 3, + "total_count": 5 + }, + "schema_fields_count": { + "unique_count": 4, + "total_count": 5 + } +} \ No newline at end of file diff --git a/resources_servers/structured_outputs/data/structured_outputs_260309_nano_v3_sdg_json_yaml_xml_train_metrics.json b/resources_servers/structured_outputs/data/structured_outputs_260309_nano_v3_sdg_json_yaml_xml_train_metrics.json new file mode 100644 index 000000000..fefbffc6f --- /dev/null +++ b/resources_servers/structured_outputs/data/structured_outputs_260309_nano_v3_sdg_json_yaml_xml_train_metrics.json @@ -0,0 +1,54 @@ +{ + "name": "train", + "type": "train", + "jsonl_fpath": "resources_servers/structured_outputs/data/structured_outputs_260309_nano_v3_sdg_json_yaml_xml_train.jsonl", + "num_repeats": 1, + "gitlab_identifier": { + "dataset_name": "structured_outputs_260309_nano_v3_sdg_json_yaml_xml", + "version": "0.0.1", + "artifact_fpath": "structured_outputs_260309_nano_v3_sdg_json_yaml_xml_train.jsonl" + }, + "huggingface_identifier": null, + "license": "Apache 2.0", + "Number of examples": 28145, + "Number of tools": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Standard deviation": 0.0 + }, + "Json-dumped number of words (proxy for token count)": { + "Total # non-null values": 28145, + "Average": 694.45, + "Min": 101.0, + "Max": 2911.0, + "Standard deviation": 259.52 + }, + "Number of turns": { + "Total # non-null values": 28145, + "Average": 1.41, + "Min": 1.0, + "Max": 2.0, + "Standard deviation": 0.491 + }, + "Temperature": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Standard deviation": 0.0 + }, + "schema_str": { + "unique_count": 18231, + "total_count": 28145 + }, + "schema_type": { + "unique_count": 3, + "total_count": 28145 + }, + "schema_fields_count": { + "unique_count": 10, + "total_count": 28145 + } +} \ No newline at end of file diff --git a/resources_servers/structured_outputs/data/structured_outputs_260309_nano_v3_sdg_json_yaml_xml_val_metrics.json b/resources_servers/structured_outputs/data/structured_outputs_260309_nano_v3_sdg_json_yaml_xml_val_metrics.json new file mode 100644 index 000000000..81d4fb2c1 --- /dev/null +++ b/resources_servers/structured_outputs/data/structured_outputs_260309_nano_v3_sdg_json_yaml_xml_val_metrics.json @@ -0,0 +1,54 @@ +{ + "name": "validation", + "type": "validation", + "jsonl_fpath": "resources_servers/structured_outputs/data/structured_outputs_260309_nano_v3_sdg_json_yaml_xml_val.jsonl", + "num_repeats": 1, + "gitlab_identifier": { + "dataset_name": "structured_outputs_260309_nano_v3_sdg_json_yaml_xml", + "version": "0.0.1", + "artifact_fpath": "structured_outputs_260309_nano_v3_sdg_json_yaml_xml_val.jsonl" + }, + "huggingface_identifier": null, + "license": "Apache 2.0", + "Number of examples": 357, + "Number of tools": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Standard deviation": 0.0 + }, + "Json-dumped number of words (proxy for token count)": { + "Total # non-null values": 357, + "Average": 654.76, + "Min": 199.0, + "Max": 1522.0, + "Standard deviation": 230.62 + }, + "Number of turns": { + "Total # non-null values": 357, + "Average": 1.39, + "Min": 1.0, + "Max": 2.0, + "Standard deviation": 0.488 + }, + "Temperature": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Standard deviation": 0.0 + }, + "schema_str": { + "unique_count": 231, + "total_count": 357 + }, + "schema_type": { + "unique_count": 3, + "total_count": 357 + }, + "schema_fields_count": { + "unique_count": 10, + "total_count": 357 + } +} \ No newline at end of file diff --git a/resources_servers/structured_outputs/requirements.txt b/resources_servers/structured_outputs/requirements.txt index 3f80e8452..0797678d6 100644 --- a/resources_servers/structured_outputs/requirements.txt +++ b/resources_servers/structured_outputs/requirements.txt @@ -1,2 +1,3 @@ -e nemo-gym[dev] @ ../../ -openapi-schema-validator==0.6.3 \ No newline at end of file +openapi-schema-validator==0.6.3 +xmltodict==1.0.2 \ No newline at end of file diff --git a/resources_servers/structured_outputs/tests/test_app.py b/resources_servers/structured_outputs/tests/test_app.py index 5c53de1c2..93a63e1c6 100644 --- a/resources_servers/structured_outputs/tests/test_app.py +++ b/resources_servers/structured_outputs/tests/test_app.py @@ -16,6 +16,8 @@ from typing import Any from unittest.mock import AsyncMock, MagicMock +import xmltodict +import yaml from pytest import fixture from nemo_gym.openai_utils import ( @@ -254,3 +256,334 @@ async def test_verify_json(self, config: StructuredOutputsResourcesServerConfig) nested_extra_field_verify_response = await resources_server.verify(nested_extra_field_request) assert nested_extra_field_verify_response.reward == 0.0 + + async def test_verify_yaml(self, config: StructuredOutputsResourcesServerConfig) -> None: + server_mock = MagicMock(spec=ServerClient) + resources_server = StructuredOutputsResourcesServer(config=config, server_client=server_mock) + response_mock = AsyncMock() + post_mock = MagicMock() + post_mock.json = response_mock + server_mock.post = AsyncMock(return_value=post_mock) + + test_schema = { + "type": "object", + "properties": { + "studentId": {"type": "string"}, + "examSubject": {"type": "string"}, + "plannedStudyHours": {"type": "integer"}, + "isFullTimeStudent": {"type": "boolean"}, + "studyMaterials": { + "type": "object", + "properties": { + "textbooks": {"type": "array", "items": {"type": "string"}}, + "onlineResources": {"type": "array", "items": {"type": "string"}}, + "practiceExams": { + "type": "object", + "properties": { + "completedCount": {"type": "integer"}, + "averageScore": {"type": "number"}, + "mostRecentDate": {"type": "string", "format": "date"}, + }, + "required": ["completedCount", "averageScore", "mostRecentDate"], + "additionalProperties": False, + }, + }, + "required": ["textbooks", "onlineResources", "practiceExams"], + "additionalProperties": False, + }, + "studySchedule": { + "type": "object", + "properties": { + "weeklyHours": {"type": "integer"}, + "sessionsPerWeek": {"type": "integer"}, + "preferredTimeOfDay": {"type": "string", "enum": ["morning", "afternoon", "evening"]}, + "studyDays": {"type": "array", "items": {"type": "string"}}, + "breakSchedule": { + "type": "object", + "properties": { + "shortBreakMinutes": {"type": "integer"}, + "longBreakMinutes": {"type": "integer"}, + "breakFrequencyMinutes": {"type": "integer"}, + }, + "required": ["shortBreakMinutes", "longBreakMinutes", "breakFrequencyMinutes"], + "additionalProperties": False, + }, + }, + "required": ["weeklyHours", "sessionsPerWeek", "preferredTimeOfDay", "studyDays", "breakSchedule"], + "additionalProperties": False, + }, + "preparationStatus": { + "type": "string", + "enum": ["not_started", "in_progress", "review_only", "ready"], + }, + }, + } + test_completion_obj = { + "studentId": "STU12345", + "examSubject": "Calculus II", + "plannedStudyHours": 120, + "isFullTimeStudent": True, + "studyMaterials": { + "textbooks": ["Calculus: Early Transcendentals", "Schaum\u2019s Outline of Calculus"], + "onlineResources": ["Khan Academy", "Coursera Calculus Course"], + "practiceExams": {"completedCount": 8, "averageScore": 87.5, "mostRecentDate": "2024-05-10"}, + }, + "studySchedule": { + "weeklyHours": 15, + "sessionsPerWeek": 5, + "preferredTimeOfDay": "evening", + "studyDays": ["Monday", "Wednesday", "Friday"], + "breakSchedule": {"shortBreakMinutes": 10, "longBreakMinutes": 25, "breakFrequencyMinutes": 50}, + }, + "preparationStatus": "in_progress", + } + test_completion_yaml = yaml.dump(test_completion_obj, default_flow_style=False) + + schema_str = json.dumps(test_schema) + dummy_create_params = NeMoGymResponseCreateParamsNonStreaming(input=[]) + + # --- Test 1: Valid YAML --- + valid_output_item = self._create_response_output_message(test_completion_yaml) + valid_response = NeMoGymResponse( + id="valid_yaml_response_id", + created_at=1234.5, + model="test_model", + object="response", + output=[valid_output_item], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + ) + + valid_request = StructuredOutputsVerifyRequest( + responses_create_params=dummy_create_params, + response=valid_response, + schema_str=schema_str, + schema_type=SchemaType.YAML, + ) + + valid_verify_response = await resources_server.verify(valid_request) + assert valid_verify_response.reward == 1.0 + assert valid_verify_response.response == valid_response + + # --- Test 2: Invalid YAML (Not parsable) --- + invalid_yaml_completion = "key: value\n bad_indent: oops\n notvalid" + invalid_yaml_output_item = self._create_response_output_message(invalid_yaml_completion) + invalid_yaml_response = valid_response.model_copy( + deep=True, update={"id": "invalid_yaml_id", "output": [invalid_yaml_output_item]} + ) + + invalid_yaml_request = StructuredOutputsVerifyRequest( + responses_create_params=dummy_create_params, + response=invalid_yaml_response, + schema_str=schema_str, + schema_type=SchemaType.YAML, + ) + + invalid_yaml_verify_response = await resources_server.verify(invalid_yaml_request) + assert invalid_yaml_verify_response.reward == 0.0 + + # --- Test 3: Schema Mismatch (Missing field) --- + missing_field_obj = {k: v for k, v in test_completion_obj.items() if k != "studentId"} + missing_field_completion = yaml.dump(missing_field_obj, default_flow_style=False) + + missing_field_output_item = self._create_response_output_message(missing_field_completion) + missing_field_response = valid_response.model_copy( + deep=True, update={"id": "missing_field_yaml_id", "output": [missing_field_output_item]} + ) + + missing_field_request = StructuredOutputsVerifyRequest( + responses_create_params=dummy_create_params, + response=missing_field_response, + schema_str=schema_str, + schema_type=SchemaType.YAML, + ) + + missing_field_verify_response = await resources_server.verify(missing_field_request) + assert missing_field_verify_response.reward == 0.0 + + # --- Test 4: Schema Mismatch (Extra field) --- + extra_field_obj = {**test_completion_obj, "extraField": "some value"} + extra_field_completion = yaml.dump(extra_field_obj, default_flow_style=False) + + extra_field_output_item = self._create_response_output_message(extra_field_completion) + extra_field_response = valid_response.model_copy( + deep=True, update={"id": "extra_field_yaml_id", "output": [extra_field_output_item]} + ) + + extra_field_request = StructuredOutputsVerifyRequest( + responses_create_params=dummy_create_params, + response=extra_field_response, + schema_str=schema_str, + schema_type=SchemaType.YAML, + ) + + extra_field_verify_response = await resources_server.verify(extra_field_request) + assert extra_field_verify_response.reward == 0.0 + + # --- Test 5: Schema Mismatch (Wrong type) --- + wrong_type_obj = {**test_completion_obj, "plannedStudyHours": "one hundred"} + wrong_type_completion = yaml.dump(wrong_type_obj, default_flow_style=False) + + wrong_type_output_item = self._create_response_output_message(wrong_type_completion) + wrong_type_response = valid_response.model_copy( + deep=True, update={"id": "wrong_type_yaml_id", "output": [wrong_type_output_item]} + ) + + wrong_type_request = StructuredOutputsVerifyRequest( + responses_create_params=dummy_create_params, + response=wrong_type_response, + schema_str=schema_str, + schema_type=SchemaType.YAML, + ) + + wrong_type_verify_response = await resources_server.verify(wrong_type_request) + assert wrong_type_verify_response.reward == 0.0 + + # --- Test 6: Schema Mismatch (Nested extra field) --- + nested_extra_obj = json.loads(json.dumps(test_completion_obj)) + nested_extra_obj["studyMaterials"]["practiceExams"]["extraNestedField"] = "bad value" + nested_extra_field_completion = yaml.dump(nested_extra_obj, default_flow_style=False) + + nested_extra_field_output_item = self._create_response_output_message(nested_extra_field_completion) + nested_extra_field_response = valid_response.model_copy( + deep=True, update={"id": "nested_extra_yaml_id", "output": [nested_extra_field_output_item]} + ) + + nested_extra_field_request = StructuredOutputsVerifyRequest( + responses_create_params=dummy_create_params, + response=nested_extra_field_response, + schema_str=schema_str, + schema_type=SchemaType.YAML, + ) + + nested_extra_field_verify_response = await resources_server.verify(nested_extra_field_request) + assert nested_extra_field_verify_response.reward == 0.0 + + async def test_verify_xml(self, config: StructuredOutputsResourcesServerConfig) -> None: + server_mock = MagicMock(spec=ServerClient) + resources_server = StructuredOutputsResourcesServer(config=config, server_client=server_mock) + response_mock = AsyncMock() + post_mock = MagicMock() + post_mock.json = response_mock + server_mock.post = AsyncMock(return_value=post_mock) + + test_schema = { + "type": "object", + "properties": { + "root": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "score": {"type": "number"}, + "active": {"type": "boolean"}, + "tag": {"type": "array", "items": {"type": "string"}}, + }, + }, + }, + } + valid_obj = {"root": {"name": "Alice", "age": 25, "score": 95.5, "active": True, "tag": ["python", "ml"]}} + valid_xml = xmltodict.unparse(valid_obj) + + schema_str = json.dumps(test_schema) + dummy_create_params = NeMoGymResponseCreateParamsNonStreaming(input=[]) + + # --- Test 1: Valid XML (with coercion enabled by default) --- + valid_output_item = self._create_response_output_message(valid_xml) + valid_response = NeMoGymResponse( + id="valid_xml_response_id", + created_at=1234.5, + model="test_model", + object="response", + output=[valid_output_item], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + ) + + valid_request = StructuredOutputsVerifyRequest( + responses_create_params=dummy_create_params, + response=valid_response, + schema_str=schema_str, + schema_type=SchemaType.XML, + ) + + valid_verify_response = await resources_server.verify(valid_request) + assert valid_verify_response.reward == 1.0 + assert valid_verify_response.response == valid_response + + # --- Test 2: Malformed XML --- + malformed_xml = "Alice25" + malformed_output_item = self._create_response_output_message(malformed_xml) + malformed_response = valid_response.model_copy( + deep=True, update={"id": "malformed_xml_id", "output": [malformed_output_item]} + ) + + malformed_request = StructuredOutputsVerifyRequest( + responses_create_params=dummy_create_params, + response=malformed_response, + schema_str=schema_str, + schema_type=SchemaType.XML, + ) + + malformed_verify_response = await resources_server.verify(malformed_request) + assert malformed_verify_response.reward == 0.0 + + # --- Test 3: Schema Mismatch (Missing field) --- + missing_obj = {"root": {"name": "Alice", "score": 95.5, "active": True, "tag": ["python", "ml"]}} + missing_xml = xmltodict.unparse(missing_obj) + + missing_output_item = self._create_response_output_message(missing_xml) + missing_response = valid_response.model_copy( + deep=True, update={"id": "missing_field_xml_id", "output": [missing_output_item]} + ) + + missing_request = StructuredOutputsVerifyRequest( + responses_create_params=dummy_create_params, + response=missing_response, + schema_str=schema_str, + schema_type=SchemaType.XML, + ) + + missing_verify_response = await resources_server.verify(missing_request) + assert missing_verify_response.reward == 0.0 + + # --- Test 4: Schema Mismatch (Extra field) --- + extra_obj = {**valid_obj["root"], "extraField": "bad"} + extra_xml = xmltodict.unparse({"root": extra_obj}) + + extra_output_item = self._create_response_output_message(extra_xml) + extra_response = valid_response.model_copy( + deep=True, update={"id": "extra_field_xml_id", "output": [extra_output_item]} + ) + + extra_request = StructuredOutputsVerifyRequest( + responses_create_params=dummy_create_params, + response=extra_response, + schema_str=schema_str, + schema_type=SchemaType.XML, + ) + + extra_verify_response = await resources_server.verify(extra_request) + assert extra_verify_response.reward == 0.0 + + # --- Test 5: Coercion disabled -- non-string types fail validation --- + no_coerce_config = StructuredOutputsResourcesServerConfig( + host="0.0.0.0", + port=8080, + entrypoint="", + name="", + xml_coerce_types=False, + ) + no_coerce_server = StructuredOutputsResourcesServer(config=no_coerce_config, server_client=server_mock) + + no_coerce_request = StructuredOutputsVerifyRequest( + responses_create_params=dummy_create_params, + response=valid_response, + schema_str=schema_str, + schema_type=SchemaType.XML, + ) + + no_coerce_verify_response = await no_coerce_server.verify(no_coerce_request) + assert no_coerce_verify_response.reward == 0.0