Speech Tiles Developer Area, STVoiceFlow Developer Guide

{
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "$id": "https://speechtiles.com/draft/schemas/AudioPromptModuleList.schema.json",
    "copyrightNotice": "© Speech Tiles LLC. 2025, All rights reserved.",
    "title": "Audio Prompt Modules",
    "description": "Version 2.0.0; last updated 10/01/2025. This document contains schema for Audio Prompt Modules used for audio playback of multiple Audio Segments or for audio playback of other referenced multiple Audio Prompt Modules. Unless documented otherwise, value of properties that are of type \"string\" can be updated at runtime by an application if their value is entered as a field name wrapped by \"$[\" and \"]\". If a property is of a type other than \"string\", then an additional property of type \"string\" with the same property name appended with the string \"Runtime\" is added to have its value entered as a field name wrapped by \"$[\" and \"]\". This allows the application to set the value of the field name at runtime to be the value of the original non-runtime property. Valid values of runtime fields override the values of non-runtime properties.",
    
    "type": "array",
    "items": {
        "description": "Array of Audio Prompt Modules",
        "type": "object",
        "properties": {
            "_COMMENT_": {
                "description": "Placeholder for entering a comment. Voice Flow processing ignores this property.",
                "type": "string"
            },
            "id": {
                "description": "Unique ID of an Audio Prompt Module. The value cannot be set at runtime.",
                "type": "string"
            },
            "style": {
                "description": "Style of selection of audio segments or Audio Prompt Modules. Values: \"single\", \"select\", \"combo\", or \"serial\". If Audio Prompt Module references a collection of other Audio Prompt Module IDs then Default: \"combo\". The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                "type": "string"
            },
            "_audioFile": {
                "description": "Placeholder for documenting or noting other audioFile options. Voice Flow processing ignores this property.",
                "type": "string"
            },
            "audioFile": {
                "description": "The name of the audio file containing the recorded Audio Segment for audio playback. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                "type": "string"
            },
            "_textString": {
                "description": "Placeholder for documenting or noting other textString options. Voice Flow processing ignores this property.",
                "type": "string"
            },
            "textString": {
                "description": "The text string to be synthesized for audio playback. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                "type": "string"
            },
            "_textFile": {
                "description": "Placeholder for documenting or noting other textFile options. Voice Flow processing ignores this property.",
                "type": "string"
            },
            "textFile": {
                "description": "The name of the text file containing the text to be synthesized for audio playback. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                "type": "string"
            },
            "textIsSSML": {
                "description": "Specifies whether the text to be synthesized contains Speech Synthesis Markup Language \"SSML\". Values: \"true\" or \"false\". Default: \"false\". The value can be set at runtime using the \"textIsSSMLRuntime\" property.",
                "type": "boolean"
            },
            "textIsSSMLRuntime": {
                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose boolean value can be set at runtime by an application. If the value of the field is a valid boolean, then it will replace the value entered in the \"textIsSSML\" property. Example: \"$[TextSSML]\".",
                "type": "string"
            },
            "autoDetectSSML": {
                "description": "Specifies whether the speech synthesis should attempt to auto detect if the text contains SSML markup and to process accordingly.",
                "type": "boolean"
            },
            "autoDetectSSMLRuntime": {
                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose boolean value can be set at runtime by an application. If the value of the field is a valid boolean, then it will replace the value entered in the \"autoDetectSSML\" property. Example: \"autoDetectSSMLRuntime\": \"$[AutoDetectSSML]\".",
                "type": "string"
            },
            "autoDetectLanguage": {
                "description": "Specifies whether the speech synthesis should attempt to auto detect the language of the text and to select the proper voice for speech synthesis.",
                "type": "boolean"
            },
            "autoDetectLanguageRuntime": {
                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose boolean value can be set at runtime by an application. If the value of the field is a valid boolean, then it will replace the value entered in the \"autoDetectLanguage\" property. Example: \"autoDetectLanguageRuntime\": \"$[AutoDetectLanguage]\".",
                "type": "string"
            },
            "ssEngine": {
                "description": "The Text To Speech or Speech Synthesizer engine to be used for Speech Synthesis. Currently, only value of \"Apple\" is supported which is also the default if this parameter is not specified. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                "type": "string"
            },
            "languageFolder": {
                "description": "The name of the language folder set up to contain the recorded audio files. Default: \"en-US\". The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                "type": "string"
            },
            "promptCollection": {
                "description": "Specifies a reference to an array of other Audio Prompt Module IDs referenced from this Audio Prompt Module.",
                "type": "array",
                "items": {
                    "description": "Array of Audio Prompt Module IDs.",
                    "type": "object",
                    "properties": {
                        "_COMMENT_": {
                            "description": "Placeholder for entering a comment. Voice Flow processing ignores this property.",
                            "type": "string"
                        },
                        "_promptID": {
                            "description": "Placeholder for documenting or noting other promptID options. Voice Flow processing ignores this property.",
                            "type": "string"
                        },
                        "promptID": {
                            "description": "The ID of an Audio Prompt Module to process for audio playback. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                            "type": "string"
                        }
                    },
                    "required": ["promptID"],
                    "additionalProperties": false
                }
            },
            "ssVoiceParams": {
                "description": "Specifies the parameters of the voice used for Speech Synthesis.",
                "type": "object",
                "properties": {
                    "_COMMENT_": {
                        "description": "Placeholder for entering a comment. Voice Flow processing ignores this property.",
                        "type": "string"
                    },
                    "identifier": {
                        "description": "The voice identifier. All supported speech synthesis voices with their parameters can be extracted from STVoiceFlow framework via STVoiceFlow and STMedia SDKs. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                        "type": "string"
                    },
                    "name": {
                        "description": "The voice name. All supported speech synthesis voices with their parameters can be extracted from STVoiceFlow framework via STVoiceFlow and STMedia SDKs. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                        "type": "string"
                    },
                    "gender": {
                        "description": "The voice gender. Options are: \"male\", \"female\" and \"unknown\". Default value is \"unknown\". All supported speech synthesis voices with their parameters can be extracted from STVoiceFlow framework via STVoiceFlow and STMedia SDKs. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                        "type": "string"
                    },
                    "quality": {
                        "description": "The voice quality. Options are: \"default\", \"enhanced\", \"premium\", \"unspecified\", and \"unknown\". Default value is \"unknown\". All supported speech synthesis voices with their parameters can be extracted from STVoiceFlow framework via STVoiceFlow and STMedia SDKs. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                        "type": "string"
                    },
                    "language": {
                        "description": "The voice language. Example: \"en-US\" for US English. All supported speech synthesis voices with their parameters can be extracted from STVoiceFlow framework via STVoiceFlow and STMedia SDKs. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                        "type": "string"
                    }
                },
                "required": [],
                "additionalProperties": false
            },
            "audioPlaybackParams": {
                "description": "Specifies audio playback parameters of the Audio Segment referenced in an Audio Prompt Module.",
                "type": "object",
                "properties": {
                    "_COMMENT_": {
                        "description": "Placeholder for entering a comment. Voice Flow processing ignores this property.",
                        "type": "string"
                    },
                    "loopPlay": {
                        "description": "Specifies whether to repeat audio playback of Audio Segment after audio playback ends.  The value can be set at runtime using the \"loopPlayRuntime\" property. Default: \"false\".",
                        "type": "boolean"
                    },
                    "loopPlayRuntime": {
                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose boolean value can be set at runtime by an application. If the value of the field is a valid boolean, then it will replace the value entered in the \"loopPlay\" property. Example: \"$[LoopPlay]\".",
                        "type": "string"
                    },
                    "loopCount": {
                        "description": "Automatic maximum count of audio playbacks of Audio Segment if \"loopPlay\" property is \"true\". The value can be set at runtime using the \"loopCountRuntime\" property. Default: 1.",
                        "type": "integer"
                    },
                    "loopCountRuntime": {
                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"loopCount\" property. Example: \"$[LoopCount]\".",
                        "type": "string"
                    },
                    "volume": {
                        "description": "Specifies a volume to apply to audio playback. The value can be set at runtime using the \"volumeRuntime\" property. Values: minimum value > 0.0. Max value is 1.0. Default: 1.0.",
                        "type": "number"
                    },
                    "volumeRuntime": {
                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose float value can be set at runtime by an application. If the value of the field is a valid float, then it will replace the value entered in the \"volume\" property. Example: \"$[Volume]\".",
                        "type": "string"
                    },
                    "rate": {
                        "description": "Specifies a rate to apply to audio playback. The value can be set at runtime using the \"rateRuntime\" property. Values: between 0.5 and 2.0. Default: 1.0.",
                        "type": "number"
                    },
                    "rateRuntime": {
                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose float value can be set at runtime by an application. If the value of the field is a valid float, then it will replace the value entered in the \"rate\" property. Example: \"$[Rate]\".",
                        "type": "string"
                    },
                    "pitch": {
                        "description": "Specifies a pitch to apply to audio playback. The value can be set at runtime using the \"pitchRuntime\" property. Values between -5.0 and 5.0. Default: 0.0.",
                        "type": "number"
                    },
                    "pitchRuntime": {
                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose float value can be set at runtime by an application. If the value of the field is a valid float, then it will replace the value entered in the \"pitch\" property. Example: \"$[Pitch]\".",
                        "type": "string"
                    },
                    "fadeOutMs": {
                        "description": "The duration in milliseconds for audio playback of the Audio Segment to fade out before completely stopping. The value can be set at runtime using the \"fadeOutMsRuntime\" property. Default: 0.",
                        "type": "integer"
                    },
                    "fadeOutMsRuntime": {
                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"fadeOutMs\" property. Example: \"$[FadeOutMs]\".",
                        "type": "string"
                    },
                    "startPosMs": {
                        "description": "Time position in milliseconds at which audio playback of the Audio Segment starts. The value can be set at runtime using the \"startPosMsRuntime\" property. Default: 0.",
                        "type": "integer"
                    },
                    "startPosMsRuntime": {
                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"startPosMs\" property. Example: \"$[StartPosMs]\".",
                        "type": "string"
                    },
                    "stopPosMs": {
                        "description": "Time position in milliseconds at which audio playback of the Audio Segment stops. The value can be set at runtime using the \"stopPosMsRuntime\" property. Default: 0, meaning audio playback continues to the end of the audio.",
                        "type": "integer"
                    },
                    "stopPosMsRuntime": {
                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"stopPosMs\" property. Example: \"$[StopPosMs]\".",
                        "type": "string"
                    },
                    "notifyPosMs": {
                        "description": "Time position in milliseconds of audio playback that, when reached, causes a notification to be sent to the Client. The value can be set at runtime using the \"notifyPosMsRuntime\" property.",
                        "type": "integer"
                    },
                    "notifyPosMsRuntime": {
                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"notifyPosMs\" property. Example: \"$[NotifyPosMs]\".",
                        "type": "string"
                    },
                    "playSSSilenceParams": {
                        "description": "Specifies the Speech Synthesis parameters for audio play of synthesized text.",
                        "type": "object",
                        "properties": {
                            "_COMMENT_": {
                                "description": "Placeholder for entering a comment. Voice Flow processing ignores this property.",
                                "type": "string"
                            },
                            "preStartSilenceDurationMs": {
                                "description": "Duration of silence to generate (for audio playback or to record) before the start of speech synthesis of text. Default value: 0. The value can be set at runtime using the \"preStartSilenceDurationMsRuntime\" property.",
                                "type": "integer"
                            },
                            "preStartSilenceDurationMsRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"preStartSilenceDurationMs\" property. Example: \"$[PreStartSilenceDurationMs]\".",
                                "type": "string"
                            },
                            "postFinishSilenceDurationMs": {
                                "description": "Duration of silence to generate (for audio playback or to record) after the end of speech synthesis of text. Default value: 0. The value can be set at runtime using the \"postFinishSilenceDurationRuntime\" property.",
                                "type": "integer"
                            },
                            "postFinishSilenceDurationRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"postFinishSilenceDurationMs\" property. Example: \"$[PostFinishSilenceDurationMs]\".",
                                "type": "string"
                            },
                            "interPhraseSilenceDurationMs": {
                                "description": "Duration of silence to generate (for audio playback or to record) between speech synthesis of phrases from a text. Default value: 0. The value can be set at runtime using the \"interPhraseSilenceDurationMsRuntime\" property.",
                                "type": "integer"
                            },
                            "interPhraseSilenceDurationMsRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"interPhraseSilenceDurationMs\" property. Example: \"$[InterPhraseSilenceDurationMs]\".",
                                "type": "string"
                            },
                            "interParagraphSilenceDurationMs": {
                                "description": "Duration of silence to generate (for audio playback or to record) between speech synthesis of paragraphs from a text. Default value: 0. The value can be set at runtime using the \"interParagraphSilenceDurationMsRuntime\" property.",
                                "type": "integer"
                            },
                            "interParagraphSilenceDurationMsRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"interParagraphSilenceDurationMs\" property. Example: \"$[InterParagraphSilenceDurationMs]\".",
                                "type": "string"
                            },
                            "skipSilenceDurationMs": {
                                "description": "Duration of silence to generate for audio playback before the continuation of speech synthesis audio playback after the audio playback of the previous speech synthesis is skipped. Default value: 0. The value can be set at runtime using the \"skipSilenceDurationMsRuntime\" property.",
                                "type": "integer"
                            },
                            "skipSilenceDurationMsRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"skipSilenceDurationMs\" property. Example: \"$[SkipSilenceDurationMs]\".",
                                "type": "string"
                            }
                            
                        },
                        "required": [],
                        "additionalProperties": false
                    },
                    "recordSSParams": {
                        "description": "The parameters to control saving speech synthesis of an audio segment into a file",
                        "type": "object",
                        "properties": {
                            "_COMMENT_": {
                                "description": "Placeholder for entering a comment. Voice Flow processing ignores this property.",
                                "type": "string"
                            },
                            "enable": {
                                "description": "Enable or disable saving speech synthesis into a file. The value can be set at runtime using the \"enableRuntime\" property.",
                                "type": "boolean"
                            },
                            "enableRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose boolean value can be set at runtime by an application. If the value of the field is a valid boolean, then it will replace the value entered in the \"enable\" property. Example: \"$[Enable]\".",
                                "type": "string"
                            },
                            "offlineMode": {
                                "description": "If true, then audio playback will not occur on a device. Only speech synthesis is saved into a file. The value can be set at runtime using the \"offlineModeRuntime\" property.",
                                "type": "boolean"
                            },
                            "offlineModeRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose boolean value can be set at runtime by an application. If the value of the field is a valid boolean, then it will replace the value entered in the \"offlineMode\" property. Example: \"$[OfflineMode]\".",
                                "type": "string"
                            },
                            "recordInsertedSilence": {
                                "description": "If true, then inserted silence in synthesized speech between phrases and paragraphs gets also recorded. The value can be set at runtime using the \"recordInsertedSilenceRuntime\" property.",
                                "type": "boolean"
                            },
                            "recordInsertedSilenceRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose boolean value can be set at runtime by an application. If the value of the field is a valid boolean, then it will replace the value entered in the \"recordInsertedSilence\" property. Example: \"$[RecordInsertedSilence]\".",
                                "type": "string"
                            },
                            "filetype": {
                                "description": "Type of audio file to contain the synthesized audio. Options are \"wav\", \"m4a\" and \"caf\". The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                                "type": "string"
                            },
                            "fileBaseName": {
                                "description": "File base name. \".wav\", \".m4a\" or \".caf\" is appended. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                                "type": "string"
                            },
                            "fileURLPath": {
                                "description": "Folder path where the synthesized audio file is saved. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                                "type": "string"
                            },
                            "languageFolder": {
                                "description": "If specified, the \"languageFolder\" name is appended to the \"fileURLPath\". The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                                "type": "string"
                            },
                            "audioFormat": {
                                "description": "The parameters to control the audio format of the recorded synthesized audio to a file.",
                                "type": "object",
                                "properties": {
                                    "_COMMENT_": {
                                        "description": "Placeholder for entering a comment. Voice Flow processing ignores this property.",
                                        "type": "string"
                                    },
                                    "formatID": {
                                        "description": "The audio format of the recorded audio to a file. Options are: \"linearpcm\" for linear PCM, \"aac\" for Apple MPEG4 AAC, and \"alac\" for Apple Lossless. Default value is \"linearpcm\". The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                                        "type": "string"
                                    },
                                    "commonFormat": {
                                        "description": "The common format of the recorded audio to a file. Options are: \"int16\", \"int32\", \"float32\" and \"float64\". Default value is \"float32\". The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                                        "type": "string"
                                    },
                                    "sampleRate": {
                                        "description": "The sampling rate of the recorded audio to a file. Options are: \"16000\", \"22050\", \"32000\", \"44100\" and \"48000\". Default value is determined automatically by the media engine. The value can be set at runtime using the \"sampleRateRuntime\" property.",
                                        "type": "integer"
                                    },
                                    "sampleRateRuntime": {
                                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"sampleRate\" property. Example: \"$[SampleRate]\".",
                                        "type": "string"
                                    },
                                    "channelCount": {
                                        "description": "The number of channels of the recorded audio to a file. Options are: \"1\" and \"2\". Default value is determined automatically by the media engine. The value can be set at runtime using the \"channelCountRuntime\" property.",
                                        "type": "integer"
                                    },
                                    "channelCountRuntime": {
                                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"channelCount\" property. Example: \"$[ChannelCount]\".",
                                        "type": "string"
                                    },
                                    "encoderBitsPerSample": {
                                        "description": "The number of encoder bits per sample of the recorded audio to a file. This only applies to format ID \"alac\". Options are: \"16\", \"20\", \"24\" and \"32\". Default value is \"32\". The value can be set at runtime using the \"encoderBitsPerSampleRuntime\" property.",
                                        "type": "integer"
                                    },
                                    "encoderBitsPerSampleRuntime": {
                                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"encoderBitsPerSample\" property. Example: \"$[EncoderBitsPerSample]\".",
                                        "type": "string"
                                    },
                                    "encoderBitRate": {
                                        "description": "The encoder bit rate of the recorded audio to a file. This only applies to format ID \"aac\". Examples of values: \"32000\", \"64000\", \"96000\" and \"128000\". Default value is determined automatically by the media engine. Note: Setting the bit rate to higher may result in failure in recording audio to a file. The value can be set at runtime using the \"encoderBitRateRuntime\" property.",
                                        "type": "integer"
                                    },
                                    "encoderBitRateRuntime": {
                                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"encoderBitRate\" property. Example: \"$[EncoderBitRate]\".",
                                        "type": "string"
                                    }
                                },
                                "required": [],
                                "additionalProperties": false
                            }
                        },
                        "required": [],
                        "additionalProperties": false
                    }
                },
                "required": [],
                "additionalProperties": false
            }
        },
        "required": ["id", "style"],
        "additionalProperties": false
    }
}

Speech Tiles Developers

Conversational Speech Frameworks

Speech Tiles
Developers

Conversational Speech Frameworks

Conversational Speech Frameworks

STVoiceFlow Framework Reference
2.0.0
Audio Prompt Module List JSON Schema

STVoiceFlow Framework Reference 2.0.0 Audio Prompt Module List JSON Schema

STVoiceFlow Framework Reference
2.0.0
Audio Prompt Module List JSON Schema