JSON
Results will always be in the following format:
{
"prediction": <awaited result>,
"prediction_raw": <raw result>
}
The awaited result is the output you expect when you call an endpoint. This is the data you will need in 99% of cases.
The raw result contains all the data relating to the transcription process. It can contain metadata, extra data, and/or the direct output from the model without any change on our side.
For example, the output of audio transcription using the example file is:
{
"prediction": [
{
"words": [
{
"word": "Split",
"time_begin": 0.17807,
"time_end": 0.65848,
"confidence": 1
},
{
"word": " infinity,",
"time_begin": 0.87867,
"time_end": 1.53923,
"confidence": 1
}
],
"transcription": "Split infinity,",
"language": "en",
"time_begin": 0.17807,
"time_end": 1.53923,
"speaker": "speaker_not_activated",
"channel": "channel_0"
},
{
"words": [
{
"word": "in",
"time_begin": 2.48004,
"time_end": 2.6001399999999997,
"confidence": 0.54
},
{
"word": " a",
"time_begin": 2.6001399999999997,
"time_end": 2.72024,
"confidence": 0.38
},
{
"word": " time",
"time_begin": 2.78029,
"time_end": 3.30074,
"confidence": 0.97
},
{
"word": " when",
"time_begin": 3.5009099999999997,
"time_end": 3.6410299999999998,
"confidence": 0.9
},
{
"word": " less",
"time_begin": 3.76113,
"time_end": 4.0213600000000005,
"confidence": 0.97
},
{
"word": " is",
"time_begin": 4.10142,
"time_end": 4.22153,
"confidence": 0.91
},
{
"word": " more,",
"time_begin": 4.32161,
"time_end": 4.68192,
"confidence": 0.88
}
],
"transcription": "in a time when less is more,",
"language": "en",
"time_begin": 2.48004,
"time_end": 4.68192,
"speaker": "speaker_not_activated",
"channel": "channel_0"
},
{
"words": [
{
"word": "where",
"time_begin": 5.52264,
"time_end": 5.70279,
"confidence": 0.98
},
{
"word": " too",
"time_begin": 5.8229,
"time_end": 6.04309,
"confidence": 0.93
},
{
"word": " much",
"time_begin": 6.08312,
"time_end": 6.44343,
"confidence": 1
},
{
"word": " is",
"time_begin": 6.6035699999999995,
"time_end": 6.7436799999999995,
"confidence": 1
},
{
"word": " never",
"time_begin": 6.90382,
"time_end": 7.2241,
"confidence": 1
},
{
"word": " enough,",
"time_begin": 7.32418,
"time_end": 7.7645599999999995,
"confidence": 0.85
}
],
"transcription": "where too much is never enough,",
"language": "en",
"time_begin": 5.52264,
"time_end": 7.7645599999999995,
"speaker": "speaker_not_activated",
"channel": "channel_0"
},
{
"words": [
{
"word": "there",
"time_begin": 8.50519,
"time_end": 8.685350000000001,
"confidence": 0.98
},
{
"word": " is",
"time_begin": 8.725380000000001,
"time_end": 8.84548,
"confidence": 1
},
{
"word": " always",
"time_begin": 9.00562,
"time_end": 9.385950000000001,
"confidence": 0.96
},
{
"word": " hope",
"time_begin": 9.486030000000001,
"time_end": 9.8063,
"confidence": 0.96
},
{
"word": " for",
"time_begin": 9.946420000000002,
"time_end": 10.06653,
"confidence": 1
},
{
"word": " the",
"time_begin": 10.086540000000001,
"time_end": 10.206650000000002,
"confidence": 0.96
},
{
"word": " future.",
"time_begin": 10.2667,
"time_end": 10.78714,
"confidence": 0.99
}
],
"transcription": "there is always hope for the future.",
"language": "en",
"time_begin": 8.50519,
"time_end": 10.78714,
"speaker": "speaker_not_activated",
"channel": "channel_0"
},
{
"words": [
{
"word": "The",
"time_begin": 11.68791,
"time_end": 11.80802,
"confidence": 1
},
{
"word": " future",
"time_begin": 11.868070000000001,
"time_end": 12.30845,
"confidence": 1
},
{
"word": " can",
"time_begin": 12.368500000000001,
"time_end": 12.54865,
"confidence": 1
},
{
"word": " be",
"time_begin": 12.568670000000001,
"time_end": 12.68877,
"confidence": 1
},
{
"word": " read",
"time_begin": 12.788860000000001,
"time_end": 13.029060000000001,
"confidence": 0.88
},
{
"word": " from",
"time_begin": 13.14916,
"time_end": 13.3093,
"confidence": 1
},
{
"word": " the",
"time_begin": 13.349340000000002,
"time_end": 13.46944,
"confidence": 0.88
},
{
"word": " past,",
"time_begin": 13.50947,
"time_end": 14.109990000000002,
"confidence": 1
}
],
"transcription": "The future can be read from the past,",
"language": "en",
"time_begin": 11.68791,
"time_end": 14.109990000000002,
"speaker": "speaker_not_activated",
"channel": "channel_0"
},
{
"words": [
{
"word": "the",
"time_begin": 14.550360000000001,
"time_end": 14.67047,
"confidence": 0.94
},
{
"word": " past",
"time_begin": 14.750530000000001,
"time_end": 15.311010000000001,
"confidence": 0.92
},
{
"word": " foreshadows",
"time_begin": 15.49117,
"time_end": 16.2318,
"confidence": 0.91
},
{
"word": " the",
"time_begin": 16.271839999999997,
"time_end": 16.391939999999998,
"confidence": 0.99
},
{
"word": " present,",
"time_begin": 16.43197,
"time_end": 16.992449999999998,
"confidence": 0.95
}
],
"transcription": "the past foreshadows the present,",
"language": "en",
"time_begin": 14.550360000000001,
"time_end": 16.992449999999998,
"speaker": "speaker_not_activated",
"channel": "channel_0"
},
{
"words": [
{
"word": "and",
"time_begin": 17.57295,
"time_end": 17.69305,
"confidence": 1
},
{
"word": " the",
"time_begin": 17.69305,
"time_end": 17.81315,
"confidence": 1
},
{
"word": " present",
"time_begin": 17.853189999999998,
"time_end": 18.35362,
"confidence": 0.97
},
{
"word": " hasn't",
"time_begin": 18.513749999999998,
"time_end": 18.93411,
"confidence": 0.83
},
{
"word": " been",
"time_begin": 19.01418,
"time_end": 19.1543,
"confidence": 0.97
},
{
"word": " written",
"time_begin": 19.19434,
"time_end": 19.514609999999998,
"confidence": 0.95
},
{
"word": " yet.",
"time_begin": 19.63471,
"time_end": 19.914949999999997,
"confidence": 1
}
],
"transcription": "and the present hasn't been written yet.",
"language": "en",
"time_begin": 17.57295,
"time_end": 19.914949999999997,
"speaker": "speaker_not_activated",
"channel": "channel_0"
}
],
"prediction_raw": {
"transcription": [
{
"words": [
{
"word": "Split",
"time_begin": 0.17807,
"time_end": 0.65848,
"confidence": 1
},
{
"word": " infinity,",
"time_begin": 0.87867,
"time_end": 1.53923,
"confidence": 1
}
],
"transcription": "Split infinity,",
"language": "en",
"time_begin": 0.17807,
"time_end": 1.53923,
"speaker": "speaker_not_activated",
"channel": "channel_0"
},
{
"words": [
{
"word": "in",
"time_begin": 2.48004,
"time_end": 2.6001399999999997,
"confidence": 0.54
},
{
"word": " a",
"time_begin": 2.6001399999999997,
"time_end": 2.72024,
"confidence": 0.38
},
{
"word": " time",
"time_begin": 2.78029,
"time_end": 3.30074,
"confidence": 0.97
},
{
"word": " when",
"time_begin": 3.5009099999999997,
"time_end": 3.6410299999999998,
"confidence": 0.9
},
{
"word": " less",
"time_begin": 3.76113,
"time_end": 4.0213600000000005,
"confidence": 0.97
},
{
"word": " is",
"time_begin": 4.10142,
"time_end": 4.22153,
"confidence": 0.91
},
{
"word": " more,",
"time_begin": 4.32161,
"time_end": 4.68192,
"confidence": 0.88
}
],
"transcription": "in a time when less is more,",
"language": "en",
"time_begin": 2.48004,
"time_end": 4.68192,
"speaker": "speaker_not_activated",
"channel": "channel_0"
},
{
"words": [
{
"word": "where",
"time_begin": 5.52264,
"time_end": 5.70279,
"confidence": 0.98
},
{
"word": " too",
"time_begin": 5.8229,
"time_end": 6.04309,
"confidence": 0.93
},
{
"word": " much",
"time_begin": 6.08312,
"time_end": 6.44343,
"confidence": 1
},
{
"word": " is",
"time_begin": 6.6035699999999995,
"time_end": 6.7436799999999995,
"confidence": 1
},
{
"word": " never",
"time_begin": 6.90382,
"time_end": 7.2241,
"confidence": 1
},
{
"word": " enough,",
"time_begin": 7.32418,
"time_end": 7.7645599999999995,
"confidence": 0.85
}
],
"transcription": "where too much is never enough,",
"language": "en",
"time_begin": 5.52264,
"time_end": 7.7645599999999995,
"speaker": "speaker_not_activated",
"channel": "channel_0"
},
{
"words": [
{
"word": "there",
"time_begin": 8.50519,
"time_end": 8.685350000000001,
"confidence": 0.98
},
{
"word": " is",
"time_begin": 8.725380000000001,
"time_end": 8.84548,
"confidence": 1
},
{
"word": " always",
"time_begin": 9.00562,
"time_end": 9.385950000000001,
"confidence": 0.96
},
{
"word": " hope",
"time_begin": 9.486030000000001,
"time_end": 9.8063,
"confidence": 0.96
},
{
"word": " for",
"time_begin": 9.946420000000002,
"time_end": 10.06653,
"confidence": 1
},
{
"word": " the",
"time_begin": 10.086540000000001,
"time_end": 10.206650000000002,
"confidence": 0.96
},
{
"word": " future.",
"time_begin": 10.2667,
"time_end": 10.78714,
"confidence": 0.99
}
],
"transcription": "there is always hope for the future.",
"language": "en",
"time_begin": 8.50519,
"time_end": 10.78714,
"speaker": "speaker_not_activated",
"channel": "channel_0"
},
{
"words": [
{
"word": "The",
"time_begin": 11.68791,
"time_end": 11.80802,
"confidence": 1
},
{
"word": " future",
"time_begin": 11.868070000000001,
"time_end": 12.30845,
"confidence": 1
},
{
"word": " can",
"time_begin": 12.368500000000001,
"time_end": 12.54865,
"confidence": 1
},
{
"word": " be",
"time_begin": 12.568670000000001,
"time_end": 12.68877,
"confidence": 1
},
{
"word": " read",
"time_begin": 12.788860000000001,
"time_end": 13.029060000000001,
"confidence": 0.88
},
{
"word": " from",
"time_begin": 13.14916,
"time_end": 13.3093,
"confidence": 1
},
{
"word": " the",
"time_begin": 13.349340000000002,
"time_end": 13.46944,
"confidence": 0.88
},
{
"word": " past,",
"time_begin": 13.50947,
"time_end": 14.109990000000002,
"confidence": 1
}
],
"transcription": "The future can be read from the past,",
"language": "en",
"time_begin": 11.68791,
"time_end": 14.109990000000002,
"speaker": "speaker_not_activated",
"channel": "channel_0"
},
{
"words": [
{
"word": "the",
"time_begin": 14.550360000000001,
"time_end": 14.67047,
"confidence": 0.94
},
{
"word": " past",
"time_begin": 14.750530000000001,
"time_end": 15.311010000000001,
"confidence": 0.92
},
{
"word": " foreshadows",
"time_begin": 15.49117,
"time_end": 16.2318,
"confidence": 0.91
},
{
"word": " the",
"time_begin": 16.271839999999997,
"time_end": 16.391939999999998,
"confidence": 0.99
},
{
"word": " present,",
"time_begin": 16.43197,
"time_end": 16.992449999999998,
"confidence": 0.95
}
],
"transcription": "the past foreshadows the present,",
"language": "en",
"time_begin": 14.550360000000001,
"time_end": 16.992449999999998,
"speaker": "speaker_not_activated",
"channel": "channel_0"
},
{
"words": [
{
"word": "and",
"time_begin": 17.57295,
"time_end": 17.69305,
"confidence": 1
},
{
"word": " the",
"time_begin": 17.69305,
"time_end": 17.81315,
"confidence": 1
},
{
"word": " present",
"time_begin": 17.853189999999998,
"time_end": 18.35362,
"confidence": 0.97
},
{
"word": " hasn't",
"time_begin": 18.513749999999998,
"time_end": 18.93411,
"confidence": 0.83
},
{
"word": " been",
"time_begin": 19.01418,
"time_end": 19.1543,
"confidence": 0.97
},
{
"word": " written",
"time_begin": 19.19434,
"time_end": 19.514609999999998,
"confidence": 0.95
},
{
"word": " yet.",
"time_begin": 19.63471,
"time_end": 19.914949999999997,
"confidence": 1
}
],
"transcription": "and the present hasn't been written yet.",
"language": "en",
"time_begin": 17.57295,
"time_end": 19.914949999999997,
"speaker": "speaker_not_activated",
"channel": "channel_0"
}
],
"metadata": {
"provided_file_metadata": {
"nb_channels": 1,
"sample_rate": 44100,
"sample_width": 16,
"duration": 20.555465,
"original_file_type": "audio",
"number_similar_channels": 0
},
"nb_silent_channels": -1,
"total_speech_duration": 15.293079999999996,
"audio_conversion_time": 1.0848844051361084,
"vad_time": 0.20091629028320312,
"inference_time": 6.32569694519043,
"translation_time": 0.0000019073486328125,
"emotion_time": 4.76837158203125e-7,
"summarization_time": 4.76837158203125e-7,
"chapterization_time": 4.76837158203125e-7,
"total_transcription_time": 7.625882863998413
},
"speaker_mapping": [
{
"speaker": "speaker_not_activated",
"channel": "channel_0",
"time_begin": 0.17807,
"time_end": 1.53923
},
{
"speaker": "speaker_not_activated",
"channel": "channel_0",
"time_begin": 2.48004,
"time_end": 4.68192
},
{
"speaker": "speaker_not_activated",
"channel": "channel_0",
"time_begin": 5.52264,
"time_end": 7.7645599999999995
},
{
"speaker": "speaker_not_activated",
"channel": "channel_0",
"time_begin": 8.50519,
"time_end": 10.78714
},
{
"speaker": "speaker_not_activated",
"channel": "channel_0",
"time_begin": 11.68791,
"time_end": 14.109990000000002
},
{
"speaker": "speaker_not_activated",
"channel": "channel_0",
"time_begin": 14.550360000000001,
"time_end": 16.992449999999998
},
{
"speaker": "speaker_not_activated",
"channel": "channel_0",
"time_begin": 17.57295,
"time_end": 19.914949999999997
}
]
}
}