JSON

Results are always returned in the following format:

{
  "prediction": <awaited result>,
  "prediction_raw": <raw result>
}

The awaited result (the "prediction" field) is the main output you expect when calling an endpoint. This is the data you will need in 99% of cases.

The raw result (the "prediction_raw" field) contains all the data relating to the transcription process. It can contain metadata, extra data, and/or the direct output from the model, unmodified by us.

For example, the output of audio transcription using the example file is:

{
  "prediction": [
    {
      "words": [
        {
          "word": "Split",
          "time_begin": 0.17807,
          "time_end": 0.65848,
          "confidence": 1
        },
        {
          "word": " infinity,",
          "time_begin": 0.87867,
          "time_end": 1.53923,
          "confidence": 1
        }
      ],
      "transcription": "Split infinity,",
      "language": "en",
      "time_begin": 0.17807,
      "time_end": 1.53923,
      "speaker": "speaker_not_activated",
      "channel": "channel_0"
    },
    {
      "words": [
        {
          "word": "in",
          "time_begin": 2.48004,
          "time_end": 2.6001399999999997,
          "confidence": 0.54
        },
        {
          "word": " a",
          "time_begin": 2.6001399999999997,
          "time_end": 2.72024,
          "confidence": 0.38
        },
        {
          "word": " time",
          "time_begin": 2.78029,
          "time_end": 3.30074,
          "confidence": 0.97
        },
        {
          "word": " when",
          "time_begin": 3.5009099999999997,
          "time_end": 3.6410299999999998,
          "confidence": 0.9
        },
        {
          "word": " less",
          "time_begin": 3.76113,
          "time_end": 4.0213600000000005,
          "confidence": 0.97
        },
        {
          "word": " is",
          "time_begin": 4.10142,
          "time_end": 4.22153,
          "confidence": 0.91
        },
        {
          "word": " more,",
          "time_begin": 4.32161,
          "time_end": 4.68192,
          "confidence": 0.88
        }
      ],
      "transcription": "in a time when less is more,",
      "language": "en",
      "time_begin": 2.48004,
      "time_end": 4.68192,
      "speaker": "speaker_not_activated",
      "channel": "channel_0"
    },
    {
      "words": [
        {
          "word": "where",
          "time_begin": 5.52264,
          "time_end": 5.70279,
          "confidence": 0.98
        },
        {
          "word": " too",
          "time_begin": 5.8229,
          "time_end": 6.04309,
          "confidence": 0.93
        },
        {
          "word": " much",
          "time_begin": 6.08312,
          "time_end": 6.44343,
          "confidence": 1
        },
        {
          "word": " is",
          "time_begin": 6.6035699999999995,
          "time_end": 6.7436799999999995,
          "confidence": 1
        },
        {
          "word": " never",
          "time_begin": 6.90382,
          "time_end": 7.2241,
          "confidence": 1
        },
        {
          "word": " enough,",
          "time_begin": 7.32418,
          "time_end": 7.7645599999999995,
          "confidence": 0.85
        }
      ],
      "transcription": "where too much is never enough,",
      "language": "en",
      "time_begin": 5.52264,
      "time_end": 7.7645599999999995,
      "speaker": "speaker_not_activated",
      "channel": "channel_0"
    },
    {
      "words": [
        {
          "word": "there",
          "time_begin": 8.50519,
          "time_end": 8.685350000000001,
          "confidence": 0.98
        },
        {
          "word": " is",
          "time_begin": 8.725380000000001,
          "time_end": 8.84548,
          "confidence": 1
        },
        {
          "word": " always",
          "time_begin": 9.00562,
          "time_end": 9.385950000000001,
          "confidence": 0.96
        },
        {
          "word": " hope",
          "time_begin": 9.486030000000001,
          "time_end": 9.8063,
          "confidence": 0.96
        },
        {
          "word": " for",
          "time_begin": 9.946420000000002,
          "time_end": 10.06653,
          "confidence": 1
        },
        {
          "word": " the",
          "time_begin": 10.086540000000001,
          "time_end": 10.206650000000002,
          "confidence": 0.96
        },
        {
          "word": " future.",
          "time_begin": 10.2667,
          "time_end": 10.78714,
          "confidence": 0.99
        }
      ],
      "transcription": "there is always hope for the future.",
      "language": "en",
      "time_begin": 8.50519,
      "time_end": 10.78714,
      "speaker": "speaker_not_activated",
      "channel": "channel_0"
    },
    {
      "words": [
        {
          "word": "The",
          "time_begin": 11.68791,
          "time_end": 11.80802,
          "confidence": 1
        },
        {
          "word": " future",
          "time_begin": 11.868070000000001,
          "time_end": 12.30845,
          "confidence": 1
        },
        {
          "word": " can",
          "time_begin": 12.368500000000001,
          "time_end": 12.54865,
          "confidence": 1
        },
        {
          "word": " be",
          "time_begin": 12.568670000000001,
          "time_end": 12.68877,
          "confidence": 1
        },
        {
          "word": " read",
          "time_begin": 12.788860000000001,
          "time_end": 13.029060000000001,
          "confidence": 0.88
        },
        {
          "word": " from",
          "time_begin": 13.14916,
          "time_end": 13.3093,
          "confidence": 1
        },
        {
          "word": " the",
          "time_begin": 13.349340000000002,
          "time_end": 13.46944,
          "confidence": 0.88
        },
        {
          "word": " past,",
          "time_begin": 13.50947,
          "time_end": 14.109990000000002,
          "confidence": 1
        }
      ],
      "transcription": "The future can be read from the past,",
      "language": "en",
      "time_begin": 11.68791,
      "time_end": 14.109990000000002,
      "speaker": "speaker_not_activated",
      "channel": "channel_0"
    },
    {
      "words": [
        {
          "word": "the",
          "time_begin": 14.550360000000001,
          "time_end": 14.67047,
          "confidence": 0.94
        },
        {
          "word": " past",
          "time_begin": 14.750530000000001,
          "time_end": 15.311010000000001,
          "confidence": 0.92
        },
        {
          "word": " foreshadows",
          "time_begin": 15.49117,
          "time_end": 16.2318,
          "confidence": 0.91
        },
        {
          "word": " the",
          "time_begin": 16.271839999999997,
          "time_end": 16.391939999999998,
          "confidence": 0.99
        },
        {
          "word": " present,",
          "time_begin": 16.43197,
          "time_end": 16.992449999999998,
          "confidence": 0.95
        }
      ],
      "transcription": "the past foreshadows the present,",
      "language": "en",
      "time_begin": 14.550360000000001,
      "time_end": 16.992449999999998,
      "speaker": "speaker_not_activated",
      "channel": "channel_0"
    },
    {
      "words": [
        {
          "word": "and",
          "time_begin": 17.57295,
          "time_end": 17.69305,
          "confidence": 1
        },
        {
          "word": " the",
          "time_begin": 17.69305,
          "time_end": 17.81315,
          "confidence": 1
        },
        {
          "word": " present",
          "time_begin": 17.853189999999998,
          "time_end": 18.35362,
          "confidence": 0.97
        },
        {
          "word": " hasn't",
          "time_begin": 18.513749999999998,
          "time_end": 18.93411,
          "confidence": 0.83
        },
        {
          "word": " been",
          "time_begin": 19.01418,
          "time_end": 19.1543,
          "confidence": 0.97
        },
        {
          "word": " written",
          "time_begin": 19.19434,
          "time_end": 19.514609999999998,
          "confidence": 0.95
        },
        {
          "word": " yet.",
          "time_begin": 19.63471,
          "time_end": 19.914949999999997,
          "confidence": 1
        }
      ],
      "transcription": "and the present hasn't been written yet.",
      "language": "en",
      "time_begin": 17.57295,
      "time_end": 19.914949999999997,
      "speaker": "speaker_not_activated",
      "channel": "channel_0"
    }
  ],
  "prediction_raw": {
    "transcription": [
      {
        "words": [
          {
            "word": "Split",
            "time_begin": 0.17807,
            "time_end": 0.65848,
            "confidence": 1
          },
          {
            "word": " infinity,",
            "time_begin": 0.87867,
            "time_end": 1.53923,
            "confidence": 1
          }
        ],
        "transcription": "Split infinity,",
        "language": "en",
        "time_begin": 0.17807,
        "time_end": 1.53923,
        "speaker": "speaker_not_activated",
        "channel": "channel_0"
      },
      {
        "words": [
          {
            "word": "in",
            "time_begin": 2.48004,
            "time_end": 2.6001399999999997,
            "confidence": 0.54
          },
          {
            "word": " a",
            "time_begin": 2.6001399999999997,
            "time_end": 2.72024,
            "confidence": 0.38
          },
          {
            "word": " time",
            "time_begin": 2.78029,
            "time_end": 3.30074,
            "confidence": 0.97
          },
          {
            "word": " when",
            "time_begin": 3.5009099999999997,
            "time_end": 3.6410299999999998,
            "confidence": 0.9
          },
          {
            "word": " less",
            "time_begin": 3.76113,
            "time_end": 4.0213600000000005,
            "confidence": 0.97
          },
          {
            "word": " is",
            "time_begin": 4.10142,
            "time_end": 4.22153,
            "confidence": 0.91
          },
          {
            "word": " more,",
            "time_begin": 4.32161,
            "time_end": 4.68192,
            "confidence": 0.88
          }
        ],
        "transcription": "in a time when less is more,",
        "language": "en",
        "time_begin": 2.48004,
        "time_end": 4.68192,
        "speaker": "speaker_not_activated",
        "channel": "channel_0"
      },
      {
        "words": [
          {
            "word": "where",
            "time_begin": 5.52264,
            "time_end": 5.70279,
            "confidence": 0.98
          },
          {
            "word": " too",
            "time_begin": 5.8229,
            "time_end": 6.04309,
            "confidence": 0.93
          },
          {
            "word": " much",
            "time_begin": 6.08312,
            "time_end": 6.44343,
            "confidence": 1
          },
          {
            "word": " is",
            "time_begin": 6.6035699999999995,
            "time_end": 6.7436799999999995,
            "confidence": 1
          },
          {
            "word": " never",
            "time_begin": 6.90382,
            "time_end": 7.2241,
            "confidence": 1
          },
          {
            "word": " enough,",
            "time_begin": 7.32418,
            "time_end": 7.7645599999999995,
            "confidence": 0.85
          }
        ],
        "transcription": "where too much is never enough,",
        "language": "en",
        "time_begin": 5.52264,
        "time_end": 7.7645599999999995,
        "speaker": "speaker_not_activated",
        "channel": "channel_0"
      },
      {
        "words": [
          {
            "word": "there",
            "time_begin": 8.50519,
            "time_end": 8.685350000000001,
            "confidence": 0.98
          },
          {
            "word": " is",
            "time_begin": 8.725380000000001,
            "time_end": 8.84548,
            "confidence": 1
          },
          {
            "word": " always",
            "time_begin": 9.00562,
            "time_end": 9.385950000000001,
            "confidence": 0.96
          },
          {
            "word": " hope",
            "time_begin": 9.486030000000001,
            "time_end": 9.8063,
            "confidence": 0.96
          },
          {
            "word": " for",
            "time_begin": 9.946420000000002,
            "time_end": 10.06653,
            "confidence": 1
          },
          {
            "word": " the",
            "time_begin": 10.086540000000001,
            "time_end": 10.206650000000002,
            "confidence": 0.96
          },
          {
            "word": " future.",
            "time_begin": 10.2667,
            "time_end": 10.78714,
            "confidence": 0.99
          }
        ],
        "transcription": "there is always hope for the future.",
        "language": "en",
        "time_begin": 8.50519,
        "time_end": 10.78714,
        "speaker": "speaker_not_activated",
        "channel": "channel_0"
      },
      {
        "words": [
          {
            "word": "The",
            "time_begin": 11.68791,
            "time_end": 11.80802,
            "confidence": 1
          },
          {
            "word": " future",
            "time_begin": 11.868070000000001,
            "time_end": 12.30845,
            "confidence": 1
          },
          {
            "word": " can",
            "time_begin": 12.368500000000001,
            "time_end": 12.54865,
            "confidence": 1
          },
          {
            "word": " be",
            "time_begin": 12.568670000000001,
            "time_end": 12.68877,
            "confidence": 1
          },
          {
            "word": " read",
            "time_begin": 12.788860000000001,
            "time_end": 13.029060000000001,
            "confidence": 0.88
          },
          {
            "word": " from",
            "time_begin": 13.14916,
            "time_end": 13.3093,
            "confidence": 1
          },
          {
            "word": " the",
            "time_begin": 13.349340000000002,
            "time_end": 13.46944,
            "confidence": 0.88
          },
          {
            "word": " past,",
            "time_begin": 13.50947,
            "time_end": 14.109990000000002,
            "confidence": 1
          }
        ],
        "transcription": "The future can be read from the past,",
        "language": "en",
        "time_begin": 11.68791,
        "time_end": 14.109990000000002,
        "speaker": "speaker_not_activated",
        "channel": "channel_0"
      },
      {
        "words": [
          {
            "word": "the",
            "time_begin": 14.550360000000001,
            "time_end": 14.67047,
            "confidence": 0.94
          },
          {
            "word": " past",
            "time_begin": 14.750530000000001,
            "time_end": 15.311010000000001,
            "confidence": 0.92
          },
          {
            "word": " foreshadows",
            "time_begin": 15.49117,
            "time_end": 16.2318,
            "confidence": 0.91
          },
          {
            "word": " the",
            "time_begin": 16.271839999999997,
            "time_end": 16.391939999999998,
            "confidence": 0.99
          },
          {
            "word": " present,",
            "time_begin": 16.43197,
            "time_end": 16.992449999999998,
            "confidence": 0.95
          }
        ],
        "transcription": "the past foreshadows the present,",
        "language": "en",
        "time_begin": 14.550360000000001,
        "time_end": 16.992449999999998,
        "speaker": "speaker_not_activated",
        "channel": "channel_0"
      },
      {
        "words": [
          {
            "word": "and",
            "time_begin": 17.57295,
            "time_end": 17.69305,
            "confidence": 1
          },
          {
            "word": " the",
            "time_begin": 17.69305,
            "time_end": 17.81315,
            "confidence": 1
          },
          {
            "word": " present",
            "time_begin": 17.853189999999998,
            "time_end": 18.35362,
            "confidence": 0.97
          },
          {
            "word": " hasn't",
            "time_begin": 18.513749999999998,
            "time_end": 18.93411,
            "confidence": 0.83
          },
          {
            "word": " been",
            "time_begin": 19.01418,
            "time_end": 19.1543,
            "confidence": 0.97
          },
          {
            "word": " written",
            "time_begin": 19.19434,
            "time_end": 19.514609999999998,
            "confidence": 0.95
          },
          {
            "word": " yet.",
            "time_begin": 19.63471,
            "time_end": 19.914949999999997,
            "confidence": 1
          }
        ],
        "transcription": "and the present hasn't been written yet.",
        "language": "en",
        "time_begin": 17.57295,
        "time_end": 19.914949999999997,
        "speaker": "speaker_not_activated",
        "channel": "channel_0"
      }
    ],
    "metadata": {
      "provided_file_metadata": {
        "nb_channels": 1,
        "sample_rate": 44100,
        "sample_width": 16,
        "duration": 20.555465,
        "original_file_type": "audio",
        "number_similar_channels": 0
      },
      "nb_silent_channels": -1,
      "total_speech_duration": 15.293079999999996,
      "audio_conversion_time": 1.0848844051361084,
      "vad_time": 0.20091629028320312,
      "inference_time": 6.32569694519043,
      "translation_time": 0.0000019073486328125,
      "emotion_time": 4.76837158203125e-7,
      "summarization_time": 4.76837158203125e-7,
      "chapterization_time": 4.76837158203125e-7,
      "total_transcription_time": 7.625882863998413
    },
    "speaker_mapping": [
      {
        "speaker": "speaker_not_activated",
        "channel": "channel_0",
        "time_begin": 0.17807,
        "time_end": 1.53923
      },
      {
        "speaker": "speaker_not_activated",
        "channel": "channel_0",
        "time_begin": 2.48004,
        "time_end": 4.68192
      },
      {
        "speaker": "speaker_not_activated",
        "channel": "channel_0",
        "time_begin": 5.52264,
        "time_end": 7.7645599999999995
      },
      {
        "speaker": "speaker_not_activated",
        "channel": "channel_0",
        "time_begin": 8.50519,
        "time_end": 10.78714
      },
      {
        "speaker": "speaker_not_activated",
        "channel": "channel_0",
        "time_begin": 11.68791,
        "time_end": 14.109990000000002
      },
      {
        "speaker": "speaker_not_activated",
        "channel": "channel_0",
        "time_begin": 14.550360000000001,
        "time_end": 16.992449999999998
      },
      {
        "speaker": "speaker_not_activated",
        "channel": "channel_0",
        "time_begin": 17.57295,
        "time_end": 19.914949999999997
      }
    ]
  }
}