#include "fmp4boxes.hpp"
#include "debug/trace.hpp"
#include "util/Base64.hpp"
#include <algorithm>
#include <cstring>

namespace twitch {
namespace media {
// Stores a single byte at data[ofst].
// When `data` is null nothing is written, so the return value alone can be
// used to measure how much space a field needs. Always returns 1.
size_t MoovBox::write_uint8(uint8_t* data, size_t ofst, uint8_t val)
{
    if (nullptr != data) {
        *(data + ofst) = val;
    }

    return sizeof(uint8_t);
}

// Stores `val` as two bytes, most-significant byte first, starting at
// data[ofst]. A null `data` skips the store. Always returns 2.
size_t MoovBox::write_uint16(uint8_t* data, size_t ofst, uint16_t val)
{
    if (nullptr != data) {
        uint8_t* out = data + ofst;
        out[0] = static_cast<uint8_t>(val >> 8);
        out[1] = static_cast<uint8_t>(val);
    }

    return sizeof(uint16_t);
}

// Stores the low 24 bits of `val` as three bytes, most-significant byte
// first, starting at data[ofst]. Bits above the low 24 are discarded.
// A null `data` skips the store. Always returns 3.
size_t MoovBox::write_uint24(uint8_t* data, size_t ofst, uint32_t val)
{
    if (nullptr != data) {
        uint8_t* out = data + ofst;
        out[0] = static_cast<uint8_t>(val >> 16);
        out[1] = static_cast<uint8_t>(val >> 8);
        out[2] = static_cast<uint8_t>(val);
    }

    return 3;
}

// Stores `val` as four bytes, most-significant byte first, starting at
// data[ofst]. A null `data` skips the store. Always returns 4.
size_t MoovBox::write_uint32(uint8_t* data, size_t ofst, uint32_t val)
{
    if (nullptr != data) {
        for (int byte = 0; byte < 4; ++byte) {
            // byte 0 takes bits 31..24, byte 3 takes bits 7..0
            data[ofst + byte] = static_cast<uint8_t>(val >> (8 * (3 - byte)));
        }
    }

    return sizeof(uint32_t);
}

// Stores `val` as eight bytes, most-significant byte first, starting at
// data[ofst]. A null `data` skips the store. Always returns 8.
size_t MoovBox::write_uint64(uint8_t* data, size_t ofst, uint64_t val)
{
    if (nullptr != data) {
        for (int byte = 0; byte < 8; ++byte) {
            // byte 0 takes bits 63..56, byte 7 takes bits 7..0
            data[ofst + byte] = static_cast<uint8_t>(val >> (8 * (7 - byte)));
        }
    }

    return sizeof(uint64_t);
}

// Copies `len` bytes from `src` to data + ofst. When `src` is null the
// destination range is zero-filled instead. When `data` is null nothing is
// touched. Returns `len` in every case.
size_t MoovBox::write_data(uint8_t* data, size_t ofst, const uint8_t* src, size_t len)
{
    if (!data) {
        return len;
    }

    uint8_t* dst = data + ofst;
    if (src) {
        memcpy(dst, src, len);
    } else {
        memset(dst, 0, len);
    }

    return len;
}

size_t MoovBox::write_data(uint8_t* data, size_t ofst, const std::vector<uint8_t>& payload)
{
    return write_data(data, ofst, payload.data(), payload.size());
}

// Writes `str` as a NUL-terminated C string at data + ofst.
// The length is taken with strlen, so a std::string containing embedded
// NULs is cut at the first one. Returns the bytes written, terminator
// included.
size_t MoovBox::write_string(uint8_t* data, size_t ofst, const std::string& str)
{
    const char* text = str.c_str();
    const size_t bytes = strlen(text) + 1; // +1 for the terminating NUL
    return write_data(data, ofst, reinterpret_cast<const uint8_t*>(text), bytes);
}

// Emits a plain box header at data + ofst: a 32-bit size field followed by
// the 32-bit box type. The size field is provisionally set to
// BOX_HEADER_SIZE; update_size must be called afterwards to set the correct
// size. Returns BOX_HEADER_SIZE.
size_t MoovBox::make_box(uint8_t* data, size_t ofst, uint32_t type)
{
    const size_t sizeField = ofst;
    const size_t typeField = ofst + 4;
    write_uint32(data, sizeField, BOX_HEADER_SIZE);
    write_uint32(data, typeField, type);
    return BOX_HEADER_SIZE;
}

// Emits a full-box header at data + ofst: 32-bit size, 32-bit type, 8-bit
// version and 24-bit flags. The size field is provisionally set to
// FULLBOX_HEADER_SIZE and is expected to be patched later with update_size.
// Returns FULLBOX_HEADER_SIZE.
size_t MoovBox::make_full_box(uint8_t* data, size_t ofst, uint32_t type, uint8_t version, uint32_t flags)
{
    const size_t sizeField = ofst;
    const size_t typeField = ofst + 4;
    const size_t versionField = ofst + 8;
    const size_t flagsField = ofst + 9;

    write_uint32(data, sizeField, FULLBOX_HEADER_SIZE);
    write_uint32(data, typeField, type);
    write_uint8(data, versionField, version);
    write_uint24(data, flagsField, flags);
    return FULLBOX_HEADER_SIZE;
}

// Overwrites the 32-bit size field of the box that starts at data + ofst
// with `size` (narrowed to 32 bits) and returns `size` unchanged.
size_t MoovBox::update_size(uint8_t* data, size_t ofst, size_t size)
{
    const uint32_t boxSize = static_cast<uint32_t>(size);
    write_uint32(data, ofst, boxSize);
    return size;
}

// Builds a 'skip' box at the start of `data` whose payload is `size` zero
// bytes. Returns the total box size (header plus payload).
size_t MoovBox::make_skip(uint8_t* data, uint32_t size)
{
    const size_t header = make_box(data, 0, MP4_skip);
    const size_t padding = write_data(data, header, nullptr, size); // null source => zero fill
    return update_size(data, 0, header + padding);
}

// Returns a freshly allocated 'skip' box with `size` payload bytes.
// The buffer holds two 32-bit header fields (size and type) plus the payload.
std::vector<uint8_t> MoovBox::make_skip(uint32_t size)
{
    std::vector<uint8_t> skip(sizeof(uint32_t) + sizeof(uint32_t) + size);
    make_skip(skip.data(), size);
    return skip;
}

// Builds an 'ftyp' box at the start of `data`: major brand, minor version,
// then every compatible brand as a 32-bit value. Returns the total box size.
size_t MoovBox::make_ftyp(uint8_t* data, uint32_t major_brand,
    uint32_t minor_version,
    const std::vector<uint32_t>& compatible_brands)
{
    size_t len = make_box(data, 0, MP4_ftyp);
    len += write_uint32(data, len, major_brand);
    len += write_uint32(data, len, minor_version);

    for (auto it = compatible_brands.begin(); it != compatible_brands.end(); ++it) {
        len += write_uint32(data, len, *it);
    }

    return update_size(data, 0, len);
}

// Builds the 'moov' box at the start of `data`: mvhd, one trak per track,
// mvex, and finally every distinct pssh blob collected from the tracks'
// encryption info. Returns the total box size.
size_t MoovBox::make_moov(uint8_t* data, const fmp4& mp4)
{
    size_t len = make_box(data, 0, MP4_moov);
    len += make_mvhd(data, len, static_cast<uint32_t>(mp4.nextTrackId()));

    for (const auto& entry : mp4.tracks()) {
        len += make_trak(data, len, *entry.second);
    }

    len += make_mvex(data, len, mp4);

    // Gather the pssh blobs of all tracks, then drop duplicates so that each
    // distinct blob is emitted once (in sorted order).
    std::vector<std::vector<uint8_t>> boxes;
    for (const auto& entry : mp4.tracks()) {
        for (const auto& info : entry.second->getEncryptionInfo()) {
            boxes.insert(boxes.end(), info.pssh.begin(), info.pssh.end());
        }
    }

    std::sort(boxes.begin(), boxes.end());
    boxes.erase(std::unique(boxes.begin(), boxes.end()), boxes.end());

    for (const std::vector<uint8_t>& box : boxes) {
        // TODO sanity check pssh box
        len += write_data(data, len, box);
    }

    return update_size(data, 0, len);
}

size_t MoovBox::make_pssh(uint8_t* data, size_t ofst, const Uuid& systemId,
    const std::vector<KeyId_t>& kids,
    const uint8_t* payload_data, size_t payload_size)
{
    int32_t flags = 0;
    uint8_t version = kids.empty() ? 0 : 1;
    size_t i = make_full_box(data, ofst, MP4_pssh, version, flags);

    auto systemIdBytes = systemId.toBytes();
    i += write_data(data, ofst + i, systemIdBytes.data(), systemIdBytes.size());
    if (0 < version) {
        i += write_uint32(data, ofst + i, static_cast<uint32_t>(kids.size()));
        for (const auto& kid : kids) {
            i += write_data(data, ofst + i, kid.data(), kid.size());
        }
    }

    i += write_uint32(data, ofst + i, static_cast<uint32_t>(payload_size));
    i += write_data(data, ofst + i, payload_data, payload_size);
    return update_size(data, ofst, i);
}

// Builds a version 0 'emsg' box at the start of `data`: the two
// NUL-terminated strings (scheme id URI, value), four 32-bit fields
// (timescale, presentation time delta, event duration, id) and the raw
// message bytes. Returns the total box size.
size_t MoovBox::make_emsg(uint8_t* data, const std::string& scheme_id_uri, const std::string& value,
    uint32_t timescale, uint32_t presentation_time_delta,
    uint32_t event_duration, uint32_t id, const std::vector<uint8_t>& message_data)
{
    size_t len = make_full_box(data, 0, MP4_emsg, 0, 0);

    // string fields
    len += write_string(data, len, scheme_id_uri);
    len += write_string(data, len, value);

    // fixed-width fields
    len += write_uint32(data, len, timescale);
    len += write_uint32(data, len, presentation_time_delta);
    len += write_uint32(data, len, event_duration);
    len += write_uint32(data, len, id);

    // opaque payload
    len += write_data(data, len, message_data);
    return update_size(data, 0, len);
}

// Builds a 'moof' box at the start of `data`: mfhd followed by one traf per
// track. Each traf receives a data offset that starts at `data_offset` and
// is advanced by the size of every preceding track's data.
// Returns the total box size.
size_t MoovBox::make_moof(uint8_t* data, uint32_t sequence_number, uint32_t data_offset, const fmp4& mp4)
{
    size_t len = make_box(data, 0, MP4_moof);
    len += make_mfhd(data, len, sequence_number);

    uint32_t trackDataOffset = data_offset;
    for (const auto& entry : mp4.tracks()) {
        const auto& track = *entry.second;
        len += make_traf(data, len, trackDataOffset, track);
        trackDataOffset += static_cast<uint32_t>(track.data().size());
    }

    return update_size(data, 0, len);
}

// Writes only the header of an 'mdat' box at the start of `data` and sets
// its size field to `size`. No payload is written here. Returns `size`.
size_t MoovBox::make_mdat(uint8_t* data, uint32_t size)
{
    const size_t boxStart = 0;
    make_box(data, boxStart, MP4_mdat);
    const size_t declaredSize = size;
    return update_size(data, boxStart, declaredSize);
}

// Builds a version 0 'mvhd' box at data + ofst with zeroed timestamps and
// duration, MOOV_TIMESCALE as the timescale, the unity matrix, and
// `next_track_id` as the last field. Returns the total box size.
size_t MoovBox::make_mvhd(uint8_t* data, size_t ofst, uint32_t next_track_id)
{
    // Unity matrix, written row by row.
    const uint32_t unityMatrix[9] = {
        0x00010000, 0, 0,
        0, 0x00010000, 0,
        0, 0, 0x40000000
    };

    size_t len = make_full_box(data, ofst, MP4_mvhd, 0, 0);
    len += write_uint32(data, ofst + len, 0); // creation_time
    len += write_uint32(data, ofst + len, 0); // modification_time
    len += write_uint32(data, ofst + len, MOOV_TIMESCALE); // timescale
    len += write_uint32(data, ofst + len, 0); // duration (What about LIVE?)
    len += write_uint32(data, ofst + len, 0x00010000); // preferred rate
    len += write_uint16(data, ofst + len, 0x0100); // preferred volume
    len += write_uint16(data, ofst + len, 0); // reserved

    for (int reserved = 0; reserved < 2; ++reserved) {
        len += write_uint32(data, ofst + len, 0); // reserved
    }

    for (int cell = 0; cell < 9; ++cell) {
        len += write_uint32(data, ofst + len, unityMatrix[cell]);
    }

    for (int slot = 0; slot < 6; ++slot) {
        len += write_uint32(data, ofst + len, 0); // pre_defined
    }

    len += write_uint32(data, ofst + len, next_track_id);
    return update_size(data, ofst, len);
}

/* By creating an mehd with 0 duration fragments we can force the browser into "low delay mode"
  https://bugs.chromium.org/p/chromium/issues/detail?id=465324
  Hmm. The source code does not seem to match this (same results for both zero duration and not included)
  https://cs.chromium.org/chromium/src/media/formats/mp4/mp4_stream_parser.cc?type=cs&q=mehd+package:%5Echromium$&l=486
  Leaving in as explicit is better than implicit.

  Builds an 'mehd' box at data + ofst. Version 1 (64-bit duration) is used
  when any of bits 32..62 of fragment_duration are set; otherwise version 0
  (32-bit duration) is used. Returns the total box size.
*/
size_t MoovBox::make_mehd(uint8_t* data, size_t ofst, int64_t fragment_duration)
{
    const bool needs64Bits = 0 != (fragment_duration & 0x7FFFFFFF00000000ll);
    const uint8_t version = needs64Bits ? 1 : 0;
    size_t len = make_full_box(data, ofst, MP4_mehd, version, 0);

    // fragment_duration
    len += needs64Bits
        ? write_uint64(data, ofst + len, static_cast<uint64_t>(fragment_duration))
        : write_uint32(data, ofst + len, static_cast<uint32_t>(fragment_duration));

    return update_size(data, ofst, len);
}

// Builds a 'trex' box at data + ofst carrying the track id, a sample
// description index of 1 and the default sample duration/size/flags taken
// from the track's track-run settings. Returns the total box size.
size_t MoovBox::make_trex(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    const auto& defaults = track.getTrackRun();

    size_t len = make_full_box(data, ofst, MP4_trex, 0, 0);
    len += write_uint32(data, ofst + len, track.getId()); // track_ID
    len += write_uint32(data, ofst + len, 1); // default_sample_description_index
    len += write_uint32(data, ofst + len, defaults.default_sample_duration);
    len += write_uint32(data, ofst + len, defaults.default_sample_size);
    len += write_uint32(data, ofst + len, defaults.default_sample_flags);
    return update_size(data, ofst, len);
}

// Builds an 'mvex' box at data + ofst containing one trex per track.
// The mehd child is currently disabled (see the commented-out call).
// Returns the total box size.
size_t MoovBox::make_mvex(uint8_t* data, size_t ofst, const fmp4& mp4)
{
    size_t len = make_box(data, ofst, MP4_mvex);
    // len += make_mehd(data, ofst + len, mp4.lowDelayMode() ? 0 : mp4.fragmentDuration());

    for (const auto& entry : mp4.tracks()) {
        const auto& track = *entry.second;
        len += make_trex(data, ofst + len, track);
    }

    return update_size(data, ofst, len);
}

// Builds a version 0 'tkhd' box at data + ofst for `track`.
// Volume is 256 for sound tracks and 0 otherwise; width/height are written
// (shifted left by 16) for video tracks and are 0 otherwise.
// Returns the total box size.
size_t MoovBox::make_tkhd(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    // Unity matrix, written row by row.
    const uint32_t unityMatrix[9] = {
        0x00010000, 0, 0,
        0, 0x00010000, 0,
        0, 0, 0x40000000
    };

    // flags is a 24-bit integer; the following values are defined:
    //   0x000001 Track_enabled    - a disabled track (low bit zero) is treated as if it were not present
    //   0x000002 Track_in_movie   - the track is used in the presentation
    //   0x000004 Track_in_preview - the track is used when previewing the presentation
    // 3 == Track_enabled | Track_in_movie
    size_t len = make_full_box(data, ofst, MP4_tkhd, 0, 3);
    len += write_uint32(data, ofst + len, 0); // creation_time
    len += write_uint32(data, ofst + len, 0); // modification_time
    len += write_uint32(data, ofst + len, track.getId()); // track_id
    len += write_uint32(data, ofst + len, 0); // reserved
    len += write_uint32(data, ofst + len, 0); // duration
    len += write_uint32(data, ofst + len, 0); // reserved
    len += write_uint32(data, ofst + len, 0); // reserved
    len += write_uint16(data, ofst + len, 0); // layer
    len += write_uint16(data, ofst + len, 0); // alternate_group

    // TODO track volume?
    const bool isSound = MP4_soun == track.getHandlerType();
    len += write_uint16(data, ofst + len, isSound ? 256 : 0); // 0 if video or webvtt
    len += write_uint16(data, ofst + len, 0); // reserved

    for (int cell = 0; cell < 9; ++cell) {
        len += write_uint32(data, ofst + len, unityMatrix[cell]);
    }

    if (MP4_vide == track.getHandlerType()) {
        len += write_uint32(data, ofst + len, track.getVideoInfo().width << 16);
        len += write_uint32(data, ofst + len, track.getVideoInfo().height << 16);
    } else {
        len += write_uint32(data, ofst + len, 0); // width: 0 for non-video
        len += write_uint32(data, ofst + len, 0); // height: 0 for non-video
    }

    return update_size(data, ofst, len);
}

// Builds a version 0 'elst' box at data + ofst with exactly one entry:
// segment_duration 0, the given media_time, and a media rate of 1.0.
// Returns the total box size.
size_t MoovBox::make_elst(uint8_t* data, size_t ofst, int32_t media_time)
{
    // MSE supports only a single elst entry, so exactly one is emitted.
    const uint32_t entryCount = 1;

    size_t len = make_full_box(data, ofst, MP4_elst, 0, 0);
    len += write_uint32(data, ofst + len, entryCount); // entry_count

    // the single entry
    len += write_uint32(data, ofst + len, 0); // segment_duration
    len += write_sint32(data, ofst + len, media_time); // media_time
    len += write_sint16(data, ofst + len, 1); // media_rate_integer
    len += write_sint16(data, ofst + len, 0); // media_rate_fraction

    return update_size(data, ofst, len);
}

// Builds an 'edts' box at data + ofst wrapping a single 'elst' child
// (MSE supports only a single elst). Returns the total box size.
size_t MoovBox::make_edts(uint8_t* data, size_t ofst, int32_t media_time)
{
    const size_t header = make_box(data, ofst, MP4_edts);
    const size_t editList = make_elst(data, ofst + header, media_time);
    return update_size(data, ofst, header + editList);
}

// Builds a version 0 'mdhd' box at data + ofst: zeroed timestamps and
// duration, the track's timescale, and language code 0x55c4 ("und").
// Returns the total box size.
size_t MoovBox::make_mdhd(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    const uint16_t languageUnd = 0x55c4; // language = und

    size_t len = make_full_box(data, ofst, MP4_mdhd, 0, 0);
    len += write_uint32(data, ofst + len, 0); // creation_time
    len += write_uint32(data, ofst + len, 0); // modification_time
    len += write_uint32(data, ofst + len, track.getTimescale()); // timescale, for audio use sample rate!
    len += write_uint32(data, ofst + len, 0); // duration (What about LIVE?)
    len += write_uint16(data, ofst + len, languageUnd);
    len += write_uint16(data, ofst + len, 0); // pre_defined
    return update_size(data, ofst, len);
}

// Builds an 'hdlr' box at data + ofst: pre_defined, the handler type, three
// reserved words and the NUL-terminated handler name.
// Returns the total box size.
size_t MoovBox::make_hdlr(uint8_t* data, size_t ofst, uint32_t handler_type, const std::string& name)
{
    size_t len = make_full_box(data, ofst, MP4_hdlr, 0, 0);
    len += write_uint32(data, ofst + len, 0); // pre_defined
    len += write_uint32(data, ofst + len, handler_type); // handler_type

    for (int word = 0; word < 3; ++word) {
        len += write_uint32(data, ofst + len, 0); // reserved
    }

    len += write_string(data, ofst + len, name); // name
    return update_size(data, ofst, len);
}

// Builds a 'vmhd' box at data + ofst with flags 1, graphicsmode 0 and a
// zero opcolor. Returns the total box size.
size_t MoovBox::make_vmhd(uint8_t* data, size_t ofst)
{
    size_t len = make_full_box(data, ofst, MP4_vmhd, 0, 1); // flags is 1 for some reason
    len += write_uint16(data, ofst + len, 0); // graphicsmode

    for (int channel = 0; channel < 3; ++channel) {
        len += write_uint16(data, ofst + len, 0); // opcolor r, g, b
    }

    return update_size(data, ofst, len);
}

// Builds an 'smhd' box at data + ofst with balance 0.
// Returns the total box size.
size_t MoovBox::make_smhd(uint8_t* data, size_t ofst)
{
    size_t len = make_full_box(data, ofst, MP4_smhd, 0, 0);
    const size_t balanceAt = ofst + len;
    len += write_uint16(data, balanceAt, 0); // balance
    const size_t reservedAt = ofst + len;
    len += write_uint16(data, reservedAt, 0); // reserved
    return update_size(data, ofst, len);
}

// Builds an 'nmhd' box at data + ofst. The box consists of the full-box
// header only. Returns the total box size.
size_t MoovBox::make_nmhd(uint8_t* data, size_t ofst)
{
    const size_t headerOnly = make_full_box(data, ofst, MP4_nmhd, 0, 0);
    return update_size(data, ofst, headerOnly);
}

// Builds a 'url ' data-entry box at data + ofst with flags 0x000001.
// The URL string (NUL-terminated) is appended only when `url` is non-empty.
// Returns the total box size.
size_t MoovBox::make_url(uint8_t* data, size_t ofst, const std::string& url)
{
    // flag: data is in the same file as the Movie Box containing this data reference.
    const uint32_t selfContainedFlag = 0x000001;
    size_t len = make_full_box(data, ofst, fourcc("url "), 0, selfContainedFlag);

    if (!url.empty()) {
        len += write_string(data, ofst + len, url);
    }

    return update_size(data, ofst, len);
}

// Builds a 'dref' box at data + ofst holding one entry: a 'url ' box with
// an empty URL. Returns the total box size.
size_t MoovBox::make_dref(uint8_t* data, size_t ofst)
{
    const std::string noUrl;

    size_t len = make_full_box(data, ofst, MP4_dref, 0, 0);
    len += write_uint32(data, ofst + len, 1); // entry_count
    len += make_url(data, ofst + len, noUrl);
    return update_size(data, ofst, len);
}

// Builds a 'dinf' box at data + ofst wrapping a single 'dref' child.
// Returns the total box size.
size_t MoovBox::make_dinf(uint8_t* data, size_t ofst)
{
    const size_t header = make_box(data, ofst, MP4_dinf);
    const size_t dataRef = make_dref(data, ofst + header);
    return update_size(data, ofst, header + dataRef);
}

// Writes the common part of a visual sample entry of the given box `type`
// at data + ofst: the SampleEntry fields followed by the VisualSampleEntry
// fields, with width/height taken from the track's video info.
// Codec-specific child boxes are appended by the caller, which must then
// call update_size again. Returns the size written so far.
size_t MoovBox::make_visual_sample_entry(uint8_t* data, size_t ofst, uint32_t type, const Mp4Track& track)
{
    const uint32_t dpi72 = 0x00480000; // 72dpi

    size_t len = make_box(data, ofst, type);

    // SampleEntry fields
    len += write_data(data, ofst + len, nullptr, 6); // reserved
    len += write_uint16(data, ofst + len, 1); // data_reference_index

    // VisualSampleEntry fields
    len += write_uint16(data, ofst + len, 0); // pre_defined
    len += write_uint16(data, ofst + len, 0); // reserved
    len += write_data(data, ofst + len, nullptr, 12); // pre_defined
    len += write_uint16(data, ofst + len, static_cast<uint16_t>(track.getVideoInfo().width)); // width
    len += write_uint16(data, ofst + len, static_cast<uint16_t>(track.getVideoInfo().height)); // height
    len += write_uint32(data, ofst + len, dpi72); // horizresolution
    len += write_uint32(data, ofst + len, dpi72); // vertresolution
    len += write_uint32(data, ofst + len, 0); // reserved
    len += write_uint16(data, ofst + len, 1); // frame_count
    len += write_data(data, ofst + len, nullptr, 32); // compressorname
    len += write_uint16(data, ofst + len, 0x0018); // depth
    len += write_uint16(data, ofst + len, 0xFFFF); // pre_defined

    return update_size(data, ofst, len);
}

// Builds an 'avcC' box at data + ofst whose payload is `extradata`
// copied verbatim. Returns the total box size.
size_t MoovBox::make_avcc(uint8_t* data, size_t ofst, const std::vector<uint8_t>& extradata)
{
    const size_t header = make_box(data, ofst, MP4_avcC);
    const size_t payload = write_data(data, ofst + header, extradata.data(), extradata.size());
    return update_size(data, ofst, header + payload);
}

// Builds the AVC sample entry at data + ofst: a visual sample entry typed
// 'encv' for protected tracks or 'avc1' otherwise, followed by avcC and,
// for protected tracks, sinf. Returns the total box size.
size_t MoovBox::make_avc1(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    const bool encrypted = track.isProtected();
    const uint32_t entryType = encrypted ? MP4_encv : MP4_avc1;

    size_t len = make_visual_sample_entry(data, ofst, entryType, track);
    len += make_avcc(data, ofst + len, track.getCodecData());

    if (encrypted) {
        len += make_sinf(data, ofst + len, track);
    }

    return update_size(data, ofst, len);
}

// Builds a 'vp09' visual sample entry at data + ofst followed by the
// VP Codec Configuration Box (vpcC). Returns the total box size.
size_t MoovBox::make_vp09(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    const size_t entry = make_visual_sample_entry(data, ofst, MP4_vp09, track);
    const size_t config = make_vpcC(data, ofst + entry, track);
    return update_size(data, ofst, entry + config);
}

// Builds a version 1 'vpcC' box at data + ofst from the first 8 bytes of
// the track's codec data. When fewer than 8 bytes are available nothing is
// written and 0 is returned; otherwise returns the total box size.
size_t MoovBox::make_vpcC(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    const size_t configBytes = 8;

    if (track.getCodecData().size() < configBytes) {
        return 0;
    }

    size_t len = make_full_box(data, ofst, MP4_vpcC, 1, 0);
    len += write_data(data, ofst + len, track.getCodecData().data(), configBytes);
    return update_size(data, ofst, len);
}

// Builds a 'wvtt' (WVTTSampleEntry) box at data + ofst: the SampleEntry
// fields followed by a vttC child. The optional vlab child is not emitted.
// Returns the total box size.
size_t MoovBox::make_wvtt(uint8_t* data, size_t ofst)
{
    size_t len = make_box(data, ofst, MP4_wvtt);

    // SampleEntry fields
    len += write_data(data, ofst + len, nullptr, 6); // reserved
    len += write_uint16(data, ofst + len, 1); // data_reference_index

    // WVTTSampleEntry fields
    len += make_vttC(data, ofst + len);
    // len += make_vlab(data, ofst + len); // Recommended, but optional

    return update_size(data, ofst, len);
}

// Builds a 'vttC' (WebVTTConfigurationBox) at data + ofst whose payload is
// the NUL-terminated string "WEBVTT". Returns the total box size.
size_t MoovBox::make_vttC(uint8_t* data, size_t ofst)
{
    const std::string config("WEBVTT");

    const size_t header = make_box(data, ofst, MP4_vttC);
    const size_t payload = write_string(data, ofst + header, config);
    return update_size(data, ofst, header + payload);
}

// Builds a 'vlab' (WebVTTSourceLabelBox) at data + ofst with an empty
// label, i.e. the payload is a single NUL byte.
// Returns the total box size.
size_t MoovBox::make_vlab(uint8_t* data, size_t ofst)
{
    const std::string emptyLabel;

    const size_t header = make_box(data, ofst, MP4_vlab);
    const size_t payload = write_string(data, ofst + header, emptyLabel);
    return update_size(data, ofst, header + payload);
}

// Builds the Opus audio sample entry at data + ofst: the sample-entry and
// audio fields (channel count, sample size, sample rate from the track's
// audio info) followed by the Opus-specific dOps box.
// Returns the total box size.
size_t MoovBox::make_Opus(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    size_t len = make_box(data, ofst, MP4_opus);

    // SampleEntry fields
    len += write_data(data, ofst + len, nullptr, 6); // reserved
    len += write_uint16(data, ofst + len, 1); // data_reference_index

    // audio fields
    len += write_data(data, ofst + len, nullptr, 8); // reserved
    len += write_uint16(data, ofst + len, track.getAudioInfo().channelcount); // channelcount
    len += write_uint16(data, ofst + len, track.getAudioInfo().samplesize); // samplesize
    len += write_uint16(data, ofst + len, 0); // pre_defined
    len += write_uint16(data, ofst + len, 0); // reserved
    len += write_uint32(data, ofst + len, track.getAudioInfo().samplerate << 16); // sample rate as fixed-point number

    // Opus-specific box
    len += make_dOps(data, ofst + len, track);

    return update_size(data, ofst, len);
}

// Builds a 'dOps' box at data + ofst from the first 11 bytes of the track's
// codec data. When fewer than 11 bytes are available nothing is written and
// 0 is returned; otherwise returns the total box size.
size_t MoovBox::make_dOps(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    const size_t configBytes = 11;

    if (track.getCodecData().size() < configBytes) {
        return 0;
    }

    size_t len = make_box(data, ofst, MP4_dops);
    len += write_data(data, ofst + len, track.getCodecData().data(), configBytes);
    return update_size(data, ofst, len);
}

// best reference: http://xhelmboyx.tripod.com/formats/mp4-layout.txt
//
// Writes a five-byte descriptor header at data + ofst: the tag `desc`
// followed by `len` split into four 7-bit groups, most-significant group
// first. The first three length bytes have their top bit set; the last one
// does not. A null `data` skips the store. Always returns 5.
size_t MoovBox::write_esds_desc(uint8_t* data, size_t ofst, uint8_t desc, size_t len)
{
    if (nullptr != data) {
        size_t at = ofst;
        at += write_uint8(data, at, desc);

        // continuation bytes: bits 27..21, 20..14, 13..7
        for (int shift = 21; shift > 0; shift -= 7) {
            at += write_uint8(data, at, static_cast<uint8_t>((len >> shift) | 0x80));
        }

        // final byte: bits 6..0, top bit clear
        write_uint8(data, at, static_cast<uint8_t>(len & 0x7F));
    }

    return 5;
}

// Builds an 'esds' box at data + ofst.
// Descriptors written, in order: 0x03 (carrying `trackId` and a flags
// byte), 0x04 (codec id 0x40, stream flags 0x15, zeroed buffer size and
// bitrates), 0x05 with `extradata` (only when extradata is non-empty), and
// 0x06 with the single byte 0x02. Returns the total box size.
size_t MoovBox::make_esds(uint8_t* data, size_t ofst, uint16_t trackId, const std::vector<uint8_t>& extradata)
{
    const int32_t bufferSize = 0;
    const int32_t maxBitrate = 0;
    const int32_t avgBitrate = 0;

    // Size of the 0x05 descriptor including its 5-byte header; 0 when absent.
    const int32_t dec_size = static_cast<uint32_t>(extradata.size() ? extradata.size() + 5 : 0);

    size_t len = make_full_box(data, ofst, MP4_esds, 0, 0);

    // 0x03: 3 own bytes + 0x04 descriptor (5 + 13 + dec_size) + 0x06 descriptor (5 + 1)
    len += write_esds_desc(data, ofst + len, 0x03, 3 + (5 + 13) + dec_size + (5 + 1));
    len += write_uint16(data, ofst + len, trackId);
    len += write_uint8(data, ofst + len, 0); // flags

    // 0x04: 13 own bytes + optional 0x05 descriptor
    len += write_esds_desc(data, ofst + len, 0x04, 13 + dec_size);
    len += write_uint8(data, ofst + len, 0x40); // codec id
    len += write_uint8(data, ofst + len, 0x15); // flags (= Audiostream)
    len += write_uint24(data, ofst + len, bufferSize); // buffer size
    len += write_uint32(data, ofst + len, maxBitrate); // max bitrate
    len += write_uint32(data, ofst + len, avgBitrate); // average bitrate (0 for vbr)

    // 0x05: codec extradata, only when present
    if (0 != dec_size) {
        len += write_esds_desc(data, ofst + len, 0x05, extradata.size());
        len += write_data(data, ofst + len, extradata.data(), extradata.size());
    }

    // 0x06: single byte 0x02
    len += write_esds_desc(data, ofst + len, 0x06, 1);
    len += write_uint8(data, ofst + len, 0x02);

    return update_size(data, ofst, len);
}

// Builds the MPEG-4 audio sample entry at data + ofst, typed 'enca' for
// protected tracks or 'mp4a' otherwise: sample-entry and audio fields,
// then esds and, for protected tracks, sinf. Returns the total box size.
size_t MoovBox::make_mp4a(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    const bool encrypted = track.isProtected();

    size_t len = make_box(data, ofst, encrypted ? MP4_enca : MP4_mp4a);

    // SampleEntry fields
    len += write_data(data, ofst + len, nullptr, 6); // reserved
    len += write_uint16(data, ofst + len, 1); // data_reference_index

    // audio fields
    len += write_data(data, ofst + len, nullptr, 8); // reserved
    len += write_uint16(data, ofst + len, track.getAudioInfo().channelcount); // channelcount
    len += write_uint16(data, ofst + len, track.getAudioInfo().samplesize); // samplesize
    len += write_uint16(data, ofst + len, 0); // pre_defined
    len += write_uint16(data, ofst + len, 0); // reserved
    len += write_uint32(data, ofst + len, track.getAudioInfo().samplerate << 16); // sample rate as fixed-point number

    // child boxes
    len += make_esds(data, ofst + len, static_cast<uint16_t>(track.getId()), track.getCodecData());

    if (encrypted) {
        len += make_sinf(data, ofst + len, track);
    }

    return update_size(data, ofst, len);
}

// Builds an 'stsd' box at data + ofst with an entry count of 1 and the
// sample entry matching the track's original format (avc1, mp4a, vp09,
// wvtt or Opus). For any other format no entry is written after the count.
// Returns the total box size.
size_t MoovBox::make_stsd(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    size_t len = make_full_box(data, ofst, MP4_stsd, 0, 0);
    len += write_uint32(data, ofst + len, 1); // entry_count; TODO support more than 1

    const auto format = track.getOriginalFormat();
    const size_t entryAt = ofst + len;

    if (MP4_avc1 == format) {
        len += make_avc1(data, entryAt, track);
    } else if (MP4_mp4a == format) {
        len += make_mp4a(data, entryAt, track);
    } else if (MP4_vp09 == format) {
        len += make_vp09(data, entryAt, track);
    } else if (MP4_wvtt == format) {
        len += make_wvtt(data, entryAt);
    } else if (MP4_opus == format) {
        len += make_Opus(data, entryAt, track);
    }

    return update_size(data, ofst, len);
}

// Builds an empty 'stts' box (entry count 0) at data + ofst.
// Returns the total box size.
size_t MoovBox::make_stts(uint8_t* data, size_t ofst)
{
    const size_t header = make_full_box(data, ofst, MP4_stts, 0, 0);
    const size_t count = write_uint32(data, ofst + header, 0); // entry_count
    return update_size(data, ofst, header + count);
}

// Builds an empty 'stsc' box (entry count 0) at data + ofst.
// Returns the total box size.
size_t MoovBox::make_stsc(uint8_t* data, size_t ofst)
{
    const size_t header = make_full_box(data, ofst, MP4_stsc, 0, 0);
    const size_t count = write_uint32(data, ofst + header, 0); // entry_count
    return update_size(data, ofst, header + count);
}

// Builds an empty 'stsz' box (sample size 0, sample count 0) at
// data + ofst. Returns the total box size.
size_t MoovBox::make_stsz(uint8_t* data, size_t ofst)
{
    size_t len = make_full_box(data, ofst, MP4_stsz, 0, 0);

    // sample_size, then sample_count — both zero
    for (int field = 0; field < 2; ++field) {
        len += write_uint32(data, ofst + len, 0);
    }

    return update_size(data, ofst, len);
}

// Builds an empty 'stco' box (entry count 0) at data + ofst.
// Returns the total box size.
size_t MoovBox::make_stco(uint8_t* data, size_t ofst)
{
    const size_t header = make_full_box(data, ofst, MP4_stco, 0, 0);
    const size_t count = write_uint32(data, ofst + header, 0); // entry_count
    return update_size(data, ofst, header + count);
}

// Builds an 'stbl' box at data + ofst containing stsd for the track
// followed by empty stts, stsc, stsz and stco tables.
// Returns the total box size.
size_t MoovBox::make_stbl(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    size_t len = make_box(data, ofst, MP4_stbl);

    // sample description
    len += make_stsd(data, ofst + len, track);

    // empty sample tables
    len += make_stts(data, ofst + len);
    len += make_stsc(data, ofst + len);
    len += make_stsz(data, ofst + len);
    len += make_stco(data, ofst + len);

    // Key rotation not currently supported
    // if (1 < track.encryptionInfo().size()) {
    //     // we don't need sample groups if there is only one key
    //     len += make_sgpd(data, ofst + len, track);
    // }

    return update_size(data, ofst, len);
}

// Builds a 'minf' box at data + ofst: the media header matching the
// track's handler type (vmhd for video, smhd for sound, nmhd for text;
// none for any other handler), then dinf and stbl.
// Returns the total box size.
size_t MoovBox::make_minf(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    size_t len = make_box(data, ofst, MP4_minf);

    const auto handler = track.getHandlerType();
    if (MP4_vide == handler) {
        len += make_vmhd(data, ofst + len);
    } else if (MP4_soun == handler) {
        len += make_smhd(data, ofst + len);
    } else if (MP4_text == handler) {
        len += make_nmhd(data, ofst + len);
    }

    len += make_dinf(data, ofst + len);
    len += make_stbl(data, ofst + len, track);
    return update_size(data, ofst, len);
}

// Builds an 'mdia' box at data + ofst containing mdhd, hdlr (with the
// track's handler type and name) and minf. Returns the total box size.
size_t MoovBox::make_mdia(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    size_t len = make_box(data, ofst, MP4_mdia);

    // media header
    len += make_mdhd(data, ofst + len, track);

    // handler reference
    len += make_hdlr(data, ofst + len, track.getHandlerType(), track.getHandlerName());

    // media information
    len += make_minf(data, ofst + len, track);

    return update_size(data, ofst, len);
}

// Builds a 'trak' box at data + ofst containing tkhd and mdia, plus an
// edts box when the track's elst media time is greater than zero.
// Returns the total box size.
size_t MoovBox::make_trak(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    size_t len = make_box(data, ofst, MP4_trak);
    len += make_tkhd(data, ofst + len, track);
    len += make_mdia(data, ofst + len, track);

    const bool needsEditList = track.getElstMediaTime() > 0;
    if (needsEditList) {
        len += make_edts(data, ofst + len, track.getElstMediaTime());
    }

    return update_size(data, ofst, len);
}

// Builds an 'mfhd' box at data + ofst carrying `sequence_number`.
// Returns the total box size.
size_t MoovBox::make_mfhd(uint8_t* data, size_t ofst, uint32_t sequence_number)
{
    const size_t header = make_full_box(data, ofst, MP4_mfhd, 0, 0);
    const size_t sequence = write_uint32(data, ofst + header, sequence_number); // sequence_number
    return update_size(data, ofst, header + sequence);
}

// Builds a 'tfhd' box at data + ofst with the TFHD_DEFAULT_BASE_IS_MOOF
// flag and the track id. Every optional field is emitted as zero if its
// flag bit is present in `flags`. Returns the total box size.
size_t MoovBox::make_tfhd(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    // TODO optimize this for default values. (was not necessary when
    // these fragments do not traverse a network)
    // In conjunction with trun, using defaults, we could save a little space
    const uint32_t flags = mp4tfhd_flag::TFHD_DEFAULT_BASE_IS_MOOF;

    size_t len = make_full_box(data, ofst, MP4_tfhd, 0, flags);
    len += write_uint32(data, ofst + len, track.getId()); // track_ID

    // TODO Support being able to supply these fields.
    if (0 != (flags & mp4tfhd_flag::TFHD_BASE_DATA_OFFSET)) {
        len += write_uint64(data, ofst + len, 0);
    }

    if (0 != (flags & mp4tfhd_flag::TFHD_STSD_ID)) {
        len += write_uint32(data, ofst + len, 0);
    }

    if (0 != (flags & mp4tfhd_flag::TFHD_DEFAULT_DURATION)) {
        len += write_uint32(data, ofst + len, 0);
    }

    if (0 != (flags & mp4tfhd_flag::TFHD_DEFAULT_SIZE)) {
        len += write_uint32(data, ofst + len, 0);
    }

    if (0 != (flags & mp4tfhd_flag::TFHD_DEFAULT_FLAGS)) {
        len += write_uint32(data, ofst + len, 0);
    }

    return update_size(data, ofst, len);
}

// Pick the optimal flags to produce the smallest trun.
//
// TRUN_DATA_OFFSET is always set. A per-sample field flag is added as soon
// as any sample differs from the track-run default for that field (or, for
// the composition offset, is non-zero). Sample flags are special-cased: the
// first sample is ignored when deciding on TRUN_SAMPLE_FLAGS, and if only
// the first sample deviates, TRUN_FIRST_SAMPLE_FLAGS is used instead.
//
// A track with no samples yields TRUN_DATA_OFFSET alone; the first sample is
// never inspected in that case.
uint32_t MoovBox::trun_flags(const Mp4Track& track)
{
    uint32_t flags = mp4trun_flag::TRUN_DATA_OFFSET;

    const auto& samples = track.getSamples();
    const auto first = samples.begin();
    const auto last = samples.end();

    // Nothing to inspect; in particular, do not dereference begin() below.
    if (first == last) {
        return flags;
    }

    const auto& defaults = track.getTrackRun();

    for (auto sample = first; sample != last; ++sample) {
        if (defaults.default_sample_duration != sample->duration) {
            flags |= mp4trun_flag::TRUN_SAMPLE_DURATION;
        }

        if (defaults.default_sample_size != sample->size) {
            flags |= mp4trun_flag::TRUN_SAMPLE_SIZE;
        }

        // skip the first sample
        if (sample != first && defaults.default_sample_flags != sample->flags) {
            flags |= mp4trun_flag::TRUN_SAMPLE_FLAGS;
        }

        if (sample->compositionTimeOffset) {
            flags |= mp4trun_flag::TRUN_SAMPLE_CTS;
        }
    }

    // We skipped the first sample. If the remaining samples were default,
    // check whether only the first one needs to be overridden.
    if (!(flags & mp4trun_flag::TRUN_SAMPLE_FLAGS) && (defaults.default_sample_flags != first->flags)) {
        flags |= mp4trun_flag::TRUN_FIRST_SAMPLE_FLAGS;
    }

    return flags;
}

// Builds a 'trun' box at data + ofst using the flags chosen by trun_flags.
// Layout: sample count, optional data offset, optional first-sample flags,
// then for each sample the optional duration, size, flags and composition
// time offset, each present only when its flag bit is set.
// FLAG_TRUN_FIRST_SAMPLE_FLAGS_PRESENT or FLAG_TRUN_SAMPLE_FLAGS_PRESENT, NOT BOTH!
// Returns the total box size.
size_t MoovBox::make_trun(uint8_t* data, size_t ofst, uint32_t data_offset, const Mp4Track& track)
{
    const uint32_t flags = trun_flags(track);

    const bool hasDataOffset = 0 != (flags & mp4trun_flag::TRUN_DATA_OFFSET);
    const bool hasFirstFlags = 0 != (flags & mp4trun_flag::TRUN_FIRST_SAMPLE_FLAGS);
    const bool hasDuration = 0 != (flags & mp4trun_flag::TRUN_SAMPLE_DURATION);
    const bool hasSize = 0 != (flags & mp4trun_flag::TRUN_SAMPLE_SIZE);
    const bool hasFlags = 0 != (flags & mp4trun_flag::TRUN_SAMPLE_FLAGS);
    const bool hasCts = 0 != (flags & mp4trun_flag::TRUN_SAMPLE_CTS);

    size_t len = make_full_box(data, ofst, MP4_trun, 0, flags);
    len += write_uint32(data, ofst + len, static_cast<uint32_t>(track.getSamples().size())); // sample_count

    if (hasDataOffset) {
        len += write_uint32(data, ofst + len, data_offset);
    }

    if (hasFirstFlags) {
        len += write_uint32(data, ofst + len, track.getSamples().begin()->flags); // first_sample_flags
    }

    for (const auto& sample : track.getSamples()) {
        if (hasDuration) {
            len += write_uint32(data, ofst + len, sample.duration);
        }

        if (hasSize) {
            len += write_uint32(data, ofst + len, static_cast<uint32_t>(sample.size));
        }

        if (hasFlags) {
            len += write_uint32(data, ofst + len, sample.flags);
        }

        if (hasCts) {
            len += write_uint32(data, ofst + len, sample.compositionTimeOffset);
        }
    }

    return update_size(data, ofst, len);
}

// Builds a 'tfdt' box at data + ofst carrying the base media decode time
// followed by the track fragment duration. Both are written as 64-bit
// values (version 1) when either exceeds the uint32 range, otherwise as
// 32-bit values (version 0). The NTP timestamp fields are only written when
// flag bit 0x000001 is set, which never happens here since flags is 0.
// Returns the total box size.
// NOTE(review): the standard tfdt layout appears to carry only
// baseMediaDecodeTime; confirm that consumers expect the extra duration
// field written here.
size_t MoovBox::make_tfdt(uint8_t* data, size_t ofst, int64_t base_media_decode_time, int64_t track_fragment_duration)
{
    const int32_t flags = 0;
    const uint32_t ntp_timestamp_integer = 0;
    const uint32_t ntp_timestamp_fraction = 0;

    const int64_t limit32 = static_cast<int64_t>(std::numeric_limits<uint32_t>::max());
    const bool needs64Bits = (base_media_decode_time > limit32) || (track_fragment_duration > limit32);
    const uint8_t version = needs64Bits ? 1 : 0;

    size_t len = make_full_box(data, ofst, MP4_tfdt, version, flags);

    if (needs64Bits) {
        len += write_uint64(data, ofst + len, static_cast<uint64_t>(base_media_decode_time));
        len += write_uint64(data, ofst + len, static_cast<uint64_t>(track_fragment_duration));
    } else {
        len += write_uint32(data, ofst + len, static_cast<uint32_t>(base_media_decode_time));
        len += write_uint32(data, ofst + len, static_cast<uint32_t>(track_fragment_duration));
    }

    if (0 != (flags & 0x000001)) {
        len += write_uint32(data, ofst + len, ntp_timestamp_integer);
        len += write_uint32(data, ofst + len, ntp_timestamp_fraction);
    }

    return update_size(data, ofst, len);
}

// Builds a 'traf' box at data + ofst containing tfhd, tfdt and trun for
// the track, plus saiz, saio and senc when the track is protected.
// Returns the total box size.
size_t MoovBox::make_traf(uint8_t* data, size_t ofst, uint32_t data_offset, const Mp4Track& track)
{
    size_t len = make_box(data, ofst, MP4_traf);
    len += make_tfhd(data, ofst + len, track);
    len += make_tfdt(data, ofst + len, track.getBaseMediaDecodeTime(), track.getDuration());
    len += make_trun(data, ofst + len, data_offset, track);

    // Key rotation not currently supported
    // if (1 < track.encryptionInfo().size()) {
    //     // we don't need sample groups if there is only one key
    //     len += make_sbgp(data, ofst + len, track);
    // }

    // TODO test this again later
    // bool includeSenc = track.useSubSampleEncryption() || track.curEncryptionIndex().useConstantIv();
    const bool encrypted = track.isProtected();
    if (encrypted) {
        // sample auxiliary info sizes, offsets, then the sample encryption box
        len += make_saiz(data, ofst + len, track);
        len += make_saio(data, ofst + len);
        len += make_senc(data, ofst + len, track);
    }

    return update_size(data, ofst, len);
}

//  DRM boxes
// Computes the number of auxiliary-information bytes accounted to one sample.
//
// perSampleIvSize         bytes of per-sample IV; this is the base of the result
// useSubSampleEncryption  whether subsample information is considered at all
// sample                  sample whose subsampleRange list is inspected
//
// Returns perSampleIvSize alone when subsample encryption is off or the sample
// has no subsample ranges. Otherwise adds a 16-bit subsample count plus, for
// every range, a 16-bit and a 32-bit field.
//
// NOTE(review): make_senc() writes the 16-bit subsample count for every sample
// whenever subsample encryption is enabled, even if subsampleRange is empty,
// whereas this function does not count those two bytes in that case - confirm
// that such samples cannot occur, or align the two.
size_t MoovBox::SampleAuxiliaryDataSize(size_t perSampleIvSize, bool useSubSampleEncryption, const mp4sample& sample)
{
    if (!useSubSampleEncryption || sample.subsampleRange.empty()) {
        return perSampleIvSize;
    }

    constexpr size_t subsampleCountBytes = sizeof(uint16_t);
    constexpr size_t subsampleEntryBytes = sizeof(uint16_t) + sizeof(uint32_t);

    return perSampleIvSize + subsampleCountBytes + sample.subsampleRange.size() * subsampleEntryBytes;
}

// uint32_t MoovBox::CencSampleEncryptionInformationGroupEntry_seig(uint8_t* data, size_t ofst, const fmp4drmFactory::encryptionInfo& encInfo)
// {
//     size_t i = 0;
//     uint8_t isProtected = 1;
//     uint8_t perSampleIvSize = encInfo.perSampleIvSize();
//     uint8_t patternEncryption = FOURCC("cbcs") == track.format() ? encInfo.patternEncryption() : 0;
//     i += write_uint8(data, ofst + i, 0); // reserved = 0
//     i += write_uint8(data, ofst + i, encInfo.patternEncryption()); // crypt_byte_block | skip_byte_block = 0
//     i += write_uint8(data, ofst + i, isProtected);
//     i += write_uint8(data, ofst + i, perSampleIvSize);
//     i += write_data(data, ofst + i, encInfo.kid.data(), encInfo.kid.size());

//     if (isProtected == 1 && perSampleIvSize == 0) {
//         // Using a constant IV seems less secure at first, but if we are careful it isn't.
//         // By skipping the slice header in the subsample, the first protected byte has a uniform
//         // probability of being any value, therefore not reducing the brute-force search space.
//         auto& constant_IV = encInfo.iv;
//         i += write_uint8(data, ofst + i, static_cast<uint8_t>(constant_IV.size())); // constant_IV_size;
//         i += write_data(data, ofst + i, constant_IV.data(), constant_IV.size()); // constant_IV;
//     }

//     return static_cast<uint32_t>(i);
// }

// Sample Group Description Box
// size_t MoovBox::make_sgpd(uint8_t* data, size_t ofst, const Mp4Track& track)
// {
//     int32_t flags = 0;
//     uint8_t version = 1;
//     size_t i = make_full_box(data, ofst, MP4_sgpd, version, flags);
//     i += write_uint32(data, ofst + i, MP4_seig); // grouping_type

//     // TODO this assumes all sample groups are the same size. This is currently true.
//     uint32_t default_length = CencSampleEncryptionInformationGroupEntry_seig(nullptr, 0, track.curEncryptionInfo());

//     if (1 == version) {
//         i += write_uint32(data, ofst + i, default_length); // default_length
//     } else if (2 <= version) {
//         i += write_uint32(data, ofst + i, 0); // TODO default_sample_description_index;
//     }

//     i += write_uint32(data, ofst + i, static_cast<uint32_t>(track.encryptionInfo().size())); // entry_count;
//     for (const auto& entry : track.encryptionInfo()) {
//         if (1 == version && 0 == default_length) {
//             assert(false); // Future use
//             // default_length is not 0 in our current implementation
//             i += write_uint32(data, ofst + i, 0); // description_length;
//         }
//         i += CencSampleEncryptionInformationGroupEntry_seig(data, ofst + i, entry);
//     }

//     return update_size(data, ofst, i);
// }

// Sample to Group Box
// size_t MoovBox::make_sbgp(uint8_t* data, size_t ofst, const Mp4Track& track)
// {
//     size_t i = make_full_box(data, ofst, MP4_sbgp, 0, 0);
//     i += write_uint32(data, ofst + i, MP4_seig); // grouping_type.

//     std::vector<std::pair<uint32_t, uint32_t>> entries;

//     uint32_t sample_count = 0;
//     size_t group_description_index = track.samples().front().samplegroup;
//     for (const auto& sample : track.samples()) {
//         // the first one will always be equal
//         if (group_description_index != sample.samplegroup) {
//             entries.emplace_back(sample_count, group_description_index);
//             group_description_index = sample.samplegroup;
//             sample_count = 0;
//         }

//         ++sample_count;
//     }

//     // sample_count is guaranteed to be at least 1 here
//     entries.emplace_back(sample_count, group_description_index);

//     i += write_uint32(data, ofst + i, static_cast<uint32_t>(entries.size())); // entry_count
//     for (const auto& entry : entries) {
//         i += write_uint32(data, ofst + i, entry.first); // sample_count
//         i += write_uint32(data, ofst + i, entry.second); // group_description_index
//     }

//     return update_size(data, ofst, i);
// }

size_t MoovBox::make_saiz(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    int32_t flags = 0;
    uint8_t version = 0;
    size_t i = make_full_box(data, ofst, MP4_saiz, version, flags);

    if (flags & 1) {
        i += write_uint32(data, ofst + i, track.getCurrentEncryptionInfo().scheme); // aux_info_type
        i += write_uint32(data, ofst + i, 0); //aux_info_type_parameter
    }

    // If all the sample infos have the same size, we can take fewer bytes
    auto& samples = track.getSamples();
    bool useSubSampleEncryption = track.useSubSampleEncryption();
    uint8_t perSampleIvSize = track.getCurrentEncryptionInfo().perSampleIvSize();
    size_t defaultSampleInfoSize = SampleAuxiliaryDataSize(perSampleIvSize, useSubSampleEncryption, samples.at(0));
    size_t maxSampleAuxiliaryDataSize = 0;

    for (const auto& sample : samples) {
        // check that all samples SampleAuxiliaryDataSize match defaultSampleInfoSize
        size_t sampleAuxiliaryDataSize = SampleAuxiliaryDataSize(perSampleIvSize, useSubSampleEncryption, sample);
        maxSampleAuxiliaryDataSize = std::max(maxSampleAuxiliaryDataSize, sampleAuxiliaryDataSize);
        if (sampleAuxiliaryDataSize != defaultSampleInfoSize) {
            defaultSampleInfoSize = 0;
        }
    }

    // 0 == maxSampleAuxiliaryDataSize is a strange case. Because every SampleAuxiliaryDataSize is zero
    // hence there is no good way to encode the saiz box.
    // Apple seems to want sample count to be zero while firefox wants the remainder of the table to be filled out.
    // Chrome doesnt care. Firefox is closest to the spec. In this case, saiz should not be included, but Apple didn't like that either.
    size_t sampleCount = (0 == maxSampleAuxiliaryDataSize && fourcc("cbcs") == track.getCurrentEncryptionInfo().scheme) ? 0 : samples.size();
    i += write_uint8(data, ofst + i, static_cast<uint8_t>(defaultSampleInfoSize)); // default_sample_info_size
    i += write_uint32(data, ofst + i, static_cast<uint32_t>(sampleCount)); // sample_count
    if (0 != sampleCount) {
        for (const auto& sample : samples) {
            auto sample_info_size = SampleAuxiliaryDataSize(perSampleIvSize, useSubSampleEncryption, sample);
            i += write_uint8(data, ofst + i, static_cast<uint8_t>(sample_info_size));
        }
    }

    return update_size(data, ofst, i);
}

// Serializes the 'saio' (sample auxiliary information offsets) full box at
// `data + ofst`, with exactly one offset entry.
//
// The single offset is expressed in the same coordinate space as `ofst` and
// points past this box, past a full-box header and past one 32-bit field of
// the box that follows.
//
// data  destination buffer handed through to the writers
// ofst  offset of this box inside `data`
//
// Returns whatever update_size() yields for the accumulated byte count.
size_t MoovBox::make_saio(uint8_t* data, size_t ofst)
{
    int32_t flags = 0;
    uint8_t version = 0;
    size_t len = make_full_box(data, ofst, MP4_saio, version, flags);

    // The auxiliary data lives in senc rather than mdat, so it is contiguous
    // and a single entry is enough.
    len += write_uint32(data, ofst + len, 1); // entry_count

    // This assumes senc comes immediately after this box! Otherwise the file is corrupt.
    //   ofst + len + 4          -> end of saio (the offset field itself is 4 bytes)
    //   + FULLBOX_HEADER_SIZE   -> skip the header of the following box
    //   + 4                     -> skip its 32-bit sample_count
    const auto endOfThisBox = ofst + len + sizeof(uint32_t);
    const auto firstAuxByte = endOfThisBox + FULLBOX_HEADER_SIZE + sizeof(uint32_t);
    len += write_uint32(data, ofst + len, static_cast<uint32_t>(firstAuxByte));

    return update_size(data, ofst, len);
}

// Serializes the 'senc' (sample encryption) full box for `track` at
// `data + ofst`.
//
// Layout written after the full-box header:
//   sample_count                                   32 bit
//   per sample:
//     IV                                           perSampleIvSize bytes, if > 0
//     subsample_count                              16 bit   } only when flag 0x2
//     { BytesOfClearData (16), BytesOfProtectedData (32) }  } is set
//
// Flag 0x2 is set when track.useSubSampleEncryption() is true.
//
// data   destination buffer handed through to the writers
// ofst   offset of this box inside `data`
// track  source of samples and of the current encryption info
//
// Returns whatever update_size() yields for the accumulated byte count.
size_t MoovBox::make_senc(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    uint8_t version = 0;
    int32_t flags = track.useSubSampleEncryption() ? 2 : 0;
    const uint32_t perSampleIVSize = track.getCurrentEncryptionInfo().perSampleIvSize();
    size_t i = make_full_box(data, ofst, MP4_senc, version, flags);
    i += write_uint32(data, ofst + i, static_cast<uint32_t>(track.getSamples().size())); // sample_count

    for (const auto& sample : track.getSamples()) {
        if (0 < perSampleIVSize) {
            // Never read past the end of the sample's IV: copy what is there
            // and zero-fill the remainder so the field keeps its fixed width.
            const auto& iv = sample.initializationVector;
            const size_t available = std::min<size_t>(iv.size(), perSampleIVSize);
            i += write_data(data, ofst + i, iv.data(), available);
            if (available < perSampleIVSize) {
                // A null source makes write_data() zero-fill the range.
                i += write_data(data, ofst + i, nullptr, perSampleIVSize - available);
            }
        }

        if (flags & 2) {
            i += write_uint16(data, ofst + i, static_cast<uint16_t>(sample.subsampleRange.size())); // subsample_count
            for (const auto& subsample : sample.subsampleRange) {
                i += write_uint16(data, ofst + i, subsample.first); // BytesOfClearData
                i += write_uint32(data, ofst + i, subsample.second); // BytesOfProtectedData
            }
        }
    }

    return update_size(data, ofst, i);
}

// Serializes the 'tenc' (track encryption) full box for `track` at
// `data + ofst`.
//
// The box is version 1 when the current scheme is 'cbcs', version 0 otherwise.
// Payload: one zero byte, the pattern byte, default_isProtected (always 1),
// default_Per_Sample_IV_Size, the key id, and - only when the per-sample IV
// size is 0 - a one-byte constant-IV length followed by the constant IV.
//
// data   destination buffer handed through to the writers
// ofst   offset of this box inside `data`
// track  source of the current encryption info and the original format
//
// Returns whatever update_size() yields for the accumulated byte count.
size_t MoovBox::make_tenc(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    int32_t flags = 0;
    const auto& encryption = track.getCurrentEncryptionInfo();

    uint8_t version = 0;
    if (fourcc("cbcs") == encryption.scheme) {
        version = 1;
    }
    size_t len = make_full_box(data, ofst, fourcc("tenc"), version, flags);

    const uint8_t default_isProtected = 1;
    const uint8_t default_Per_Sample_IV_Size = encryption.perSampleIvSize();

    // per spec cbcs+mp4a should be 0x0a, but sample content is 0x00
    uint8_t cryptSkipPattern = 0;
    if (MP4_avc1 == track.getOriginalFormat()) {
        cryptSkipPattern = encryption.patternEncryption();
    }

    len += write_uint8(data, ofst + len, 0);
    len += write_uint8(data, ofst + len, cryptSkipPattern);
    len += write_uint8(data, ofst + len, default_isProtected);
    len += write_uint8(data, ofst + len, default_Per_Sample_IV_Size);

    const auto& keyId = encryption.kid;
    len += write_data(data, ofst + len, keyId.data(), keyId.size());

    const bool usesConstantIv = (1 == default_isProtected) && (0 == default_Per_Sample_IV_Size);
    if (usesConstantIv) {
        // constantIV is used in CBC mode. It is the same size as the block (16).
        const auto& constantIv = encryption.iv;
        len += write_uint8(data, ofst + len, static_cast<uint8_t>(constantIv.size()));
        len += write_data(data, ofst + len, constantIv.data(), constantIv.size());
    }

    return update_size(data, ofst, len);
}

// Serializes a 'frma' box at `data + ofst` whose only payload is the 32-bit
// `data_format` value.
//
// Returns whatever update_size() yields for the accumulated byte count.
size_t MoovBox::make_frma(uint8_t* data, size_t ofst, uint32_t data_format)
{
    size_t len = make_box(data, ofst, MP4_frma);

    const size_t payloadAt = ofst + len;
    len += write_uint32(data, payloadAt, data_format);

    return update_size(data, ofst, len);
}

// Serializes a 'schm' (scheme type) full box at `data + ofst`.
//
// Payload: the 32-bit `scheme_type` followed by the fixed scheme version
// 0x00010000. A scheme URI string would follow if flag bit 0 were set, but
// `flags` is fixed at 0 here so it is never written.
//
// scheme_type values used by this file's comments:
//   FOURCC("cenc")  // AES-CTR
//   FOURCC("cbcs")  // AES-CBC
//
// Returns whatever update_size() yields for the accumulated byte count.
size_t MoovBox::make_schm(uint8_t* data, size_t ofst, uint32_t scheme_type)
{
    int32_t flags = 0;
    uint8_t version = 0;
    size_t len = make_full_box(data, ofst, MP4_schm, version, flags);

    len += write_uint32(data, ofst + len, scheme_type);

    const uint32_t schemeVersion = 0x00010000;
    len += write_uint32(data, ofst + len, schemeVersion);

    if (flags & 1) {
        const std::string scheme_uri;
        len += write_string(data, ofst + len, scheme_uri);
    }

    return update_size(data, ofst, len);
}

// Serializes a 'schi' box at `data + ofst` containing a single 'tenc' child
// built from `track`.
//
// Returns whatever update_size() yields for the accumulated byte count.
size_t MoovBox::make_schi(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    size_t len = make_box(data, ofst, MP4_schi);

    const size_t childAt = ofst + len;
    len += make_tenc(data, childAt, track);

    return update_size(data, ofst, len);
}

// Serializes a 'sinf' box at `data + ofst` for `track`.
//
// Children, in order: 'frma' (the track's original format), 'schm' (the
// current encryption scheme) and 'schi'.
//
// Returns whatever update_size() yields for the accumulated byte count.
size_t MoovBox::make_sinf(uint8_t* data, size_t ofst, const Mp4Track& track)
{
    size_t len = make_box(data, ofst, MP4_sinf);

    // Original (unprotected) sample entry format.
    len += make_frma(data, ofst + len, track.getOriginalFormat());

    // Protection scheme in use.
    len += make_schm(data, ofst + len, track.getCurrentEncryptionInfo().scheme);

    // Scheme-specific information.
    len += make_schi(data, ofst + len, track);

    return update_size(data, ofst, len);
}

// Builds a 'vttc' box wrapping `payload` and returns it as a byte vector.
//
// Works in two passes: a first call with a null buffer only measures the box,
// then a buffer of exactly that size is filled by a second call.
std::vector<uint8_t> MoovBox::make_vttc(const std::string& payload)
{
    // Pass 1: measure only.
    const auto boxSize = static_cast<uint32_t>(make_vttc(nullptr, payload));

    // Pass 2: serialize into a zero-initialized buffer of that size.
    std::vector<uint8_t> buffer(boxSize);
    make_vttc(buffer.data(), payload);
    return buffer;
}

// Builds a 'vtte' box and returns it as a byte vector.
//
// The buffer is sized for two 32-bit fields and then filled by the
// pointer-based make_vtte() overload.
std::vector<uint8_t> MoovBox::make_vtte()
{
    constexpr size_t bufferSize = sizeof(uint32_t) + sizeof(uint32_t);

    std::vector<uint8_t> buffer(bufferSize);
    make_vtte(buffer.data());
    return buffer;
}

// Serializes a VTTCueBox ('vttc') at the very start of `data`, containing one
// 'payl' child that carries `payload`.
//
// `data` may be null; the string-returning overload relies on that to measure
// the box before allocating.
//
// Returns whatever update_size() yields for the accumulated byte count.
size_t MoovBox::make_vttc(uint8_t* data, const std::string& payload)
{
    const size_t boxStart = 0;

    size_t len = make_box(data, boxStart, MP4_vttc);
    len += make_payl(data, len, payload);

    return update_size(data, boxStart, len);
}

// Serializes a CuePayloadBox ('payl') at `data + ofst` whose content is
// `payload`, written via write_string().
//
// Returns whatever update_size() yields for the accumulated byte count.
size_t MoovBox::make_payl(uint8_t* data, size_t ofst, const std::string& payload)
{
    size_t len = make_box(data, ofst, MP4_payl);

    const size_t textAt = ofst + len;
    len += write_string(data, textAt, payload);

    return update_size(data, ofst, len);
}

// Serializes a VTTEmptyCueBox ('vtte') at the very start of `data`. The box
// has a header only and no payload.
//
// Returns whatever update_size() yields for the header length.
size_t MoovBox::make_vtte(uint8_t* data)
{
    const size_t boxStart = 0;

    size_t len = make_box(data, boxStart, MP4_vtte);
    return update_size(data, boxStart, len);
}
}
}

////////////////////////////////////////////////////////////////////////////////
// Reference
// type ftyp (ftyp)
// type moov (moov)
// type mvhd (moov.mvhd)
// type mvex (moov.mvex)
// type mehd (moov.mvex.mehd)
// type trex (moov.mvex.trex)
// type trak (moov.trak)
// type tkhd (moov.trak.tkhd)
// type edts (moov.trak.edts)
// type elst (moov.trak.edts.elst)
// type mdia (moov.trak.mdia)
// type mdhd (moov.trak.mdia.mdhd)
// type minf (moov.trak.mdia.minf)
// type vmhd (moov.trak.mdia.minf.vmhd)
// type nmhd (moov.trak.mdia.minf.nmhd)
// type dinf (moov.trak.mdia.minf.dinf)
// type url  (moov.trak.mdia.minf.dinf.dref.url)
// type stbl (moov.trak.mdia.minf.stbl)
// type stsd (moov.trak.mdia.minf.stbl.stsd)
// type avc1 (moov.trak.mdia.minf.stbl.stsd.avc1)
// type avcC (moov.trak.mdia.minf.stbl.stsd.avc1.avcC)
// type vp09 (moov.trak.mdia.minf.stbl.stsd.vp09)
// type vpcC (moov.trak.mdia.minf.stbl.stsd.vp09.vpcC)
// type wvtt (moov.trak.mdia.minf.stbl.stsd.wvtt)
// type vttC (moov.trak.mdia.minf.stbl.stsd.wvtt.vttC)
// type vlab (moov.trak.mdia.minf.stbl.stsd.wvtt.vlab)
// type Opus (moov.trak.mdia.minf.stbl.stsd.Opus)
// type dOps (moov.trak.mdia.minf.stbl.stsd.Opus.dOps)
// type stts (moov.trak.mdia.minf.stbl.stts)
// type stsc (moov.trak.mdia.minf.stbl.stsc)
// type stsz (moov.trak.mdia.minf.stbl.stsz)
// type stco (moov.trak.mdia.minf.stbl.stco)
////////////////////////////////////////////////////////////////////////////////
/*
styp
moof
    mfhd
    traf
        tfhd
        tfdt
        trun
mdat
*/
