#include "Mp4Parser.hpp"
#include "playercore/SecureSampleBuffer.hpp"
#include <algorithm>
#include <cassert>

namespace twitch {
namespace media {
// Size in bytes of the basic box header read by readBox(): a 32-bit size followed by a 32-bit type.
const int BoxHeaderSize = 8;

// Constructs a parser that reports through `log`.
// All header structures are value-initialized, the fragment bookkeeping starts at zero and
// no stream is attached until setStream() is called.
Mp4Parser::Mp4Parser(std::shared_ptr<Log> log)
    : m_log(std::move(log))
    , m_ftyp {}
    , m_mvhd {}
    , m_mdat {}
    , m_moofOffset(0)
    , m_nextFragmentOffset(0)
    , m_isInitFragment(false)
    , m_isFragmented(false)
    , m_sequenceNumber(0)
    , m_stream(nullptr)
{
}

// Looks up a parsed track by the track_ID stored in its track header.
// Returns nullptr when no track with that id has been read.
std::shared_ptr<Mp4Track> Mp4Parser::getTrackById(uint32_t id)
{
    // the predicate takes the element by const reference: copying a shared_ptr for every
    // comparison only adds an atomic ref-count increment/decrement
    auto it = std::find_if(m_tracks.begin(), m_tracks.end(),
        [id](const std::shared_ptr<Mp4Track>& track) { return track->m_head.track_ID == id; });
    return it != m_tracks.end() ? *it : nullptr;
}

// Resolves the duration of `sample` in track timescale units.
// Precedence: the sample's own duration, then the run default, then the fragment default;
// 0 when none of them is set.
uint32_t Mp4Parser::getSampleDuration(const Mp4Track& track, const mp4sample& sample)
{
    if (sample.duration > 0) {
        return sample.duration;
    }

    if (track.m_run.default_sample_duration > 0) {
        return track.m_run.default_sample_duration;
    }

    if (track.m_fragment.default_sample_duration > 0) {
        return track.m_fragment.default_sample_duration;
    }

    return 0;
}

// Returns the key id that applies to sample `sampleNo` of `track`.
// A sample mapped to a valid (1-based) sample group uses that group's key id, or an empty
// key id when the group is not protected. Every other sample uses the track default.
std::vector<uint8_t> Mp4Parser::getSampleKeyId(const Mp4Track& track, size_t sampleNo)
{
    if (sampleNo >= track.m_sampleToGroup.size()) {
        return track.m_defaultKeyId;
    }

    const auto groupIndex = track.m_sampleToGroup[sampleNo];
    const bool mapsToGroup = groupIndex != 0 && groupIndex <= track.m_sampleGroup.size();
    if (!mapsToGroup) {
        return track.m_defaultKeyId;
    }

    // group indexes are 1-based, 0 means "no group"
    const auto& entry = track.m_sampleGroup[groupIndex - 1];
    if (entry.isProtected) {
        return entry.kid;
    }
    return std::vector<uint8_t>();
}

// Returns the per-sample initialization vector of sample `sampleNo` in `track`.
// An out-of-range sample number yields an empty vector instead of indexing past the table.
std::vector<uint8_t> Mp4Parser::getSampleIV(const Mp4Track& track, size_t sampleNo)
{
    // TODO check group, then tenc, then sample. < only needed for cbcs (hence not needed)
    if (sampleNo >= track.m_samples.size()) {
        return std::vector<uint8_t>();
    }
    return track.m_samples[sampleNo].initializationVector;
}

// Reports the media duration derived from the audio and video tracks.
// When several tracks share a handler type the last one seen wins.
MediaTime Mp4Parser::getDuration() const
{
    MediaTime audio;
    MediaTime video;

    for (const auto& track : m_tracks) {
        switch (track->getHandlerType()) {
        case MP4_soun:
            audio = track->getDurationTime();
            break;
        case MP4_vide:
            video = track->getDurationTime();
            break;
        default:
            break;
        }
    }

    // if muxed use a consistent track for frame by frame mp4s to prevent additive errors
    if (m_isFragmented && audio > MediaTime::zero() && video > MediaTime::zero()) {
        return audio;
    }

    return std::max(audio, video);
}

// Attaches `stream` (may be null) as the parser input and rewinds it to the start.
void Mp4Parser::setStream(Stream* stream)
{
    m_stream = stream;

    if (!m_stream) {
        return;
    }

    m_stream->seek(0);
}

// Returns true once the stream holds a complete moov or moof box, i.e. enough data for
// readTracks() to parse a header. Scans the top level boxes from the start of the stream.
bool Mp4Parser::canReadTracks()
{
    if (!m_stream || !m_stream->length()) {
        return false;
    }

    m_stream->seek(0);
    bool hasHeader = false;
    readBoxes(0, static_cast<size_t>(m_stream->length()), [this, &hasHeader](mp4box& box) {
        if (box.type == MP4_moov || box.type == MP4_moof) {
            // box.data points just past the 8 byte header, so data + size - header is the box end
            hasHeader = m_stream->length() >= static_cast<int64_t>(box.data + box.size - BoxHeaderSize);
            if (hasHeader) {
                return false; // don't need to read more
            }
        }
        return true;
    });
    return hasHeader;
}

// Parses the top level boxes of the attached stream from offset 0.
// Returns ErrorInvalidState when no stream is attached or it is empty, otherwise the result
// of the top level box walk. A moov box replaces all track and protection data; only the
// first moof is parsed, and the position of a second moof is remembered in
// m_nextFragmentOffset.
MediaResult Mp4Parser::readTracks()
{
    if (!m_stream || !m_stream->length()) {
        return MediaResult::ErrorInvalidState;
    }

    // read top level boxes
    m_isInitFragment = false;
    m_sequenceNumber = 0;
    m_stream->seek(0);

    // for multi fragment MP4s we only want to read the first moof/mdat pair, it's possible there
    // are more in the stream which we skip on the first pass
    m_nextFragmentOffset = 0;
    m_emsgs.clear();
    int moofCount = 0;
    // resets the sampled index/duration of every already known track
    seekTo(MediaTime::zero());

    return readBoxes(0, static_cast<size_t>(m_stream->length()), [this, &moofCount](mp4box& box) {
        switch (box.type) {
        case MP4_ftyp:
            read_ftyp();
            break;

        case MP4_emsg:
            read_emsg(box);
            break;

        case MP4_moov:
            // reset track and protection data
            m_tracks.clear();
            m_psshBoxes.clear();
            m_psshBytes.clear();
            // read moov header
            read_moov(box);
            m_isInitFragment = true;
            break;

        case MP4_moof:
            if (moofCount > 0) {
                // second fragment: remember where its header starts and stop the walk
                m_nextFragmentOffset = static_cast<size_t>(m_stream->position() - BoxHeaderSize);
                return false;
            }
            read_moof(box);
            moofCount++;
            break;

        case MP4_mdat:
            // only the box description is kept here; sample data is read by readSamples()
            m_mdat = box;
            break;

        case MP4_free:
        case MP4_skip:
            break;

        default:
            unhandledBox(box, box);
            break;
        }
        return true;
    });
}

// Returns the stream offset at which the sample data of `track` starts.
// Fragmented files with a known moof offset use the moof start plus the run's data offset;
// everything else starts at the mdat payload.
size_t Mp4Parser::getTrackOffset(const Mp4Track& track) const
{
    const bool relativeToMoof = m_isFragmented && m_moofOffset;
    if (!relativeToMoof) {
        return m_mdat.data;
    }

    // m_moofOffset is stepped back by one box header before the run offset is applied
    size_t fragmentOffset = m_moofOffset - BoxHeaderSize;
    fragmentOffset += track.m_run.data_offset;
    return fragmentOffset;
}

// Checks whether the stream already holds the bytes needed to read `duration` worth of
// samples from every track in `tracks`, starting at each track's current sampled index.
// Returns false when no stream is attached, it is empty, or a required byte range lies
// beyond the current stream length. Fragmented files always report true.
bool Mp4Parser::canReadSamples(const TrackList& tracks, MediaTime duration)
{
    if (!m_stream || !m_stream->length()) {
        return false;
    }

    if (m_isFragmented) {
        return true;
    }

    for (const auto& track : tracks) {
        uint64_t sampled = 0; // accumulated sample duration in track timescale units
        uint32_t prevIndex = 0; // chunk index of the previously visited sample
        uint32_t chunkOffset = 0; // bytes consumed so far inside the current chunk
        size_t offset = getTrackOffset(*track);

        if (static_cast<int64_t>(offset) >= m_stream->length()) {
            return false;
        }

        for (uint32_t i = track->m_sampledIndex; i < track->m_samples.size(); i++) {
            auto const& mp4sample = track->m_samples.at(i);

            // samples pointing at a chunk that does not exist are reported and skipped
            if (mp4sample.chunkIndex >= track->m_chunks.size()) {
                m_log->error("Invalid chunk index %d", mp4sample.chunkIndex);
                continue;
            }

            auto& chunk = track->m_chunks.at(mp4sample.chunkIndex);

            // entering a different chunk restarts the running byte offset
            if (mp4sample.chunkIndex != prevIndex) {
                chunkOffset = 0;
            }

            sampled += getSampleDuration(*track, mp4sample);
            chunkOffset += mp4sample.size;
            prevIndex = mp4sample.chunkIndex;

            // check file truncated (may still be being written out)
            if (static_cast<int64_t>(chunk.offset + chunkOffset) > m_stream->length()) {
                return false;
            }

            // enough duration covered; only stop on the first sample of a chunk
            if (track->scaleTime(sampled) >= duration && mp4sample.chunkNumber == 0) {
                break;
            }
        }
    }

    return true;
}

// Reads up to `maxDuration` worth of samples from each track in `tracks` and hands every
// sample, together with its track, to `callback`.
//
// Each track continues from its m_sampledIndex / m_sampledDuration, which are advanced as
// samples are delivered. Protected tracks produce SecureSampleBuffer instances carrying key
// id, IV and subsample ranges. Reading stops early (still returning Ok) when the next sample
// is not yet fully present in the stream.
//
// Returns ErrorInvalidState for a missing/empty stream or an inconsistent fragment offset,
// an Error carrying the stream error when a seek or read fails, otherwise Ok.
MediaResult Mp4Parser::readSamples(const TrackList& tracks, const SampleCallback& callback, MediaTime maxDuration)
{
    if (!m_stream || !m_stream->length()) {
        m_log->error("MP4 Invalid stream");
        return MediaResult::ErrorInvalidState;
    }

    // read the data section in order starting from the track with the smallest offset
    // (ascending comparison; the tracks are taken by reference to avoid ref-count churn)
    auto comparator = [](const std::shared_ptr<Mp4Track>& t1, const std::shared_ptr<Mp4Track>& t2) {
        return t1->m_run.data_offset < t2->m_run.data_offset;
    };
    TrackList sorted = tracks;
    std::sort(sorted.begin(), sorted.end(), comparator);

    for (const std::shared_ptr<Mp4Track>& track : sorted) {
        size_t offset = getTrackOffset(*track);
        if (m_isFragmented && m_moofOffset) {
            if (offset < m_mdat.data) {
                m_log->error("MOOF start before data offset: %lld data: %lld",
                    static_cast<long long>(offset), static_cast<long long>(m_mdat.data));
                return MediaResult::ErrorInvalidState;
            }
        }

        if (!m_stream->seek(offset)) {
            m_log->error("Stream seek failed offset %lld length %lld",
                static_cast<long long>(offset), static_cast<long long>(m_stream->length()));
            return MediaResult(MediaResult::Error, m_stream->error());
        }

        uint64_t scaledDuration = track->scaleTime(maxDuration);
        uint64_t sampled(0);
        uint32_t prevIndex = 0; // only used if the mp4 is chunked
        uint32_t chunkOffset = 0;

        while (track->m_sampledIndex < track->m_samples.size()) {

            auto const& mp4sample = track->m_samples.at(track->m_sampledIndex);

            // all samples in a chunk must be read for this sample duration
            if (sampled >= scaledDuration && mp4sample.chunkNumber == 0) {
                break;
            }

            if (!m_isFragmented) {

                if (mp4sample.chunkIndex >= track->m_chunks.size()) {
                    // skip the unreadable sample; without advancing the index this loop
                    // would revisit the same sample forever
                    m_log->error("Invalid chunk index %d", mp4sample.chunkIndex);
                    track->m_sampledIndex++;
                    continue;
                }

                auto& chunk = track->m_chunks.at(mp4sample.chunkIndex);

                // entering a different chunk restarts the running byte offset
                if (mp4sample.chunkIndex != prevIndex) {
                    chunkOffset = 0;
                }

                auto position = static_cast<int64_t>(chunk.offset + chunkOffset);
                if (position > m_stream->length()) {
                    break;
                }

                if (!m_stream->seek(static_cast<size_t>(position))) {
                    return MediaResult(MediaResult::Error, m_stream->error());
                }
            }

            // the sample is not completely available yet, try again on a later call
            if (m_stream->position() + mp4sample.size > m_stream->length()) {
                break;
            }

            std::shared_ptr<MediaSampleBuffer> sample;

            if (track->isProtected()) {
                auto encryptedSample = std::make_shared<SecureSampleBuffer>();
                encryptedSample->keyId = getSampleKeyId(*track, track->m_sampledIndex);
                encryptedSample->initializationVector = getSampleIV(*track, track->m_sampledIndex);
                encryptedSample->subsampleRange = mp4sample.subsampleRange;
                if (track->m_protectionSchemeType == fourcc("cenc")) {
                    encryptedSample->mode = SecureSampleBuffer::CipherMode::CTR;
                } else if (track->m_protectionSchemeType == fourcc("cbcs")) {
                    encryptedSample->mode = SecureSampleBuffer::CipherMode::CBC;
                }
                // if no subsample data mark the whole sample as encrypted
                if (encryptedSample->subsampleRange.empty()) {
                    encryptedSample->subsampleRange.emplace_back(static_cast<uint16_t>(0), mp4sample.size);
                }
                sample = encryptedSample;
            } else {
                sample = std::make_shared<MediaSampleBuffer>();
            }

            sample->buffer.resize(mp4sample.size);

            auto read = m_stream->read(sample->buffer.data(), mp4sample.size);
            if (read < mp4sample.size) {
                m_log->error("Stream read failed read %d < %d", static_cast<int>(read), mp4sample.size);
                return MediaResult(MediaResult::Error, m_stream->error());
            }

            auto duration = getSampleDuration(*track, mp4sample);
            auto timescale = track->getTimescale();
            sample->duration = MediaTime(duration, timescale);
            sample->decodeTime = MediaTime(track->getBaseMediaDecodeTime() + track->m_sampledDuration, timescale);
            MediaTime compositionTimeOffset(mp4sample.compositionTimeOffset, timescale);
            sample->presentationTime = sample->decodeTime + compositionTimeOffset;

            // sample flags, only sample_is_non_sync_sample is checked
            //bit(4) reserved=0;
            //unsigned int(2) is_leading;
            //unsigned int(2) sample_depends_on;
            //unsigned int(2) sample_is_depended_on;
            //unsigned int(2) sample_has_redundancy;
            //bit(3) sample_padding_value;
            //bit(1) sample_is_non_sync_sample;
            //unsigned int(16) sample_degradation_priority;
            sample->isSyncSample = (mp4sample.flags & 0x00010000) == 0;

            sampled += duration;
            track->m_sampledIndex++;
            track->m_sampledDuration += duration;
            chunkOffset += mp4sample.size;
            prevIndex = mp4sample.chunkIndex;

            callback(*track, sample);
        }
    }

    return MediaResult::Ok;
}

// Repositions every track's read cursor (m_sampledIndex / m_sampledDuration) for `time`.
// Video tracks are positioned first, on a sync sample when the track lists any; the
// remaining tracks are then positioned against the time the video track ended up at.
// Seeking to zero only resets the cursors. Always returns Ok.
MediaResult Mp4Parser::seekTo(MediaTime time)
{
    for (auto& track : m_tracks) {
        track->m_sampledIndex = 0;
        track->m_sampledDuration = 0;
    }

    if (time == MediaTime::zero()) {
        return MediaResult::Ok;
    }

    // find sync point in the video track first
    for (auto& track : m_tracks) {
        if (track->getHandlerType() != MP4_vide) {
            continue;
        }

        uint32_t syncIndex = 0;
        uint64_t syncDuration = 0;
        uint64_t scaledTime = track->scaleTime(time);

        for (uint32_t i = 0; i < track->m_samples.size(); i++) {
            // running total that already includes the duration of sample i
            track->m_sampledDuration += getSampleDuration(*track, track->m_samples[i]);

            // find the closest sync sample in the video track and sync to that time
            if (!track->m_syncSamples.empty()) {
                auto& syncSamples = track->m_syncSamples;

                // linear search of the sync table for every sample
                if (std::find(syncSamples.begin(), syncSamples.end(), i) != syncSamples.end()) {
                    if (track->m_sampledDuration > scaledTime) {
                        // if this sync point is greater than the seek time, break and use the previous
                        // sync point
                        break;
                    } else {
                        syncIndex = i;
                        syncDuration = track->m_sampledDuration;
                    }
                }
            } else if (track->m_sampledDuration >= scaledTime) {
                // no sync table: take the first sample that reaches the seek time
                syncIndex = i;
                syncDuration = track->m_sampledDuration;
                break;
            }
        }

        track->m_sampledIndex = syncIndex;
        track->m_sampledDuration = syncDuration;
        // the other tracks below are aligned to the position actually chosen for video
        time = track->scaleTime(syncDuration);
    }

    for (auto& track : m_tracks) {
        if (track->getHandlerType() == MP4_vide) {
            continue;
        }

        uint64_t scaledTime = track->scaleTime(time);
        for (uint32_t i = 0; i < track->m_samples.size(); i++) {
            track->m_sampledDuration += getSampleDuration(*track, track->m_samples[i]);

            if (track->m_sampledDuration >= scaledTime) {
                track->m_sampledIndex = i;
                break;
            }
        }
        // NOTE(review): when no sample reaches scaledTime the index stays 0 while
        // m_sampledDuration holds the whole track total; when one does, the stored duration
        // already includes sample i itself — confirm both are intended
    }

    return MediaResult::Ok;
}

// True when at least one track exists and every track has delivered all of its samples.
bool Mp4Parser::isEnded() const
{
    if (m_tracks.empty()) {
        return false;
    }

    return std::none_of(m_tracks.begin(), m_tracks.end(),
        [](const std::shared_ptr<Mp4Track>& track) {
            return track->m_sampledIndex < track->m_samples.size();
        });
}

// Reads one byte from the stream. Yields 0 when the stream fills nothing in.
uint8_t Mp4Parser::readUint8()
{
    uint8_t data = 0; // defined result even when the read comes up short
    m_stream->read(&data, 1);
    return data;
}

// Reads a big-endian 16-bit unsigned integer from the stream.
// Bytes the stream does not fill in are treated as 0.
uint16_t Mp4Parser::readUint16()
{
    uint8_t data[2] = {}; // zeroed so a short read never yields indeterminate bytes
    m_stream->read(data, 2);
    return static_cast<uint16_t>((static_cast<unsigned>(data[0]) << 8) | static_cast<unsigned>(data[1]));
}

// Reads a big-endian 32-bit unsigned integer from the stream.
// Bytes the stream does not fill in are treated as 0.
uint32_t Mp4Parser::readUint32()
{
    uint8_t data[4] = {}; // zeroed so a short read never yields indeterminate bytes
    m_stream->read(data, 4);
    // widen before shifting: a uint8_t promotes to signed int, and shifting a value
    // >= 0x80 left by 24 would move a bit into the sign position
    return (static_cast<uint32_t>(data[0]) << 24)
        | (static_cast<uint32_t>(data[1]) << 16)
        | (static_cast<uint32_t>(data[2]) << 8)
        | static_cast<uint32_t>(data[3]);
}

// Reads a big-endian 64-bit unsigned integer as two consecutive 32-bit words.
uint64_t Mp4Parser::readUint64()
{
    const uint64_t upperWord = readUint32(); // most significant word comes first
    const uint64_t lowerWord = readUint32();
    return (upperWord << 32) | lowerWord;
}

// Reads `size` bytes from the stream into a new buffer.
// The buffer always has `size` elements; bytes the stream does not supply stay 0.
std::vector<uint8_t> Mp4Parser::readBuffer(size_t size)
{
    std::vector<uint8_t> bytes;
    bytes.resize(size);
    m_stream->read(bytes.data(), bytes.size());
    return bytes;
}

// Reads a null terminated string, consuming at most `maxSize` bytes from the stream.
// The terminator is consumed but not stored. Reading stops at the terminator, after
// `maxSize` bytes, or when the stream delivers no further data, so bytes belonging to
// whatever follows are never eaten.
std::string Mp4Parser::readNullTerminatedString(size_t maxSize)
{
    std::string result;
    while (result.size() < maxSize) {
        uint8_t value = 0; // a failed read leaves 0 behind, which ends the string
        m_stream->read(&value, 1);
        if (value == '\0') {
            break;
        }
        result.push_back(static_cast<char>(value));
    }
    return result;
}

// Reads one box header at the current stream position into `box`.
// On return box.size and box.type are filled in and box.data holds the stream position
// at which the caller continues reading the box.
void Mp4Parser::readBox(mp4box& box)
{
    box.size = readUint32();
    box.type = readUint32();

    // a 32-bit size of 1 is replaced by the 64-bit value that follows the type
    if (box.size == 1) {
        box.size = readUint64();
    }
    // NOTE(review): after a 64-bit size box.data lies 16 bytes into the box, while the
    // callers in this file step over boxes using BoxHeaderSize (8) — confirm large boxes
    // are skipped correctly

    if (box.type == MP4_uuid) { // 16 byte uuid
        const int UUIDSize = 16;
        m_stream->read(box.usertype, UUIDSize);
        // rewind so that box.data still refers to the position right after the header
        m_stream->seek(static_cast<size_t>(m_stream->position() - UUIDSize));
    }

    box.data = static_cast<size_t>(m_stream->position());
}

// Walks the child boxes of `parent`, invoking `callback` for each of them.
// The range handed to the (start, limit) overload begins at parent.data and spans
// parent.size bytes.
MediaResult Mp4Parser::readBoxes(const mp4box& parent, std::function<bool(mp4box&)>&& callback)
{
    const size_t first = parent.data;
    const size_t last = first + static_cast<size_t>(parent.size);
    return readBoxes(first, last, std::move(callback));
}

// Reads all the boxes in the byte range [start, limit - BoxHeaderSize), calling `callback`
// on each box header. The callback returns false to stop the walk early.
//
// Returns an Error carrying the stream error when a seek fails, otherwise Ok — including
// when the stream is shorter than the requested range (incomplete file), when the range
// cannot hold a single box header, or when a malformed box ends the walk.
MediaResult Mp4Parser::readBoxes(size_t start, size_t limit, std::function<bool(mp4box&)>&& callback)
{
    if (!m_stream->seek(start)) {
        return MediaResult(MediaResult::Error, m_stream->error());
    }

    // a range too small for one header holds no boxes; checking first also keeps the
    // unsigned subtraction below from wrapping around
    if (limit < static_cast<size_t>(BoxHeaderSize)) {
        return MediaResult::Ok;
    }
    const size_t end = limit - BoxHeaderSize;

    size_t position = start;

    if (m_stream->length() < static_cast<int64_t>(end)) { // file may be incomplete or truncated
        return MediaResult::Ok;
    }

    while (position < end) {
        mp4box box;
        readBox(box);
        if (!callback(box)) {
            break;
        }

        // a size smaller than its own header (including 0) cannot be stepped over: the
        // walk would re-read the same bytes forever, so stop here
        if (static_cast<uint64_t>(box.size) < static_cast<uint64_t>(BoxHeaderSize)) {
            break;
        }

        // box.data is just past the header, so this is the first byte after the box
        auto offset = static_cast<int64_t>(box.data + box.size - BoxHeaderSize);

        if (offset > m_stream->length()) {
            break;
        }

        if (!m_stream->seek(static_cast<size_t>(offset))) {
            return MediaResult(MediaResult::Error, m_stream->error());
        }

        position += static_cast<size_t>(box.size);
    }

    return MediaResult::Ok;
}

// Reads the leading fields of the ftyp box: major brand, minor version and one
// compatible brand entry.
void Mp4Parser::read_ftyp()
{
    auto& fileType = m_ftyp;
    fileType.major_brand = readUint32();
    fileType.minor_version = readUint32();
    fileType.compatible_brand = readUint32();
}

void Mp4Parser::read_moov(const mp4box& moov)
{
    readBoxes(moov, [this, moov](mp4box& box) {
        switch (box.type) {
        case MP4_mvhd:
            read_mvhd();
            break;

        case MP4_trak:
            read_trak(box);
            break;

        case MP4_mvex:
            read_mvex(box);
            break;

        case MP4_pssh: {
            auto position = m_stream->position();
            auto boxSize = static_cast<size_t>(box.size);
            // copy the whole pssh box which is needed by drm apis
            m_psshBytes.emplace_back();
            auto& psshBytes = m_psshBytes.back();
            psshBytes.clear();
            psshBytes.resize(boxSize);
            m_stream->seek(static_cast<size_t>(position - BoxHeaderSize));
            m_stream->read(psshBytes.data(), boxSize);
            m_stream->seek(static_cast<size_t>(position));
            read_pssh();
        } break;

        default:
            unhandledBox(moov, box);
            break;
        }
        return true;
    });
}

void Mp4Parser::read_emsg(const mp4box& emsg)
{
    auto start = m_stream->position();
    readUint32(); // version / flags
    m_emsgs.emplace_back();
    auto& message = m_emsgs.back();
    auto remaining = emsg.size - (m_stream->position() - start);
    message.scheme_id_uri = readNullTerminatedString(remaining);
    remaining = emsg.size - (m_stream->position() - start);
    message.value = readNullTerminatedString(remaining);
    message.timescale = readUint32();
    message.presentation_time_delta = readUint32();
    message.event_duration = readUint32();
    message.id = readUint32();
    remaining = emsg.size - (m_stream->position() - start);
    message.data = readBuffer(remaining);
}

void Mp4Parser::read_mvex(const mp4box& mvex)
{
    m_isFragmented = true;
    readBoxes(mvex, [this, &mvex](mp4box& box) {
        switch (box.type) {
        case MP4_trex:
            read_trex();
            break;
        default:
            unhandledBox(mvex, box);
            break;
        }
        return true;
    });
}

// Reads the timing fields of the mvhd box into m_mvhd.
// Version 0 stores the times and the duration as 32-bit values, version 1 as 64-bit
// values; the timescale is 32-bit in both. Other versions leave m_mvhd untouched.
void Mp4Parser::read_mvhd()
{
    // the version is the top byte of the combined version/flags word
    uint32_t version = readUint32() >> 24;

    if (version == 0) {
        m_mvhd.creation_time = readUint32();
        m_mvhd.modification_time = readUint32();
        m_mvhd.timescale = readUint32();
        m_mvhd.duration = readUint32();
    } else if (version == 1) {
        m_mvhd.creation_time = readUint64();
        m_mvhd.modification_time = readUint64();
        m_mvhd.timescale = readUint32();
        m_mvhd.duration = readUint64();
    }
}

// Parses a pssh box and appends an entry to m_psshBoxes.
// Only the 16 byte system id and, for non-zero versions, the key id count are stored;
// the key ids themselves and the data size are read and discarded.
void Mp4Parser::read_pssh()
{
    m_psshBoxes.emplace_back();
    auto& pssh = m_psshBoxes.back();
    // the version is the top byte of the combined version/flags word
    uint32_t version = readUint32() >> 24;
    m_stream->read(pssh.systemId, 16);

    if (version) {
        pssh.keyIdCount = readUint32();
        uint8_t keyId[16];
        for (uint32_t i = 0; i < pssh.keyIdCount; i++) {
            m_stream->read(keyId, 16); // keys (not used yet)
        }
    }
    readUint32(); // data size (not used yet)
}

void Mp4Parser::read_trak(const mp4box& trak)
{
    std::shared_ptr<Mp4Track> track = std::make_shared<Mp4Track>();
    m_tracks.push_back(track);
    readBoxes(trak, [this, track, trak](mp4box& box) {
        switch (box.type) {
        case MP4_tkhd:
            read_tkhd(*track);
            break;

        case MP4_mdia:
            read_mdia(*track, box);
            break;

        default:
            unhandledBox(trak, box);
            break;
        }
        return true;
    });
}

// Parses a trex box and records its values as the fragment defaults of the referenced
// track id. The defaults are also copied onto the track when it has already been read.
void Mp4Parser::read_trex()
{
    readUint32(); // version / flags
    const uint32_t trackId = readUint32();

    // fields are stored in the order they appear in the box
    auto& defaults = m_trackFragmentDefaults[trackId];
    defaults.track_ID = trackId;
    defaults.sample_description_index = readUint32();
    defaults.default_sample_duration = readUint32();
    defaults.default_sample_size = readUint32();
    defaults.default_sample_flags = readUint32();

    // this box may appear before the trak box in which case there is no track yet
    if (std::shared_ptr<Mp4Track> track = getTrackById(trackId)) {
        track->m_fragment = defaults;
    }
}

// Parses the tkhd (track header) box into track.m_head and applies any fragment defaults
// that were already recorded for the track id.
// Version 0 uses 32-bit times and duration, version 1 uses 64-bit ones; track_ID and the
// reserved word are 32-bit in both. Width and height are 16.16 fixed point values of which
// only the integer part is kept.
void Mp4Parser::read_tkhd(Mp4Track& track)
{
    mp4tkhd& head = track.m_head;
    // the version is the top byte of the combined version/flags word
    uint32_t version = readUint32() >> 24;

    if (version == 0) {
        head.creation_time = readUint32();
        head.modification_time = readUint32();
        head.track_ID = readUint32();
        head.reserved1 = readUint32();
        head.duration = readUint32();
    } else if (version == 1) {
        head.creation_time = readUint64();
        head.modification_time = readUint64();
        head.track_ID = readUint32();
        head.reserved1 = readUint32();
        // the version 1 duration occupies 64 bits; reading only 32 would shift every
        // following field (layer, matrix, width, height) by four bytes
        head.duration = static_cast<decltype(head.duration)>(readUint64());
    }

    head.reserved2[0] = readUint32();
    head.reserved2[1] = readUint32();
    head.layer = readUint16();
    head.alternate_group = readUint16();
    head.volume = readUint16();
    head.reserved3 = readUint16();

    for (int i = 0; i < 9; i++) {
        head.matrix[i] = readUint32();
    }

    head.width = readUint32() >> 16;
    head.height = readUint32() >> 16;
    // apply any fragment defaults if they were read before
    track.m_fragment = m_trackFragmentDefaults[head.track_ID];

    m_log->debug("track: %d size: %d x %d", head.track_ID, head.width, head.height);
}

// Parses the children of the mdia box of `track`: media information, handler reference
// and media header.
void Mp4Parser::read_mdia(Mp4Track& track, const mp4box& mdia)
{
    readBoxes(mdia, [this, &mdia, &track](mp4box& child) {
        if (child.type == MP4_minf) {
            read_minf(track, child);
        } else if (child.type == MP4_hdlr) {
            read_hdlr(track, child);
        } else if (child.type == MP4_mdhd) {
            read_mdhd(track);
        } else {
            unhandledBox(mdia, child);
        }
        return true;
    });
}

// Parses the hdlr box: stores the handler type and the handler name of `track`.
// The stream must be positioned directly after the box header. The name is whatever
// remains of the box after the fixed fields and is stored byte for byte.
void Mp4Parser::read_hdlr(Mp4Track& track, const mp4box& hdlr)
{
    const auto start = m_stream->position();

    readUint32(); // skip version & flags
    readUint32(); // pre_defined
    track.m_handlerType = readUint32();

    for (int i = 0; i < 3; i++) {
        readUint32(); // reserved 3
    }

    // hdlr.size covers the 8 byte header and the fixed fields consumed above; only the
    // rest belongs to the name, anything more would run into the following box
    const int64_t contentSize = static_cast<int64_t>(hdlr.size) - BoxHeaderSize;
    const int64_t consumed = static_cast<int64_t>(m_stream->position() - start);
    const size_t nameSize = consumed < contentSize ? static_cast<size_t>(contentSize - consumed) : 0;

    if (nameSize == 0) {
        track.m_handlerName.clear();
        return;
    }

    std::vector<uint8_t> buffer(nameSize);
    m_stream->read(buffer.data(), nameSize);
    track.m_handlerName.assign(reinterpret_cast<const char*>(buffer.data()), nameSize);
}

// Parses the children of the minf box of `track`; only the sample table (stbl) is handled.
void Mp4Parser::read_minf(Mp4Track& track, const mp4box& minf)
{
    readBoxes(minf, [this, &minf, &track](mp4box& child) {
        if (child.type == MP4_stbl) {
            read_stbl(track, child);
        } else {
            unhandledBox(minf, child);
        }
        return true;
    });
}

// Reads the timing fields of the mdhd box into track.m_header.
// Version 0 stores the times and the duration as 32-bit values, version 1 as 64-bit
// values; the timescale is 32-bit in both. Other versions leave the header untouched.
void Mp4Parser::read_mdhd(Mp4Track& track)
{
    mp4mdhd& header = track.m_header;
    // the version is the top byte of the combined version/flags word
    uint32_t version = readUint32() >> 24;

    if (version == 0) {
        header.creation_time = readUint32();
        header.modification_time = readUint32();
        header.timescale = readUint32();
        header.duration = readUint32();
    } else if (version == 1) {
        header.creation_time = readUint64();
        header.modification_time = readUint64();
        header.timescale = readUint32();
        header.duration = readUint64();
    }
}

// Parses the children of the stbl (sample table) box of `track`, dispatching each known
// table to its reader. Unknown children are passed to unhandledBox().
void Mp4Parser::read_stbl(Mp4Track& track, const mp4box& stbl)
{
    readBoxes(stbl, [this, &stbl, &track](mp4box& box) {
        switch (box.type) {
        // chunk offsets, 32-bit entries
        case MP4_stco:
            read_stco(track);
            break;

        // chunk offsets, 64-bit entries
        case MP4_co64:
            read_co64(track);
            break;

        // sample to chunk mapping
        case MP4_stsc:
            read_stsc(track);
            break;

        // sample descriptions (codec configuration)
        case MP4_stsd:
            read_stsd(track, box);
            break;

        // sync sample list
        case MP4_stss:
            read_stss(track);
            break;

        case MP4_ctts:
            read_ctts(track);
            break;

        case MP4_stts:
            read_stts(track);
            break;

        case MP4_stsz:
            read_stsz(track);
            break;

        case MP4_sgpd:
            read_sgpd(track);
            break;

        default:
            unhandledBox(stbl, box);
            break;
        }
        return true;
    });
}

// Parses the stco box: sizes the chunk table of `track` to the entry count and stores the
// 32-bit file offset of every chunk.
void Mp4Parser::read_stco(Mp4Track& track)
{
    readUint32(); // skip version & flags
    const uint32_t chunkCount = readUint32();

    auto& chunks = track.m_chunks;
    chunks.resize(chunkCount);

    // the table now holds exactly chunkCount entries, filled in file order
    for (auto& chunk : chunks) {
        const uint32_t fileOffset = readUint32();
        chunk.offset = fileOffset;
    }
}

// Parses the co64 box: sizes the chunk table of `track` to the entry count and stores the
// 64-bit file offset of every chunk.
void Mp4Parser::read_co64(Mp4Track& track)
{
    readUint32(); // skip version & flags
    const uint32_t chunkCount = readUint32();

    auto& chunks = track.m_chunks;
    chunks.resize(chunkCount);

    // the table now holds exactly chunkCount entries, filled in file order
    for (auto& chunk : chunks) {
        const uint64_t fileOffset = readUint64();
        chunk.offset = fileOffset;
    }
}

// Parses the stsc (sample to chunk) box and assigns every sample of `track` its chunk
// index and its position inside that chunk (chunkNumber).
// Entries describe runs of chunks: each entry applies from its first chunk up to the
// chunk before the next entry's first chunk. The last entry is expanded over all samples
// currently present in track.m_samples, so the result depends on which tables were read
// before this one.
void Mp4Parser::read_stsc(Mp4Track& track)
{
    readUint32(); // skip version & flags
    uint32_t entryCount = readUint32();

    uint32_t sampleIndex = 0; // next sample to assign
    uint32_t prevIndex = 0; // chunk index the previous entry ended on

    for (uint32_t i = 0; i < entryCount; i++) {
        uint32_t firstChunk = readUint32();
        uint32_t samplesPerChunk = readUint32();
        uint32_t sampleDescriptionIndex = readUint32();

        // chunk numbers in the box are 1-based
        // NOTE(review): a firstChunk of 0 wraps to 0xFFFFFFFF here — confirm such input is
        // rejected or harmless
        uint32_t chunkIndex = firstChunk - 1;

        if (chunkIndex + 1 > track.m_chunks.size()) {
            track.m_chunks.resize(chunkIndex + 1);
        }

        // copy run chunk info if the previous chunk applies to multiple chunks
        // NOTE(review): the unsigned difference assumes entries are in ascending chunk order
        if (chunkIndex - prevIndex > 1) {
            for (uint32_t k = 1; k < chunkIndex - prevIndex; k++) {
                track.m_chunks[prevIndex + k] = track.m_chunks[prevIndex];

                for (uint32_t j = 0; j < track.m_chunks[prevIndex + k].sample_count; j++) {

                    // grow the sample table on demand
                    if (sampleIndex + 1 > track.m_samples.size()) {
                        track.m_samples.resize(sampleIndex + 1);
                    }

                    auto& sample = track.m_samples.at(sampleIndex);
                    sample.chunkIndex = prevIndex + k;
                    sample.chunkNumber = j;
                    sampleIndex++;
                }
            }
        }

        mp4chunk& chunk = track.m_chunks.at(chunkIndex);
        chunk.sample_count = samplesPerChunk;
        chunk.sample_description_index = sampleDescriptionIndex;

        // last entry: it covers every remaining sample
        if (entryCount - i == 1) {
            if (samplesPerChunk <= 1) {
                // abbreviated stsc table: chunk indexes are simply incrementing per sample
                for (uint32_t j = sampleIndex; j < track.m_samples.size(); j++) {
                    track.m_samples[j].chunkIndex = chunkIndex++;
                    sampleIndex++;
                }
            } else {
                // NOTE(review): the inner loop does not re-check the sample table size, so a
                // sample count that is not a multiple of samplesPerChunk makes at() throw
                while (sampleIndex < track.m_samples.size()) {
                    for (uint32_t j = 0; j < samplesPerChunk; j++) {
                        auto& sample = track.m_samples.at(sampleIndex);
                        sample.chunkIndex = chunkIndex;
                        sample.chunkNumber = j;
                        sampleIndex++;
                    }
                    chunkIndex++;
                }
            }
        } else {
            // intermediate entry: assign the samples of its first chunk, growing the table
            for (uint32_t j = 0; j < samplesPerChunk; j++) {

                if (sampleIndex + 1 > track.m_samples.size()) {
                    track.m_samples.resize(sampleIndex + 1);
                }

                auto& sample = track.m_samples.at(sampleIndex);
                sample.chunkIndex = chunkIndex;
                sample.chunkNumber = j;
                sampleIndex++;
            }
        }

        prevIndex = chunkIndex;
    }
}

// Parses the stsd (sample description) box of `track`.
// Each entry is a box whose type is recorded as the track's codec box type (the last entry
// wins). Audio and video entries have their fixed fields read into track.m_audio /
// track.m_video and the nested codec boxes handed to readCodecData(). Entries of tracks
// with any other handler are skipped as a whole.
void Mp4Parser::read_stsd(Mp4Track& track, const mp4box& stsd)
{
    (void)stsd;
    readUint32(); // skip version & flags
    uint32_t entryCount = readUint32();

    for (uint32_t i = 0; i < entryCount; i++) {
        // position of the entry's box header; its size is counted from here
        size_t start = static_cast<size_t>(m_stream->position());
        mp4box box;
        readBox(box);
        track.m_codecBoxType = box.type; // only supporting 1 type per track

        switch (track.getHandlerType()) {
        case MP4_soun: {
            if (box.type == MP4_mp4a || box.type == MP4_enca) {
                mp4audio& entry = track.m_audio;
                // 6 reserved bytes and the data reference index of the generic sample entry
                entry.reserved1[0] = readUint32();
                entry.reserved1[1] = readUint16();
                readUint16(); // reference_index
                entry.reserved1[0] = readUint32();
                entry.reserved1[1] = readUint32();
                entry.channelcount = readUint16();
                entry.samplesize = readUint16();
                entry.pre_defined = readUint16();
                entry.reserved2 = readUint16();
                // 16.16 fixed point, keep the integer part
                entry.samplerate = readUint32() >> 16;

                m_log->debug("track %d audio channels %d, sample size %d, sample rate %d",
                    track.getId(), entry.channelcount, entry.samplesize, entry.samplerate);
            } else {
                mp4audio& entry = track.m_audio;
                entry.reserved1[0] = readUint32();
                entry.reserved1[1] = readUint32();
                entry.channelcount = readUint16();
                entry.samplesize = readUint16();
                entry.pre_defined = readUint16();
                entry.reserved2 = readUint16();
                entry.samplerate = readUint32();
            }

            // bytes of the entry consumed so far, header included
            auto read = static_cast<size_t>(m_stream->position() - start);
            readCodecData(track, box, read);
        } break;

        case MP4_vide: {
            if (box.type == MP4_avc1 || box.type == MP4_encv || box.type == MP4_vp09) {
                mp4visual& entry = track.m_video;

                m_stream->read(entry.reserved1, 6);
                entry.data_reference_index = readUint16();

                entry.pre_defined1 = readUint16();
                entry.reserved2 = readUint16();

                m_stream->read(entry.pre_defined2, 12);

                entry.width = readUint16();
                entry.height = readUint16();
                entry.horizresolution = readUint32();
                entry.vertresolution = readUint32();
                entry.reserved2 = static_cast<uint16_t>(readUint32());
                entry.frame_count = readUint16();

                m_stream->read(entry.compressorname, 32);

                entry.depth = readUint16();
                entry.pre_defined3 = readUint16();

                // bytes of the entry consumed so far, header included
                auto read = static_cast<size_t>(m_stream->position() - start);
                readCodecData(track, box, read);
            }
        } break;

        case MP4_text: {
            if (box.type == MP4_wvtt) {
                uint8_t reserved[6];
                m_stream->read(reserved, 6);
                readUint16(); // data_reference_index
            }
        } break;

        case MP4_hint:
        case MP4_meta:
        default:
            m_log->debug("track unknown handler %d entries %d", track.getHandlerType(), entryCount);
            // skip the entry: box.size is counted from the start of the entry's header, so
            // the next entry begins at start + size (not at the current position + size,
            // which would overshoot by the header that has already been read)
            m_stream->seek(start + static_cast<size_t>(box.size));
            break;
        }
    }
}

// Scans the child boxes that follow the fixed fields of a sample entry and
// copies codec configuration / protection information into `track`.
//
// box  - the sample entry whose children are being scanned.
// read - bytes of `box` the caller has already consumed; each child adds its
//        size to this running count and scanning stops once it reaches
//        box.size.
void Mp4Parser::readCodecData(Mp4Track& track, const mp4box& box, size_t read)
{
    while (read < box.size) {
        mp4box inner;
        readBox(inner);
        const size_t size = static_cast<size_t>(inner.size);

        // A child that claims to be smaller than its own header cannot be
        // parsed. With a size of zero `read` would never advance (and the
        // default branch would seek back onto the same header), so the loop
        // would never terminate. Stop scanning instead.
        if (size < static_cast<size_t>(BoxHeaderSize)) {
            m_log->warn("sample entry child box has invalid size");
            break;
        }

        switch (inner.type) {
        case MP4_avcC:
        case MP4_esds: {
            // Keep the complete box, header included, as the codec data:
            // rewind over the header that readBox consumed and copy `size` bytes.
            track.m_codecData.clear();
            track.m_codecData.resize(size);
            m_stream->seek(static_cast<size_t>(m_stream->position() - BoxHeaderSize));
            m_stream->read(track.m_codecData.data(), size); // include the whole avcc box
            read += size;
        } break;
        case MP4_vpcC: {
            readUint32(); // version flags
            mp4vp9 video {};
            video.profile = readUint8();
            video.level = readUint8();
            // VPCodecConfigurationRecord packs bitDepth (4 bits),
            // chromaSubsampling (3 bits) and videoFullRangeFlag (1 bit) into a
            // single byte. Reading bitDepth as a byte of its own shifts every
            // following field, including the initialization data size.
            const auto packed = readUint8();
            video.bitDepth = packed >> 4;
            video.chromaSubsampling = (packed >> 1) & 0x07;
            video.videoFullRangeFlag = (packed & 0x01) == 0x01;
            video.colourPrimaries = readUint8();
            video.transferCharacteristics = readUint8();
            video.matrixCoefficients = readUint8();
            const uint16_t dataSize = readUint16();
            if (dataSize) {
                track.m_codecData = readBuffer(dataSize);
            }
            read += size;
        } break;
        case MP4_sinf:
            read_sinf(track, inner);
            read += size;
            break;

        default:
            // Not interpreted: report it and jump over the payload.
            unhandledBox(box, inner);
            read += size;
            m_stream->seek(static_cast<size_t>(m_stream->position() + size - BoxHeaderSize));
            break;
        }
    }
}

// Reads the sync sample table: replaces track.m_syncSamples with the listed
// sample numbers, each stored minus one.
void Mp4Parser::read_stss(Mp4Track& track)
{
    readUint32(); // version & flags are not used
    const uint32_t total = readUint32();

    auto& syncSamples = track.m_syncSamples;
    syncSamples.clear();

    for (uint32_t remaining = total; remaining > 0; --remaining) {
        const uint32_t oneBased = readUint32();
        syncSamples.push_back(oneBased - 1);
    }
}

// Reads the composition offset table. Each entry is a (sample_count,
// sample_offset) pair; the offset is written to that many consecutive
// samples, growing track.m_samples whenever an index falls past its end.
void Mp4Parser::read_ctts(Mp4Track& track)
{
    readUint32(); // version & flags are not used
    const uint32_t runCount = readUint32();

    auto& samples = track.m_samples;
    samples.reserve(runCount);

    uint32_t next = 0;
    for (uint32_t run = 0; run != runCount; ++run) {
        const uint32_t runLength = readUint32();
        const uint32_t offset = readUint32();

        for (uint32_t left = runLength; left > 0; --left) {
            if (samples.size() < next + 1) {
                samples.resize(next + 1);
            }
            samples[next].compositionTimeOffset = offset;
            ++next;
        }
    }
}

// Reads the decoding time-to-sample table. Each entry is a (sample_count,
// sample_delta) pair; the delta becomes the duration of that many consecutive
// samples (growing track.m_samples as needed). The sum of the resulting
// getSampleDuration() values is stored in track.m_duration.
void Mp4Parser::read_stts(Mp4Track& track)
{
    readUint32(); // version & flags are not used
    const uint32_t runCount = readUint32();

    auto& samples = track.m_samples;
    samples.reserve(runCount);

    uint64_t total = 0;
    uint32_t next = 0;
    for (uint32_t run = 0; run != runCount; ++run) {
        const uint32_t runLength = readUint32();
        const uint32_t delta = readUint32();

        for (uint32_t left = runLength; left > 0; --left) {
            if (samples.size() < next + 1) {
                samples.resize(next + 1);
            }

            auto& current = samples[next];
            current.duration = delta;
            total += getSampleDuration(track, current);
            ++next;
        }
    }

    track.m_duration = total;
}

// Reads the sample size table. track.m_samples is resized to the declared
// sample count; every sample then gets either the shared size from the header
// or, when that shared size is zero, its own size read from the table.
void Mp4Parser::read_stsz(Mp4Track& track)
{
    readUint32(); // version & flags are not used
    const uint32_t uniformSize = readUint32();
    const uint32_t count = readUint32();

    auto& samples = track.m_samples;
    samples.resize(count);

    const bool perSampleSizes = (uniformSize == 0);
    for (auto& entry : samples) {
        if (perSampleSizes) {
            entry.size = readUint32();
        } else {
            entry.size = uniformSize;
        }
    }
}

void Mp4Parser::read_moof(const mp4box& moof)
{
    m_moofOffset = static_cast<size_t>(m_stream->position());
    readBoxes(moof, [this, &moof](mp4box& box) {
        switch (box.type) {
        case MP4_mfhd:
            read_mfhd();
            break;

        case MP4_traf:
            read_traf(box);
            break;

        case MP4_meta:
            break;

        default:
            unhandledBox(moof, box);
            break;
        }
        return true;
    });
}

// Reads the movie fragment header: stores its sequence number and marks the
// stream as fragmented.
void Mp4Parser::read_mfhd()
{
    static_cast<void>(readUint32()); // version & flags, unused

    const uint32_t sequence = readUint32();
    m_sequenceNumber = sequence;

    m_isFragmented = true;
}

void Mp4Parser::read_traf(const mp4box& traf)
{
    std::shared_ptr<Mp4Track> track = nullptr;
    readBoxes(traf, [this, &traf, &track](mp4box& box) {
        switch (box.type) {
        case MP4_tfhd:
            track = read_tfhd();
            break;

        case MP4_tfdt:
            if (track) {
                read_tfdt(*track);
            } else {
                m_log->error("tfdt: no track read");
            }
            break;

        case MP4_trun:
            if (track) {
                read_trun(*track);
            } else {
                m_log->error("trun: no track read");
            }
            break;

            // DRM NOTE:
            // we currently only support subsample info in senc, not mdat

        case MP4_sbgp:
            if (track) {
                read_sbgp(*track);
            } else {
                m_log->error("sbgp: no track read");
            }
            break;

        case MP4_saiz:
            if (track) {
                read_saiz(*track);
            } else {
                m_log->error("trun: no track read");
            }
            break;

        case MP4_saio:
            if (track) {
                read_saio(*track);
            } else {
                m_log->error("trun: no track read");
            }
            break;

        case MP4_senc:
            if (track) {
                read_senc(*track);
            } else {
                m_log->error("trun: no track read");
            }
            break;

        default:
            unhandledBox(traf, box);
            break;
        }
        return true;
    });
}

std::shared_ptr<Mp4Track> Mp4Parser::read_tfhd()
{
    uint32_t versionFlags = readUint32();
    uint32_t trackID = readUint32();
    std::shared_ptr<Mp4Track> track = getTrackById(trackID);

    if (track == nullptr) {
        m_log->warn("No track for id %d", trackID);
        return nullptr;
    }

    mp4tfhd& fragment = track->m_fragment;
    fragment.tf_flags = 0x00ffffff & versionFlags;
    fragment.track_ID = trackID;

    // read optional fields
    if (fragment.tf_flags & mp4tfhd_flag::TFHD_BASE_DATA_OFFSET) {
        fragment.base_data_offset = readUint64();
    }

    if (fragment.tf_flags & mp4tfhd_flag::TFHD_STSD_ID) {
        fragment.sample_description_index = readUint32();
    }

    if (fragment.tf_flags & mp4tfhd_flag::TFHD_DEFAULT_DURATION) {
        fragment.default_sample_duration = readUint32();
    }

    if (fragment.tf_flags & mp4tfhd_flag::TFHD_DEFAULT_SIZE) {
        fragment.default_sample_size = readUint32();
    }

    if (fragment.tf_flags & mp4tfhd_flag::TFHD_DEFAULT_FLAGS) {
        fragment.default_sample_flags = readUint32();
    }

    return track;
}

// Reads the track fragment decode time into track.m_baseMediaDecodeTime.
// The value is 64 bits wide when the box version is 1, otherwise 32 bits.
void Mp4Parser::read_tfdt(Mp4Track& track)
{
    const uint32_t boxVersion = readUint32() >> 24; // top byte of version & flags

    if (boxVersion != 1) {
        track.m_baseMediaDecodeTime = readUint32();
    } else {
        track.m_baseMediaDecodeTime = readUint64();
    }
}

// Reads a track run box. track.m_samples is replaced by one entry per sample
// in the run, and track.m_duration is set to the sum of their
// getSampleDuration() values.
//
// Per-sample fields that the run flags do not announce fall back to defaults:
//  - flags: the fragment default, or the m_trackFragmentDefaults entry for
//    this track when the fragment default is zero; the first sample uses
//    first_sample_flags when only that is present.
//  - size: the run default when non-zero, else the fragment default.
void Mp4Parser::read_trun(Mp4Track& track)
{
    auto& samples = track.m_samples;
    samples.clear();

    mp4trun& run = track.m_run;
    const uint32_t versionFlags = readUint32();
    const uint32_t boxVersion = versionFlags >> 24;

    run.sample_count = readUint32();
    samples.resize(run.sample_count);

    const bool hasFirstSampleFlags = (versionFlags & mp4trun_flag::TRUN_FIRST_SAMPLE_FLAGS) != 0;
    const bool hasDuration = (versionFlags & mp4trun_flag::TRUN_SAMPLE_DURATION) != 0;
    const bool hasSize = (versionFlags & mp4trun_flag::TRUN_SAMPLE_SIZE) != 0;
    const bool hasSampleFlags = (versionFlags & mp4trun_flag::TRUN_SAMPLE_FLAGS) != 0;
    const bool hasCts = (versionFlags & mp4trun_flag::TRUN_SAMPLE_CTS) != 0;

    // Optional run-level fields come before the per-sample records.
    if (versionFlags & mp4trun_flag::TRUN_DATA_OFFSET) {
        run.data_offset = readUint32();
    }
    if (hasFirstSampleFlags) {
        run.first_sample_flags = readUint32();
    }

    uint64_t total = 0;
    for (uint32_t index = 0; index < run.sample_count; ++index) {
        mp4sample& current = samples[index];

        // Start from the defaults; explicit per-sample flags override below.
        current.flags = track.m_fragment.default_sample_flags;
        if (current.flags == 0) {
            current.flags = m_trackFragmentDefaults[track.getId()].default_sample_flags;
        }

        if (hasDuration) {
            current.duration = readUint32();
        }

        if (hasSize) {
            current.size = readUint32();
        } else if (run.default_sample_size) {
            current.size = run.default_sample_size;
        } else {
            current.size = track.m_fragment.default_sample_size;
        }

        if (hasSampleFlags) {
            current.flags = readUint32();
        } else if (index == 0 && hasFirstSampleFlags) {
            current.flags = run.first_sample_flags;
        }

        if (hasCts) {
            const uint32_t raw = readUint32();
            if (boxVersion == 0) {
                current.compositionTimeOffset = raw;
            } else {
                // Later versions store the offset as a signed value.
                current.compositionTimeOffset = static_cast<int32_t>(raw);
            }
        }

        total += getSampleDuration(track, current);
    }

    track.m_duration = total;
}

// Reads a sample group description box. Only version 1 boxes with the 'seig'
// grouping type are interpreted; each of their entries is appended to
// track.m_sampleGroup. Any other box is left unread.
void Mp4Parser::read_sgpd(Mp4Track& track)
{
    const uint32_t flags = readUint32();
    const uint32_t version = flags >> 24;
    const uint32_t grouping_type = readUint32();

    if (MP4_seig != grouping_type || 1 != version) {
        return;
    }

    const uint32_t default_length = readUint32();
    for (uint32_t entry_count = readUint32(); entry_count; --entry_count) {
        if (0 == default_length) {
            // Each entry carries its own length. It is consumed but not used.
            // TODO check this (cenc is always the same, so its ok for now)
            readUint32();
        }

        // Build every entry from scratch: reusing one object across iterations
        // after it was moved into the vector would leave optional fields such
        // as constantIV in a moved-from state.
        Mp4Track::SampleGroup sg;
        sg.patternEncryption = readUint8(); // Only for cbcs
        sg.isProtected = readUint8();
        sg.perSampleIVSize = readUint8();
        sg.kid = readBuffer(16);

        // Only used for cbcs (hence not used here)
        if (sg.isProtected == 1 && sg.perSampleIVSize == 0) {
            assert(false);
            auto constant_IV_size = readUint8();
            sg.constantIV = readBuffer(constant_IV_size);
        }

        track.m_sampleGroup.push_back(std::move(sg));
    }
}

// Reads a sample-to-group box and appends its mapping to
// track.m_sampleToGroup in expanded form: one group description index per
// sample. The grouping type is asserted to be 'seig'.
void Mp4Parser::read_sbgp(Mp4Track& track)
{
    const uint32_t versionFlags = readUint32();
    const uint32_t boxVersion = versionFlags >> 24;

    const uint32_t groupingType = readUint32();
    assert(MP4_seig == groupingType);
    static_cast<void>(groupingType); // only referenced by the assert

    if (boxVersion == 1) {
        readUint32(); // grouping_type_parameter, unused
    }

    auto& mapping = track.m_sampleToGroup;
    for (uint32_t remaining = readUint32(); remaining > 0; --remaining) {
        const uint32_t runLength = readUint32();
        const uint32_t descriptionIndex = readUint32();
        // Expand the run: less compact than the on-disk form, but it lets
        // callers index the table directly by sample number.
        mapping.insert(mapping.end(), runLength, descriptionIndex);
    }
}

// Reads the leading fields of a sample auxiliary information sizes box.
// Nothing is stored on the track: the values are consumed from the stream and
// discarded. When the default size is zero only a single per-sample size byte
// is read.
void Mp4Parser::read_saiz(Mp4Track& track)
{
    static_cast<void>(track);

    const uint32_t versionFlags = readUint32();
    if (versionFlags & 1) {
        readUint32(); // aux_info_type, should be cenc/cbcs
        readUint32(); // aux_info_type_parameter, should be 0
    }

    const uint8_t defaultInfoSize = readUint8();
    readUint32(); // sample_count, unused

    std::vector<uint8_t> perSampleSizes;
    if (defaultInfoSize == 0) {
        perSampleSizes.push_back(readUint8());
    }
}

// Reads a sample auxiliary information offsets box and stores the first
// offset, rebased by m_moofOffset, in track.m_sencOffset.
//
// Layout handled (ISO BMFF SampleAuxiliaryInformationOffsetsBox):
//  - an optional aux_info_type / aux_info_type_parameter pair when flag bit 0
//    is set,
//  - a 32-bit entry count,
//  - offsets that are 32 bits wide for version 0 and 64 bits wide otherwise.
void Mp4Parser::read_saio(Mp4Track& track)
{
    const uint32_t versionFlags = readUint32();
    const uint32_t version = versionFlags >> 24;

    if (versionFlags & 1) {
        readUint32(); // aux_info_type, should be cenc/cbcs
        readUint32(); // aux_info_type_parameter, should be 0
    }

    // Keep the full 32-bit count; narrowing it could turn e.g. 257 into 1.
    const uint32_t entry_count = readUint32();
    assert(1 == entry_count); /// using senc instead of mdat, so data is contiguous, hence one entry
    if (entry_count == 0) {
        // No offset follows, so there is nothing to read.
        m_log->warn("saio: no offset entries");
        return;
    }

    using Offset = decltype(track.m_sencOffset);
    if (version == 0) {
        track.m_sencOffset = static_cast<Offset>(readUint32()); // relative to moof
    } else {
        track.m_sencOffset = static_cast<Offset>(readUint64()); // relative to moof
    }
    track.m_sencOffset += m_moofOffset;
}

// Reads a sample encryption box: for every sample its initialization vector
// and, when flag bit 1 is set, its list of (clear bytes, protected bytes)
// subsample ranges. track.m_samples is resized to the box's sample count when
// the two differ.
void Mp4Parser::read_senc(Mp4Track& track)
{
    const uint32_t flags = readUint32();

    // sample_count is a 32-bit field. Narrowing it to 8 bits made any
    // fragment with more than 255 samples wrap around, and the resize below
    // then threw away the samples beyond the wrapped count.
    const uint32_t entry_count = readUint32();
    if (track.m_samples.size() != entry_count) {
        track.m_samples.resize(entry_count);
    }

    // Falls back to 16 bytes when the track declares no per-sample IV size,
    // so the size used here is never zero.
    const auto ivSize = track.m_defaultPerSampleIvSize ? track.m_defaultPerSampleIvSize : 16;
    const bool hasSubsamples = (flags & 2) != 0; // 2 == use subsample

    for (uint32_t i = 0; i < entry_count; ++i) {
        auto& sample = track.m_samples[i];

        sample.initializationVector = readBuffer(static_cast<size_t>(ivSize));
        // if iv size is 8 remaining 8 bytes should be set to zero
        if (ivSize == 8) {
            sample.initializationVector.resize(16, 0);
        }

        if (hasSubsamples) {
            const uint16_t subsampleCount = readUint16();
            for (uint16_t j = 0; j < subsampleCount; ++j) {
                const uint16_t clearBytes = readUint16();
                const uint32_t protectedBytes = readUint32();
                sample.subsampleRange.emplace_back(clearBytes, protectedBytes);
            }
        }
    }
}

// Parses a scheme information box. Its tenc child supplies the track-level
// encryption defaults (pattern byte, protected flag, per-sample IV size, key
// id and, for protected tracks without per-sample IVs, a constant IV). Every
// other child goes to unhandledBox().
void Mp4Parser::read_schi(Mp4Track& track, const mp4box& schi)
{
    readBoxes(schi, [this, &schi, &track](mp4box& child) {
        if (child.type == MP4_tenc) {
            readUint32(); // version & flags, unused
            readUint8(); // reserved

            track.m_patternEncryption = readUint8();
            track.m_defaultIsProtected = readUint8();
            track.m_defaultPerSampleIvSize = readUint8();
            track.m_defaultKeyId = readBuffer(16);

            const bool needsConstantIv
                = track.m_defaultIsProtected == 1 && track.m_defaultPerSampleIvSize == 0;
            if (needsConstantIv) {
                const uint8_t constantIvLength = readUint8();
                track.m_defaultConstantIv = readBuffer(constantIvLength);
            }
        } else {
            unhandledBox(schi, child);
        }
        return true;
    });
}

// Parses a protection scheme information box into `track`:
//  - frma: the original (unprotected) sample entry format,
//  - schm: the protection scheme type and version,
//  - schi: scheme-specific data, handled by read_schi().
// Other children are ignored.
void Mp4Parser::read_sinf(Mp4Track& track, const mp4box& sinf)
{
    readBoxes(sinf, [this, &track](mp4box& box) {
        switch (box.type) {
        case MP4_frma:
            track.m_originalFormat = readUint32();
            break;
        case MP4_schm:
            // schm is a FullBox: its version & flags word precedes the
            // scheme fields. Without skipping it the scheme type received
            // version/flags and the scheme version received the scheme type.
            readUint32(); // version & flags
            track.m_protectionSchemeType = readUint32();
            track.m_protectionSchemeVersion = readUint32();
            break;
        case MP4_schi:
            read_schi(track, box);
            break;
        default:
            break;
        }
        return true;
    });
}

// Hook invoked for boxes the parser does not interpret. Deliberately does
// nothing with either argument.
void Mp4Parser::unhandledBox(const mp4box& parent, mp4box& box)
{
    static_cast<void>(parent);
    static_cast<void>(box);
}
}
}
