Discussion:
[PATCH 01/12] dashenc: fix ISO8601 UTC parsing
(too old to reply)
Peter Große
2017-01-26 23:25:07 UTC
Permalink
From: Anton Schubert <***@mailbox.org>

Appends Z to timestamp to force ISO8601 datetime parsing as UTC.
Without Z, some browsers (Chrome) interpret the timestamp as
localtime and others (Firefox) interpret it as UTC.

Signed-off-by: Anton Schubert <***@mailbox.org>
---
libavformat/dashenc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index ce01860..2b27950 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -429,7 +429,7 @@ static void format_date_now(char *buf, int size)
struct tm *ptm, tmbuf;
ptm = gmtime_r(&t, &tmbuf);
if (ptm) {
- if (!strftime(buf, size, "%Y-%m-%dT%H:%M:%S", ptm))
+ if (!strftime(buf, size, "%Y-%m-%dT%H:%M:%SZ", ptm))
buf[0] = '\0';
}
}
--
2.10.2
Peter Große
2017-01-26 23:25:09 UTC
Permalink
From: Anton Schubert <***@mailbox.org>

to avoid rebuffering on the clientside for difficult network conditions.

Signed-off-by: Anton Schubert <***@mailbox.org>
---
libavformat/dashenc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 865f50a..44785cd 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -481,7 +481,7 @@ static int write_manifest(AVFormatContext *s, int final)
}
}
avio_printf(out, "\tminBufferTime=\"");
- write_time(out, c->last_duration);
+ write_time(out, c->last_duration * 2);
avio_printf(out, "\">\n");
avio_printf(out, "\t<ProgramInformation>\n");
if (title) {
--
2.10.2
Martin Storsjö
2017-01-27 12:33:53 UTC
Permalink
Post by Peter Große
to avoid rebuffering on the clientside for difficult network conditions.
---
libavformat/dashenc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 865f50a..44785cd 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -481,7 +481,7 @@ static int write_manifest(AVFormatContext *s, int final)
}
}
avio_printf(out, "\tminBufferTime=\"");
- write_time(out, c->last_duration);
+ write_time(out, c->last_duration * 2);
avio_printf(out, "\">\n");
avio_printf(out, "\t<ProgramInformation>\n");
if (title) {
--
2.10.2
I guess this is ok. I tried to keep the buffer amount low to allow as
reasonably low latency as possible in practice, but I guess this is a
safer compromise.

// Martin
Peter Große
2017-01-26 23:25:08 UTC
Permalink
From: Peter Große <***@friks.de>

Signed-off-by: Peter Große <***@friiks.de>
---
libavformat/dashenc.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 2b27950..865f50a 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -439,7 +439,7 @@ static int write_manifest(AVFormatContext *s, int final)
DASHContext *c = s->priv_data;
AVIOContext *out;
char temp_filename[1024];
- int ret, i;
+ int ret, i, as_id = 0;
AVDictionaryEntry *title = av_dict_get(s->metadata, "title", NULL, 0);

snprintf(temp_filename, sizeof(temp_filename), "%s.tmp", s->filename);
@@ -494,15 +494,15 @@ static int write_manifest(AVFormatContext *s, int final)
OutputStream *os = &c->streams[0];
int start_index = FFMAX(os->nb_segments - c->window_size, 0);
int64_t start_time = av_rescale_q(os->segments[start_index]->time, s->streams[0]->time_base, AV_TIME_BASE_Q);
- avio_printf(out, "\t<Period start=\"");
+ avio_printf(out, "\t<Period id=\"0\" start=\"");
write_time(out, start_time);
avio_printf(out, "\">\n");
} else {
- avio_printf(out, "\t<Period start=\"PT0.0S\">\n");
+ avio_printf(out, "\t<Period id=\"0\" start=\"PT0.0S\">\n");
}

if (c->has_video) {
- avio_printf(out, "\t\t<AdaptationSet contentType=\"video\" segmentAlignment=\"true\" bitstreamSwitching=\"true\">\n");
+ avio_printf(out, "\t\t<AdaptationSet id=\"%d\" contentType=\"video\" segmentAlignment=\"true\" bitstreamSwitching=\"true\">\n", as_id++);
for (i = 0; i < s->nb_streams; i++) {
AVStream *st = s->streams[i];
OutputStream *os = &c->streams[i];
@@ -515,7 +515,7 @@ static int write_manifest(AVFormatContext *s, int final)
avio_printf(out, "\t\t</AdaptationSet>\n");
}
if (c->has_audio) {
- avio_printf(out, "\t\t<AdaptationSet contentType=\"audio\" segmentAlignment=\"true\" bitstreamSwitching=\"true\">\n");
+ avio_printf(out, "\t\t<AdaptationSet id=\"%d\" contentType=\"audio\" segmentAlignment=\"true\" bitstreamSwitching=\"true\">\n", as_id++);
for (i = 0; i < s->nb_streams; i++) {
AVStream *st = s->streams[i];
OutputStream *os = &c->streams[i];
--
2.10.2
Martin Storsjö
2017-01-27 12:33:04 UTC
Permalink
Post by Peter Große
---
libavformat/dashenc.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
LGTM, will push

// Martin
Peter Große
2017-01-26 23:25:10 UTC
Permalink
From: Peter Große <***@friks.de>

If set, adds a UTCTime tag in the manifest.
See http://vm2.dashif.org/dash.js/docs/jsdocs/MediaPlayer.html#addUTCTimingSource for more information.
Usable default: "https://time.akamai.com/?iso"

Signed-off-by: Peter Große <***@friiks.de>
---
libavformat/dashenc.c | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 44785cd..198932c 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -94,6 +94,7 @@ typedef struct DASHContext {
const char *single_file_name;
const char *init_seg_name;
const char *media_seg_name;
+ const char *utc_timing_url;
} DASHContext;

static int dash_write(void *opaque, uint8_t *buf, int buf_size)
@@ -490,6 +491,9 @@ static int write_manifest(AVFormatContext *s, int final)
av_free(escaped);
}
avio_printf(out, "\t</ProgramInformation>\n");
+ if (c->utc_timing_url)
+ avio_printf(out, "\t<UTCTiming schemeIdUri=\"urn:mpeg:dash:utc:http-xsdate:2014\" value=\"%s\"/>\n", c->utc_timing_url);
+
if (c->window_size && s->nb_streams > 0 && c->streams[0].nb_segments > 0 && !c->use_template) {
OutputStream *os = &c->streams[0];
int start_index = FFMAX(os->nb_segments - c->window_size, 0);
@@ -981,6 +985,7 @@ static const AVOption options[] = {
{ "single_file_name", "DASH-templated name to be used for baseURL. Implies storing all segments in one file, accessed using byte ranges", OFFSET(single_file_name), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, E },
{ "init_seg_name", "DASH-templated name to used for the initialization segment", OFFSET(init_seg_name), AV_OPT_TYPE_STRING, {.str = "init-stream$RepresentationID$.m4s"}, 0, 0, E },
{ "media_seg_name", "DASH-templated name to used for the media segments", OFFSET(media_seg_name), AV_OPT_TYPE_STRING, {.str = "chunk-stream$RepresentationID$-$Number%05d$.m4s"}, 0, 0, E },
+ { "utc_timing_url", "URL of the page that will return the UTC timestamp in ISO format", OFFSET(utc_timing_url), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
{ NULL },
};
--
2.10.2
Martin Storsjö
2017-01-27 12:58:21 UTC
Permalink
Post by Peter Große
If set, adds a UTCTime tag in the manifest.
UTCTiming, not UTCTime, right?
Post by Peter Große
See http://vm2.dashif.org/dash.js/docs/jsdocs/MediaPlayer.html#addUTCTimingSource for more information.
Usable default: "https://time.akamai.com/?iso"
Hmm, I don't see this documented in ISO/IEC 23009-1:2014 at least, and the
documentation from dashif don't seem to define it either.

I won't mind including it, but I'd like to know what standard defines it
before merging.

// Martin
Peter Große
2017-01-27 17:30:36 UTC
Permalink
On Fri, 27 Jan 2017 14:58:21 +0200 (EET)
Post by Martin Storsjö
Post by Peter Große
If set, adds a UTCTime tag in the manifest.
UTCTiming, not UTCTime, right?
Right.
Post by Martin Storsjö
Post by Peter Große
See
http://vm2.dashif.org/dash.js/docs/jsdocs/MediaPlayer.html#addUTCTimingSource
for more information. Usable default: "https://time.akamai.com/?iso"
Hmm, I don't see this documented in ISO/IEC 23009-1:2014 at least, and the
documentation from dashif don't seem to define it either.
I won't mind including it, but I'd like to know what standard defines it
before merging.
Mh true, this is not part of the standard, but introduced in their "Guidelines
for Implementations: DASH-IF Interoperability Points" [1] in Section 4.7.

Which URL shall I reference in the commit message? The guidelines link page,
the PDF, or not at all but mentioning the document's title?

The "usable default" is just a hint. It is not mentioned in any document, it was
just what I found in player implementations (like mentioned in the message
above). I could drop it.

Regards
Peter

[1] http://dashif.org/guidelines/
Martin Storsjö
2017-01-27 20:04:56 UTC
Permalink
Post by Peter Große
On Fri, 27 Jan 2017 14:58:21 +0200 (EET)
Post by Martin Storsjö
Post by Peter Große
If set, adds a UTCTime tag in the manifest.
UTCTiming, not UTCTime, right?
Right.
Post by Martin Storsjö
Post by Peter Große
See
http://vm2.dashif.org/dash.js/docs/jsdocs/MediaPlayer.html#addUTCTimingSource
for more information. Usable default: "https://time.akamai.com/?iso"
Hmm, I don't see this documented in ISO/IEC 23009-1:2014 at least, and the
documentation from dashif don't seem to define it either.
I won't mind including it, but I'd like to know what standard defines it
before merging.
Mh true, this is not part of the standard, but introduced in their "Guidelines
for Implementations: DASH-IF Interoperability Points" [1] in Section 4.7.
Which URL shall I reference in the commit message? The guidelines link page,
the PDF, or not at all but mentioning the document's title?
I'd at least refer to the guidelines page, but linking explicitly to the
PDF as well probably is good.
Post by Peter Große
The "usable default" is just a hint. It is not mentioned in any document, it was
just what I found in player implementations (like mentioned in the message
above). I could drop it.
No, do keep it, any such extra info is useful.

But I started wondering, is there any actual use for this, when we
explicitly list all the segments (either as individual segment filenames
or via the template). As far as I can see, this is mostly necessary if you
implicitly calculate the live edge based on availabilityStartTime and the
segment length. So is there actually any gain from using it in our
configuration?

// Martin
Anton Schubert
2017-01-30 12:33:58 UTC
Permalink
Post by Peter Große
On Fri, 27 Jan 2017 14:58:21 +0200 (EET)
Post by Martin Storsjö
Post by Peter Große
See
http://vm2.dashif.org/dash.js/docs/jsdocs/MediaPlayer.html#addUTCTimingSource
for more information. Usable default: "https://time.akamai.com/?iso"
Hmm, I don't see this documented in ISO/IEC 23009-1:2014 at least, and the
documentation from dashif don't seem to define it either.
I won't mind including it, but I'd like to know what standard defines it
before merging.
Mh true, this is not part of the standard, but introduced in their "Guidelines
for Implementations: DASH-IF Interoperability Points" [1] in Section 4.7.
Which URL shall I reference in the commit message? The guidelines link page,
the PDF, or not at all but mentioning the document's title?
I'd at least refer to the guidelines page, but linking explicitly to the PDF as well probably is good.
Post by Peter Große
The "usable default" is just a hint. It is not mentioned in any document, it was
just what I found in player implementations (like mentioned in the message
above). I could drop it.
No, do keep it, any such extra info is useful.
But I started wondering, is there any actual use for this, when we explicitly list all the segments (either as individual segment filenames or via the template). As far as I can see, this is mostly necessary if you implicitly calculate the live edge based on availabilityStartTime and the segment length. So is there actually any gain from using it in our configuration?
Yes, because the players will always try to calculate the live-edge, as they don't know
whether the segment they are requesting is available yet. They can't trust the
browsers localtime and they don't know whether the availabilityStartTime is ok for
synchronization.

That's also why Shaka-player now gives a warning if there is no UTCTiming tag present
in live-manifests.
https://github.com/google/shaka-player/commit/1fb78929c1551a3e4f1a07323ce67ee622503273

Best,
Anton
Martin Storsjö
2017-01-30 19:05:36 UTC
Permalink
Post by Anton Schubert
Post by Martin Storsjö
But I started wondering, is there any actual use for this, when we
explicitly list all the segments (either as individual segment
filenames or via the template). As far as I can see, this is mostly
necessary if you implicitly calculate the live edge based on
availabilityStartTime and the segment length. So is there actually any
gain from using it in our configuration?
Yes, because the players will always try to calculate the live-edge, as they don't know
whether the segment they are requesting is available yet.
Yes, I know that you need to do this if you use the implicit mechanism
where you don't list the individual segments. But you can also list them
explicitly, which we do by default.

There's (at least) three different ways of indicating the available
segments:

1) Via <SegmentList> and <SegmentURL>, explicitly listing the segments
(-use_template 0)

2) Via <SegmentTemplate> and <SegmentTimeline>, explicitly describing all
individual segments via the timeline. (This is the default, parameters
-use_template 1 -use_timeline 1.)

3) Implicitly, via <SegmentTemplate> and via availabilityStartTime. In
this case, I can understand that the UTCTiming element is needed to
actually sync clocks; otherwise the availabilityStartTime is hard to
interpret correctly.

Now when I wrote the previous mail, I didn't even remember that we did
support case 3) - sorry about that. In that case, I do see that it is
needed.

My question is, for cases 1) and 2), the way you guys interpret the spec,
should the player ignore the actual list of segments (via <SegmentURL> or
<SegmentTimeline>) and use availabilityStartTime to guess which ones
really are available, or can the player just assume that all explicitly
listed segments really are available?
Post by Anton Schubert
They can't trust the browsers localtime
Yes, that goes without saying
Post by Anton Schubert
and they don't know whether the availabilityStartTime is ok for
synchronization.
That's also why Shaka-player now gives a warning if there is no UTCTiming tag present
in live-manifests.
https://github.com/google/shaka-player/commit/1fb78929c1551a3e4f1a07323ce67ee622503273
Ok, so I can see that Shaka warns if the tag isn't available - that should
be reason enough to add it. This was kinda what I wanted to hear to
understand the patch better.

I'm still quite interested to know, though, does Shaka actually use
availabilityStartTime in cases 1) and 2) above?

// Martin
Anton Schubert
2017-02-05 21:03:39 UTC
Permalink
Post by Anton Schubert
But I started wondering, is there any actual use for this, when we explicitly list all the segments (either as individual segment filenames or via the template). As far as I can see, this is mostly necessary if you implicitly calculate the live edge based on availabilityStartTime and the segment length. So is there actually any gain from using it in our configuration?
Yes, because the players will always try to calculate the live-edge, as they don't know
whether the segment they are requesting is available yet.
Yes, I know that you need to do this if you use the implicit mechanism where you don't list the individual segments. But you can also list them explicitly, which we do by default.
1) Via <SegmentList> and <SegmentURL>, explicitly listing the segments (-use_template 0)
2) Via <SegmentTemplate> and <SegmentTimeline>, explicitly describing all individual segments via the timeline. (This is the default, parameters -use_template 1 -use_timeline 1.)
3) Implicitly, via <SegmentTemplate> and via availabilityStartTime. In this case, I can understand that the UTCTiming element is needed to actually sync clocks; otherwise the availabilityStartTime is hard to interpret correctly.
Now when I wrote the previous mail, I didn't even remember that we did support case 3) - sorry about that. In that case, I do see that it is needed.
My question is, for cases 1) and 2), the way you guys interpret the spec, should the player ignore the actual list of segments (via <SegmentURL> or <SegmentTimeline>) and use availabilityStartTime to guess which ones really are available, or can the player just assume that all explicitly listed segments really are available?
As I interpret the spec it assumes that the player calculates the time at which the segments become available and
doesn't just assume them as available.

In Annex A.3.1 it is mentioned that "Segments are available at its assigned URL if at wall-clock time NOW the Segment availability start time is
smaller than or equal to NOW and the Segment availability end time is larger than or equal to NOW."

So the player will need to compare its clock time with the segment availability start times it gets from
the segmentlist/segmenttemplate.
Post by Anton Schubert
they don't know whether the availabilityStartTime is ok for
synchronization.
That's also why Shaka-player now gives a warning if there is no UTCTiming tag present
in live-manifests.
https://github.com/google/shaka-player/commit/1fb78929c1551a3e4f1a07323ce67ee622503273
Ok, so I can see that Shaka warns if the tag isn't available - that should be reason enough to add it. This was kinda what I wanted to hear to understand the patch better.
I'm still quite interested to know, though, does Shaka actually use availabilityStartTime in cases 1) and 2) above?
Shaka-player always uses availabilityStartTime to calculate its live-edge for
live-manifests. Its segmentAvailabilityEnd and seekRangeEnd are determined from that.

Best, Anton
Martin Storsjö
2017-02-05 21:12:14 UTC
Permalink
Post by Anton Schubert
As I interpret the spec it assumes that the player calculates the time
at which the segments become available and doesn't just assume them as
available.
In Annex A.3.1 it is mentioned that "Segments are available at its
assigned URL if at wall-clock time NOW the Segment availability start
time is smaller than or equal to NOW and the Segment availability end
time is larger than or equal to NOW."
So the player will need to compare its clock time with the segment
availability start times it gets from the segmentlist/segmenttemplate.
Shaka-player always uses availabilityStartTime to calculate its
live-edge for live-manifests. Its segmentAvailabilityEnd and
seekRangeEnd are determined from that.
Ok - that's quite annoying. So there's no mechanism in dash to tell it to
skip all this insanity and just make it work like hls does with respect to
this?

// Martin
Anton Schubert
2017-02-05 22:24:06 UTC
Permalink
As I interpret the spec it assumes that the player calculates the time at which the segments become available and doesn't just assume them as available.
In Annex A.3.1 it is mentioned that "Segments are available at its assigned URL if at wall-clock time NOW the Segment availability start time is smaller than or equal to NOW and the Segment availability end time is larger than or equal to NOW."
So the player will need to compare its clock time with the segment availability start times it gets from the segmentlist/segmenttemplate.
Shaka-player always uses availabilityStartTime to calculate its live-edge for live-manifests. Its segmentAvailabilityEnd and seekRangeEnd are determined from that.
Ok - that's quite annoying. So there's no mechanism in dash to tell it to skip all this insanity and just make it work like hls does with respect to this?
Afaik it's only possible for VOD.

# Anton

Peter Große
2017-01-26 23:25:11 UTC
Permalink
The current implementation creates new segments comparing

pkt->pts - first_pts > total_duration

This works fine, but if the keyframe interval is smaller than "min_seg_duration"
segments shorter than the minimum segment duration are created.

Example: keyint=50, min_seg_duration=3000000
segment 1 contains keyframe 1 (duration=2s < total_duration=3s)
and keyframe 2 (duration=4s >= total_duration=3s)
segment 2 contains keyframe 3 (duration=6s >= total_duration=6s)
segment 3 contains keyframe 4 (duration=8s < total_duration=9s)
and keyframe 5 (duration=10s >= total_duration=9s)
...

Segment 2 is only 2s long, shorter than min_seg_duration = 3s.

To fix this, new segments are created based on the actual written duration.
Otherwise the option name "min_seg_duration" is misleading.

Signed-off-by: Peter Große <***@friiks.de>
---
libavformat/dashenc.c | 17 ++++++++---------
1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 198932c..93c292f 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -62,7 +62,7 @@ typedef struct OutputStream {
int ctx_inited;
uint8_t iobuf[32768];
AVIOContext *out;
- int packets_written;
+ int duration_written;
char initfile[1024];
int64_t init_start_pos;
int init_range_length;
@@ -785,7 +785,7 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
int64_t start_pos;
int range_length, index_length = 0;

- if (!os->packets_written)
+ if (!os->duration_written)
continue;

// Flush the single stream that got a keyframe right now.
@@ -823,7 +823,7 @@ static int dash_flush(AVFormatContext *s, int final, int stream)

av_write_frame(os->ctx, NULL);
avio_flush(os->ctx->pb);
- os->packets_written = 0;
+ os->duration_written = 0;

range_length = avio_tell(os->ctx->pb) - start_pos;
if (c->single_file) {
@@ -868,7 +868,6 @@ static int dash_write_packet(AVFormatContext *s, AVPacket *pkt)
DASHContext *c = s->priv_data;
AVStream *st = s->streams[pkt->stream_index];
OutputStream *os = &c->streams[pkt->stream_index];
- int64_t seg_end_duration = (os->segment_index) * (int64_t) c->min_seg_duration;
int ret;

ret = update_stream_extradata(s, os, st->codecpar);
@@ -897,9 +896,9 @@ static int dash_write_packet(AVFormatContext *s, AVPacket *pkt)
os->first_pts = pkt->pts;

if ((!c->has_video || st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) &&
- pkt->flags & AV_PKT_FLAG_KEY && os->packets_written &&
- av_compare_ts(pkt->pts - os->first_pts, st->time_base,
- seg_end_duration, AV_TIME_BASE_Q) >= 0) {
+ pkt->flags & AV_PKT_FLAG_KEY && os->duration_written &&
+ av_compare_ts(os->duration_written + pkt->duration, st->time_base,
+ c->min_seg_duration, AV_TIME_BASE_Q) >= 0) {
int64_t prev_duration = c->last_duration;

c->last_duration = av_rescale_q(pkt->pts - os->start_pts,
@@ -922,7 +921,7 @@ static int dash_write_packet(AVFormatContext *s, AVPacket *pkt)
return ret;
}

- if (!os->packets_written) {
+ if (!os->duration_written) {
// If we wrote a previous segment, adjust the start time of the segment
// to the end of the previous one (which is the same as the mp4 muxer
// does). This avoids gaps in the timeline.
@@ -935,7 +934,7 @@ static int dash_write_packet(AVFormatContext *s, AVPacket *pkt)
os->max_pts = pkt->pts + pkt->duration;
else
os->max_pts = FFMAX(os->max_pts, pkt->pts + pkt->duration);
- os->packets_written++;
+ os->duration_written += pkt->duration;
return ff_write_chained(os->ctx, 0, pkt, s);
}
--
2.10.2
Martin Storsjö
2017-01-27 13:09:52 UTC
Permalink
Post by Peter Große
The current implementation creates new segments comparing
pkt->pts - first_pts > total_duration
I guess this would be more understandable written like this:
pkt->pts - first_pts > nb_segs * min_seg_duration
or
total_duration > nb_segs * min_seg_duration
Post by Peter Große
This works fine, but if the keyframe interval is smaller than "min_seg_duration"
segments shorter than the minimum segment duration are created.
Example: keyint=50, min_seg_duration=3000000
segment 1 contains keyframe 1 (duration=2s < total_duration=3s)
and keyframe 2 (duration=4s >= total_duration=3s)
segment 2 contains keyframe 3 (duration=6s >= total_duration=6s)
segment 3 contains keyframe 4 (duration=8s < total_duration=9s)
and keyframe 5 (duration=10s >= total_duration=9s)
...
Segment 2 is only 2s long, shorter than min_seg_duration = 3s.
FWIW, this was the intended behaviour originally, but I agree that the
option name isn't quite optimal given this.
Post by Peter Große
To fix this, new segments are created based on the actual written duration.
Otherwise the option name "min_seg_duration" is misleading.
---
libavformat/dashenc.c | 17 ++++++++---------
1 file changed, 8 insertions(+), 9 deletions(-)
diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 198932c..93c292f 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -62,7 +62,7 @@ typedef struct OutputStream {
int ctx_inited;
uint8_t iobuf[32768];
AVIOContext *out;
- int packets_written;
+ int duration_written;
char initfile[1024];
int64_t init_start_pos;
int init_range_length;
@@ -785,7 +785,7 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
int64_t start_pos;
int range_length, index_length = 0;
- if (!os->packets_written)
+ if (!os->duration_written)
continue;
// Flush the single stream that got a keyframe right now.
@@ -823,7 +823,7 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
av_write_frame(os->ctx, NULL);
avio_flush(os->ctx->pb);
- os->packets_written = 0;
+ os->duration_written = 0;
range_length = avio_tell(os->ctx->pb) - start_pos;
if (c->single_file) {
@@ -868,7 +868,6 @@ static int dash_write_packet(AVFormatContext *s, AVPacket *pkt)
DASHContext *c = s->priv_data;
AVStream *st = s->streams[pkt->stream_index];
OutputStream *os = &c->streams[pkt->stream_index];
- int64_t seg_end_duration = (os->segment_index) * (int64_t) c->min_seg_duration;
int ret;
ret = update_stream_extradata(s, os, st->codecpar);
@@ -897,9 +896,9 @@ static int dash_write_packet(AVFormatContext *s, AVPacket *pkt)
os->first_pts = pkt->pts;
if ((!c->has_video || st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) &&
- pkt->flags & AV_PKT_FLAG_KEY && os->packets_written &&
- av_compare_ts(pkt->pts - os->first_pts, st->time_base,
- seg_end_duration, AV_TIME_BASE_Q) >= 0) {
+ pkt->flags & AV_PKT_FLAG_KEY && os->duration_written &&
+ av_compare_ts(os->duration_written + pkt->duration, st->time_base,
+ c->min_seg_duration, AV_TIME_BASE_Q) >= 0) {
int64_t prev_duration = c->last_duration;
c->last_duration = av_rescale_q(pkt->pts - os->start_pts,
@@ -922,7 +921,7 @@ static int dash_write_packet(AVFormatContext *s, AVPacket *pkt)
return ret;
}
- if (!os->packets_written) {
+ if (!os->duration_written) {
// If we wrote a previous segment, adjust the start time of the segment
// to the end of the previous one (which is the same as the mp4 muxer
// does). This avoids gaps in the timeline.
@@ -935,7 +934,7 @@ static int dash_write_packet(AVFormatContext *s, AVPacket *pkt)
os->max_pts = pkt->pts + pkt->duration;
else
os->max_pts = FFMAX(os->max_pts, pkt->pts + pkt->duration);
- os->packets_written++;
+ os->duration_written += pkt->duration;
return ff_write_chained(os->ctx, 0, pkt, s);
}
This isn't the best way of achieving what you're trying to do.

Don't accumulate pkt->duration values. In general we can't really assume
that pkt->duration is set sensibly.

In some places we need an estimate of the duration of a packet, and in
those cases we fill it in using the duration of the previous packet (i.e.
the diff between this packets timestamp and the previous one) if we don't
have a definitive value available.

In those cases where we use an estimate of the duration, we make sure to
correct it as soon as possible when we get the real timestamps of the next
packet. But we shouldn't accumulate the values since that would end up
with accumulating the error, blowing it out of proportion at worst.


To get the behaviour you want, couldn't you just change the comparison
like this:

- av_compare_ts(pkt->pts - os->first_pts, st->time_base,
- seg_end_duration, AV_TIME_BASE_Q) >= 0) {
+ av_compare_ts(pkt->pts - os->start_pts, st->time_base,
+ c->min_seg_duration, AV_TIME_BASE_Q) >= 0) {

We already have the start timestamp of the current segment in
os->start_pts.

// Martin
Peter Große
2017-01-27 17:37:55 UTC
Permalink
On Fri, 27 Jan 2017 15:09:52 +0200 (EET)
Post by Martin Storsjö
Post by Peter Große
The current implementation creates new segments comparing
pkt->pts - first_pts > total_duration
pkt->pts - first_pts > nb_segs * min_seg_duration
or
total_duration > nb_segs * min_seg_duration
Agreed.
Post by Martin Storsjö
Post by Peter Große
@@ -935,7 +934,7 @@ static int dash_write_packet(AVFormatContext *s,
AVPacket *pkt) os->max_pts = pkt->pts + pkt->duration;
else
os->max_pts = FFMAX(os->max_pts, pkt->pts + pkt->duration);
- os->packets_written++;
+ os->duration_written += pkt->duration;
return ff_write_chained(os->ctx, 0, pkt, s);
}
This isn't the best way of achieving what you're trying to do.
Don't accumulate pkt->duration values. In general we can't really assume
that pkt->duration is set sensibly.
In some places we need an estimate of the duration of a packet, and in
those cases we fill it in using the duration of the previous packet (i.e.
the diff between this packets timestamp and the previous one) if we don't
have a definitive value available.
In those cases where we use an estimate of the duration, we make sure to
correct it as soon as possible when we get the real timestamps of the next
packet. But we shouldn't accumulate the values since that would end up
with accumulating the error, blowing it out of proportion at worst.
Ah ok, I see.
Post by Martin Storsjö
To get the behaviour you want, couldn't you just change the comparison
- av_compare_ts(pkt->pts - os->first_pts, st->time_base,
- seg_end_duration, AV_TIME_BASE_Q) >= 0) {
+ av_compare_ts(pkt->pts - os->start_pts, st->time_base,
+ c->min_seg_duration, AV_TIME_BASE_Q) >= 0) {
We already have the start timestamp of the current segment in
os->start_pts.
Looks like it, yes. Will change that.

Regards
Peter
Peter Große
2017-01-26 23:25:12 UTC
Permalink
From: Peter Große <***@friks.de>

Bandwidth information is required in the manifest, but not always provided by the demuxer.
So enable hinting the stream bandwidth via a metadata field, supports same values as codec bitrate setting.

Example: -metadata:s:v:0 bitrate=3500k

Signed-off-by: Peter Große <***@friiks.de>
---
libavformat/dashenc.c | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 93c292f..1cd9563 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -25,6 +25,7 @@
#endif

#include "libavutil/avstring.h"
+#include "libavutil/eval.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mathematics.h"
#include "libavutil/opt.h"
@@ -586,6 +587,17 @@ static int dash_write_header(AVFormatContext *s)
char filename[1024];

os->bit_rate = s->streams[i]->codecpar->bit_rate;
+ if (!os->bit_rate) {
+ // check possible bitrates provided via metadata
+ AVDictionaryEntry *bitrate;
+ bitrate = av_dict_get(s->streams[i]->metadata, "bitrate", NULL, 0);
+ if (bitrate) {
+ char *tail;
+ os->bit_rate = av_strtod(bitrate->value, &tail);
+ if (*tail)
+ os->bit_rate = 0;
+ }
+ }
if (os->bit_rate) {
snprintf(os->bandwidth_str, sizeof(os->bandwidth_str),
" bandwidth=\"%d\"", os->bit_rate);
--
2.10.2
Martin Storsjö
2017-01-27 13:13:11 UTC
Permalink
Post by Peter Große
Bandwidth information is required in the manifest, but not always provided by the demuxer.
So enable hinting the stream bandwidth via a metadata field, supports same values as codec bitrate setting.
Example: -metadata:s:v:0 bitrate=3500k
---
libavformat/dashenc.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
This is kinda abusing the metadata mechanism, which is mostly intended to
be used for user-visible metadata like author/title and such.

I'd like to ask for Anton's opinion on this (although I'm pretty sure
we've discussed it before).

// Martin
Anton Khirnov
2017-01-27 18:34:42 UTC
Permalink
Quoting Martin Storsjö (2017-01-27 14:13:11)
Post by Martin Storsjö
Post by Peter Große
Bandwidth information is required in the manifest, but not always provided by the demuxer.
So enable hinting the stream bandwidth via a metadata field, supports same values as codec bitrate setting.
Example: -metadata:s:v:0 bitrate=3500k
---
libavformat/dashenc.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
This is kinda abusing the metadata mechanism, which is mostly intended to
be used for user-visible metadata like author/title and such.
I'd like to ask for Anton's opinion on this (although I'm pretty sure
we've discussed it before).
If we did, I don't remember the conclusion :)

Anyway, I agree that this is a misuse of metadata. There are at least
two methods for passing the stream bitrate to the muxer -- the
AVCodecParameters.bit_rate field and the AV_PKT_DATA_CPB_PROPERTIES side
data.

IIUC, the problem you're trying to solve is that avconv does not set the
bit_rate field in certain situations. If that's the case, that should be
fixed in avconv. We should not design our muxer behaviour around
limitations of one specific caller.

If you describe more fully what's the situation where bit_rate is not
set properly, I might have some more specific ideas for handling this.
--
Anton Khirnov
Peter Große
2017-01-27 19:16:31 UTC
Permalink
On Fri, 27 Jan 2017 19:34:42 +0100
Post by Anton Khirnov
If we did, I don't remember the conclusion :)
Anyway, I agree that this is a misuse of metadata. There are at least
two methods for passing the stream bitrate to the muxer -- the
AVCodecParameters.bit_rate field and the AV_PKT_DATA_CPB_PROPERTIES side
data.
IIUC, the problem you're trying to solve is that avconv does not set the
bit_rate field in certain situations. If that's the case, that should be
fixed in avconv. We should not design our muxer behaviour around
limitations of one specific caller.
If you describe more fully what's the situation where bit_rate is not
set properly, I might have some more specific ideas for handling this.
Hm I see.

I tried to remux an already encoded MKV stream pulled from an icecast server.
So I used -codec copy and the upstream container didn't contain bitrate
metadata.
Avprobe output from a stream dump see below [1].

For DASH I needed to hint the average bandwidth of a stream in the manifest.
Calculating this from the first segment created might be an option, but I felt
more comfortable explicitly hinting the bandwidth via a metadata option.

Other than for the manifest, this value isn't used anywhere, as far as I
remember.

I thought the parser for the adaptation_set option is complex enough. I tried
to avoid writing another one for adding metadata to individual streams.

Yes, the obvious solution would be to fix the upstream encoder. But I'm not
sure, whether I get all possible codecs to write an average bitrate to their
stream metadata.

Regards
Peter

[1]
avprobe version v13_dev0-754-g87afb34, Copyright (c) 2007-2017 the Libav
developers built on Jan 26 2017 22:50:58 with gcc 4.9.4 (Gentoo 4.9.4 p1.0,
pie-0.6.4) [matroska,webm @ 0x170fba0] Unknown entry 0x56BB
[matroska,webm @ 0x170fba0] Unknown entry 0x56BB
[matroska,webm @ 0x170fba0] Unknown entry 0x56BB
Input #0, matroska,webm, from 'test.mkv':
Metadata:
ICY-NAME : no name
ICY-METADATA : 1
ICY-PUB : 0
ENCODER : Lavf57.56.100
Duration: 00:00:19.76, start: 0.000000, bitrate: N/A
Stream #0:0: Video: vp8
yuv420p, 1920x1080, PAR 1:1 DAR 16:9
25 fps, 1k tbn (default)
Metadata:
ENCODER : Lavc56.60.100 libvpx
DURATION : 00:00:19.760000000
Stream #0:1: Video: vp8
yuv420p, 720x576, PAR 64:45 DAR 16:9
25 fps, 1k tbn (default)
Metadata:
ENCODER : Lavc56.60.100 libvpx
DURATION : 00:00:19.720000000
Stream #0:2: Audio: opus
48000 Hz, mono, fltp (default)
Metadata:
ENCODER : Lavc56.60.100 libopus
DURATION : 00:00:19.726000000
Stream #0:3: Audio: opus
48000 Hz, mono, fltp (default)
Metadata:
ENCODER : Lavc56.60.100 libopus
DURATION : 00:00:19.726000000
Stream #0:4: Audio: opus
48000 Hz, mono, fltp (default)
Metadata:
ENCODER : Lavc56.60.100 libopus
DURATION : 00:00:19.726000000
Stream #0:5: Audio: mp3
44100 Hz, 1 channels, s16p, 96 kb/s (default)
Metadata:
ENCODER : Lavc56.60.100 libmp3lame
DURATION : 00:00:19.726000000
Stream #0:6: Audio: mp3
44100 Hz, 1 channels, s16p, 96 kb/s (default)
Metadata:
ENCODER : Lavc56.60.100 libmp3lame
DURATION : 00:00:19.726000000
Stream #0:7: Audio: mp3
44100 Hz, 1 channels, s16p, 96 kb/s (default)
Metadata:
ENCODER : Lavc56.60.100 libmp3lame
DURATION : 00:00:19.726000000
# avprobe output
Anton Khirnov
2017-01-27 20:08:16 UTC
Permalink
Quoting Peter Große (2017-01-27 20:16:31)
Post by Peter Große
On Fri, 27 Jan 2017 19:34:42 +0100
Post by Anton Khirnov
If we did, I don't remember the conclusion :)
Anyway, I agree that this is a misuse of metadata. There are at least
two methods for passing the stream bitrate to the muxer -- the
AVCodecParameters.bit_rate field and the AV_PKT_DATA_CPB_PROPERTIES side
data.
IIUC, the problem you're trying to solve is that avconv does not set the
bit_rate field in certain situations. If that's the case, that should be
fixed in avconv. We should not design our muxer behaviour around
limitations of one specific caller.
If you describe more fully what's the situation where bit_rate is not
set properly, I might have some more specific ideas for handling this.
Hm I see.
I tried to remux an already encoded MKV stream pulled from an icecast server.
So I used -codec copy and the upstream container didn't contain bitrate
metadata.
Avprobe output from a stream dump see below [1].
For DASH I needed to hint the average bandwith of a stream in the manifest.
Calculating this from the first segment created might be an option, but I felt
more comfortable explicitly hinting the bandwidth via a metadata option.
Other than for the manifest, this value isn't used anywhere, as far as I
remember.
I thought the parser for the adaptation_set option is complex enough. I tried
to avoid writing another one for adding metadata to individual streams.
Yes, the obvious solution would be to fix the upstream encoder. But I'm not
sure, whether I get all possible codecs to write an average bitrate to their
stream metadata.
Actually I'd say the issue is just that matroska does not have a field
for storing the bitrate. And the right workaround for that issue would
be allowing avconv to override the muxer bitrate for streamcopy.

The following code should make -b work for streamcopy, then you can just
pull the information from AVCodecParameters.bit_rate:

diff --git a/avconv.c b/avconv.c
index fe60625..94b6da2 100644
--- a/avconv.c
+++ b/avconv.c
@@ -1852,6 +1852,9 @@ static int init_output_stream_streamcopy(OutputStream *ost)

ost->st->time_base = ist->st->time_base;

+ if (ost->bitrate_override)
+ par_dst->bit_rate = ost->bitrate_override;
+
if (ist->st->nb_side_data) {
ost->st->side_data = av_realloc_array(NULL, ist->st->nb_side_data,
sizeof(*ist->st->side_data));
diff --git a/avconv.h b/avconv.h
index 6360f76..3c3f0ef 100644
--- a/avconv.h
+++ b/avconv.h
@@ -162,6 +162,8 @@ typedef struct OptionsContext {
int nb_sample_fmts;
SpecifierOpt *qscale;
int nb_qscale;
+ SpecifierOpt *bitrates;
+ int nb_bitrates;
SpecifierOpt *forced_key_frames;
int nb_forced_key_frames;
SpecifierOpt *force_fps;
@@ -382,6 +384,9 @@ typedef struct OutputStream {
int forced_kf_index;
char *forced_keyframes;

+ // the bitrate to send to the muxer for streamcopy
+ int bitrate_override;
+
char *logfile_prefix;
FILE *logfile;

diff --git a/avconv_opt.c b/avconv_opt.c
index 8b43f0f..e078a0b 100644
--- a/avconv_opt.c
+++ b/avconv_opt.c
@@ -952,6 +952,7 @@ static OutputStream *new_output_stream(OptionsContext *o, AVFormatContext *oc, e
const char *bsfs = NULL;
char *next, *codec_tag = NULL;
double qscale = -1;
+ int bitrate = 0;

if (!st) {
av_log(NULL, AV_LOG_FATAL, "Could not alloc stream.\n");
@@ -1091,6 +1092,14 @@ static OutputStream *new_output_stream(OptionsContext *o, AVFormatContext *oc, e
ost->enc_ctx->global_quality = FF_QP2LAMBDA * qscale;
}

+ MATCH_PER_STREAM_OPT(bitrates, i, bitrate, oc, st);
+ if (bitrate > 0) {
+ if (ost->stream_copy)
+ ost->bitrate_override = bitrate;
+ else
+ ost->enc_ctx->bit_rate = bitrate;
+ }
+
ost->max_muxing_queue_size = 128;
MATCH_PER_STREAM_OPT(max_muxing_queue_size, i, ost->max_muxing_queue_size, oc, st);
ost->max_muxing_queue_size *= sizeof(AVPacket);
@@ -2570,6 +2579,8 @@ const OptionDef options[] = {
{ "qscale", HAS_ARG | OPT_EXPERT | OPT_DOUBLE |
OPT_SPEC | OPT_OUTPUT, { .off = OFFSET(qscale) },
"use fixed quality scale (VBR)", "q" },
+ { "b", HAS_ARG | OPT_INT | OPT_SPEC | OPT_OUTPUT, { .off = OFFSET(bitrates) },
+ "set stream bitrate in bits/second", "bitrate" },
{ "filter", HAS_ARG | OPT_STRING | OPT_SPEC | OPT_OUTPUT, { .off = OFFSET(filters) },
"set stream filterchain", "filter_list" },
{ "filter_script", HAS_ARG | OPT_STRING | OPT_SPEC | OPT_OUTPUT, { .off = OFFSET(filter_scripts) },
--
Anton Khirnov
Peter Große
2017-01-28 00:39:21 UTC
Permalink
On Fri, 27 Jan 2017 21:08:16 +0100
Post by Anton Khirnov
Actually I'd say the issue is just that matroska does not have a field
for storing the bitrate. And the right workaround for that issue would
be allowing avconv to override the muxer bitrate for streamcopy.
The following code should make -b work for streamcopy, then you can just
That works like a charm ;)

So I drop my patch.

Thanks ;)

Regards
Peter
Peter Große
2017-01-26 23:25:13 UTC
Permalink
Bandwidth information is required in the manifest, but not always
provided by the demuxer. If not available via metadata calculate
bandwidth based on the size and duration of the first segment.

Signed-off-by: Peter Große <***@friiks.de>
---
libavformat/dashenc.c | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 1cd9563..1b12d4f 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -846,6 +846,17 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
if (ret < 0)
break;
}
+
+ if (!os->bit_rate) {
+ // calculate average bitrate of first segment
+ double bitrate = (int)( (double) range_length * 8.0 * AV_TIME_BASE / (double)(os->max_pts - os->start_pts) );
+ if (bitrate >= 0 && bitrate <= INT64_MAX) {
+ os->bit_rate = bitrate;
+ snprintf(os->bandwidth_str, sizeof(os->bandwidth_str),
+ " bandwidth=\"%d\"", os->bit_rate);
+ }
+ }
+
add_segment(os, filename, os->start_pts, os->max_pts - os->start_pts, start_pos, range_length, index_length);
av_log(s, AV_LOG_VERBOSE, "Representation %d media segment %d written to: %s\n", i, os->segment_index, full_path);
}
--
2.10.2
Martin Storsjö
2017-01-27 13:21:20 UTC
Permalink
Post by Peter Große
Bandwidth information is required in the manifest, but not always
provided by the demuxer. If not available via metadata calculate
bandwith based on the size and duration of the first segment.
---
libavformat/dashenc.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 1cd9563..1b12d4f 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -846,6 +846,17 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
if (ret < 0)
break;
}
+
+ if (!os->bit_rate) {
+ // calculate average bitrate of first segment
+ double bitrate = (int)( (double) range_length * 8.0 * AV_TIME_BASE / (double)(os->max_pts - os->start_pts) );
+ if (bitrate >= 0 && bitrate <= INT64_MAX) {
+ os->bit_rate = bitrate;
+ snprintf(os->bandwidth_str, sizeof(os->bandwidth_str),
+ " bandwidth=\"%d\"", os->bit_rate);
+ }
+ }
+
add_segment(os, filename, os->start_pts, os->max_pts - os->start_pts, start_pos, range_length, index_length);
av_log(s, AV_LOG_VERBOSE, "Representation %d media segment %d written to: %s\n", i, os->segment_index, full_path);
}
--
2.10.2
This probably is fine with me.

We can have $Bandwidth$ as part of the segment filename template - I guess
this is too late to get it right for those cases? Although I don't mind if
we don't support that case when we don't have a nominal bitrate set and
try to use it in the template.

// Martin
Peter Große
2017-01-27 17:51:22 UTC
Permalink
On Fri, 27 Jan 2017 15:21:20 +0200 (EET)
Post by Martin Storsjö
Post by Peter Große
Bandwidth information is required in the manifest, but not always
provided by the demuxer. If not available via metadata calculate
bandwith based on the size and duration of the first segment.
This probably is fine with me.
We can have $Bandwidth$ as part of the segment filename template - I guess
this is too late to get it right for those cases? Although I don't mind if
we don't support that case when we don't have a nominal bitrate set and
try to use it in the template.
Then we have to postpone opening any file handle until the first call to
dash_flush, right?
Which should be possible, but requires some code to be moved. Using a dyn_buf
(patch 10) might also help here, so writing to files happens as late as possible.

Regards
Peter
Martin Storsjö
2017-01-27 20:24:35 UTC
Permalink
Post by Peter Große
On Fri, 27 Jan 2017 15:21:20 +0200 (EET)
Post by Martin Storsjö
Post by Peter Große
Bandwidth information is required in the manifest, but not always
provided by the demuxer. If not available via metadata calculate
bandwith based on the size and duration of the first segment.
This probably is fine with me.
We can have $Bandwidth$ as part of the segment filename template - I guess
this is too late to get it right for those cases? Although I don't mind if
we don't support that case when we don't have a nominal bitrate set and
try to use it in the template.
Than we have to postpone opening any file handle until the first call to
dash_flush, right?
Which should be possible, but requires some code to be moved. Using a dyn_buf
(patch 10) might also help here, so writing to files happens as late a possible.
Hmm, indeed. If we postpone this patch until after that, this should be
easier to handle right. (The alternative would be to just count the
bitrate of the codec payload as we pass into the chained muxer; the
bit_rate value we use otherwise also refers to only the codec payload data
not including the container, but since we're probably doing the dyn_buf
stuff anyway, that's even simpler.)

// Martin
Peter Große
2017-01-28 01:00:39 UTC
Permalink
On Fri, 27 Jan 2017 22:24:35 +0200 (EET)
Post by Martin Storsjö
Post by Peter Große
Than we have to postpone opening any file handle until the first call to
dash_flush, right?
Which should be possible, but requires some code to be moved. Using a
dyn_buf (patch 10) might also help here, so writing to files happens as
late a possible.
Hmm, indeed. If we postpone this patch until after that, this should be
easier to handle right. (The alternative would be to just count the
bitrate of the codec payload as we pass into the chained muxer; the
bit_rate value we use otherwise also refers to only the codec payload data
not including the container, but since we're probably doing the dyn_buf
stuff anyway, that's even simpler.)
Mh, after thinking about this a bit more, I'm not sure the complexity is worth
it.

I'd have to store the contents of the dyn_buf containing the init data somewhere
until the first packet is ready to be flushed so I can calculate the average
bitrate to be used in the filenames. Only then I know the init segment filename.

This might be ok for mp4, since everything happens within the first call to
dash_flush. But for webm this doesn't work that easy.

So maybe the patch is ok for now.

Regards
Peter
Martin Storsjö
2017-01-28 19:51:25 UTC
Permalink
Post by Peter Große
On Fri, 27 Jan 2017 22:24:35 +0200 (EET)
Post by Martin Storsjö
Post by Peter Große
Than we have to postpone opening any file handle until the first call to
dash_flush, right?
Which should be possible, but requires some code to be moved. Using a
dyn_buf (patch 10) might also help here, so writing to files happens as
late a possible.
Hmm, indeed. If we postpone this patch until after that, this should be
easier to handle right. (The alternative would be to just count the
bitrate of the codec payload as we pass into the chained muxer; the
bit_rate value we use otherwise also refers to only the codec payload data
not including the container, but since we're probably doing the dyn_buf
stuff anyway, that's even simpler.)
Mh, after thinking about this a bit more, I'm not sure the complexity is worth
it.
I'd have to store the contents of the dyn_buf containing the init data somewhere
until the first packet is ready to be flushed so I can calculate the average
bitrate to be used in the filenames. Only then I know the init segment filename.
This might be ok for mp4, since everything happens within the first call to
dash_flush. But for webm this doesn't work that easy.
So maybe the patch is ok for now.
Ok - it's not worth complicating the code too much just for that
pathological case.

// Martin
Peter Große
2017-01-26 23:25:15 UTC
Permalink
Signed-off-by: Peter Große <***@friiks.de>
---
libavformat/dashenc.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index c561ad1..a0c7811 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -460,10 +460,19 @@ static int write_adaptation_set(AVFormatContext *s, AVIOContext *out, int as_ind
{
DASHContext *c = s->priv_data;
AdaptationSet *as = &c->as[as_index];
+ AVDictionaryEntry *lang, *role;
int i;

- avio_printf(out, "\t\t<AdaptationSet id=\"%s\" contentType=\"%s\" segmentAlignment=\"true\" bitstreamSwitching=\"true\">\n",
+ avio_printf(out, "\t\t<AdaptationSet id=\"%s\" contentType=\"%s\" segmentAlignment=\"true\" bitstreamSwitching=\"true\"",
as->id, as->media_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio");
+ lang = av_dict_get(as->metadata, "language", NULL, 0);
+ if (lang)
+ avio_printf(out, " lang=\"%s\"", lang->value);
+ avio_printf(out, ">\n");
+
+ role = av_dict_get(as->metadata, "role", NULL, 0);
+ if (role)
+ avio_printf(out, "\t\t\t<Role schemeIdUri=\"urn:mpeg:dash:role:2011\" value=\"%s\"/>\n", role->value);

for (i = 0; i < s->nb_streams; i++) {
OutputStream *os = &c->streams[i];
@@ -669,6 +678,14 @@ static int write_manifest(AVFormatContext *s, int final)
return ff_rename(temp_filename, s->filename);
}

+static int dict_copy_entry(AVDictionary **dst, const AVDictionary *src, const char *key)
+{
+ AVDictionaryEntry *entry = av_dict_get(src, key, NULL, 0);
+ if (entry)
+ av_dict_set(dst, key, entry->value, AV_DICT_DONT_OVERWRITE);
+ return 0;
+}
+
static int dash_write_header(AVFormatContext *s)
{
DASHContext *c = s->priv_data;
@@ -713,6 +730,7 @@ static int dash_write_header(AVFormatContext *s)

for (i = 0; i < s->nb_streams; i++) {
OutputStream *os = &c->streams[i];
+ AdaptationSet *as = &c->as[os->as_idx - 1];
AVFormatContext *ctx;
AVStream *st;
AVDictionary *opts = NULL;
@@ -743,6 +761,10 @@ static int dash_write_header(AVFormatContext *s)
}
}

+ // copy AdaptationSet language and role from stream metadata
+ dict_copy_entry(&as->metadata, s->streams[i]->metadata, "language");
+ dict_copy_entry(&as->metadata, s->streams[i]->metadata, "role");
+
ctx = avformat_alloc_context();
if (!ctx) {
ret = AVERROR(ENOMEM);
--
2.10.2
Martin Storsjö
2017-01-27 13:42:57 UTC
Permalink
Post by Peter Große
---
libavformat/dashenc.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index c561ad1..a0c7811 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -460,10 +460,19 @@ static int write_adaptation_set(AVFormatContext *s, AVIOContext *out, int as_ind
{
DASHContext *c = s->priv_data;
AdaptationSet *as = &c->as[as_index];
+ AVDictionaryEntry *lang, *role;
int i;
- avio_printf(out, "\t\t<AdaptationSet id=\"%s\" contentType=\"%s\" segmentAlignment=\"true\" bitstreamSwitching=\"true\">\n",
+ avio_printf(out, "\t\t<AdaptationSet id=\"%s\" contentType=\"%s\" segmentAlignment=\"true\" bitstreamSwitching=\"true\"",
as->id, as->media_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio");
+ lang = av_dict_get(as->metadata, "language", NULL, 0);
+ if (lang)
+ avio_printf(out, " lang=\"%s\"", lang->value);
+ avio_printf(out, ">\n");
+
+ role = av_dict_get(as->metadata, "role", NULL, 0);
+ if (role)
+ avio_printf(out, "\t\t\t<Role schemeIdUri=\"urn:mpeg:dash:role:2011\" value=\"%s\"/>\n", role->value);
for (i = 0; i < s->nb_streams; i++) {
OutputStream *os = &c->streams[i];
@@ -669,6 +678,14 @@ static int write_manifest(AVFormatContext *s, int final)
return ff_rename(temp_filename, s->filename);
}
+static int dict_copy_entry(AVDictionary **dst, const AVDictionary *src, const char *key)
+{
+ AVDictionaryEntry *entry = av_dict_get(src, key, NULL, 0);
+ if (entry)
+ av_dict_set(dst, key, entry->value, AV_DICT_DONT_OVERWRITE);
+ return 0;
+}
+
static int dash_write_header(AVFormatContext *s)
{
DASHContext *c = s->priv_data;
@@ -713,6 +730,7 @@ static int dash_write_header(AVFormatContext *s)
for (i = 0; i < s->nb_streams; i++) {
OutputStream *os = &c->streams[i];
+ AdaptationSet *as = &c->as[os->as_idx - 1];
AVFormatContext *ctx;
AVStream *st;
AVDictionary *opts = NULL;
@@ -743,6 +761,10 @@ static int dash_write_header(AVFormatContext *s)
}
}
+ // copy AdaptationSet language and role from stream metadata
+ dict_copy_entry(&as->metadata, s->streams[i]->metadata, "language");
+ dict_copy_entry(&as->metadata, s->streams[i]->metadata, "role");
+
ctx = avformat_alloc_context();
if (!ctx) {
ret = AVERROR(ENOMEM);
--
2.10.2
This seems ok, even though it (mis)uses metadata for this kind of info. Or
what does Anton think of this one?

// Martin
Peter Große
2017-01-26 23:25:14 UTC
Permalink
This patch is based on the stream assignment code in webmdashenc.

Additional changes:
* Default to one AdaptationSet per stream

Previously all mapped streams of a media type (video, audio) were assigned
to a single AdaptationSet. Using the DASH live profile it is mandatory that
the segments of all representations are aligned, which is currently not
enforced. This leads to problems when using video streams with different
key frame intervals. So to play safe, default to one AdaptationSet per stream,
unless overwritten by explicit assignment
* Make sure all streams are assigned to exactly one AdaptationSet

Signed-off-by: Peter Große <***@friiks.de>
---
libavformat/dashenc.c | 193 ++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 162 insertions(+), 31 deletions(-)

diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 1b12d4f..c561ad1 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -24,6 +24,7 @@
#include <unistd.h>
#endif

+#include "libavutil/avutil.h"
#include "libavutil/avstring.h"
#include "libavutil/eval.h"
#include "libavutil/intreadwrite.h"
@@ -58,9 +59,15 @@ typedef struct Segment {
int n;
} Segment;

+typedef struct AdaptationSet {
+ char id[10];
+ enum AVMediaType media_type;
+ AVDictionary *metadata;
+} AdaptationSet;
+
typedef struct OutputStream {
AVFormatContext *ctx;
- int ctx_inited;
+ int ctx_inited, as_idx;
uint8_t iobuf[32768];
AVIOContext *out;
int duration_written;
@@ -79,6 +86,9 @@ typedef struct OutputStream {

typedef struct DASHContext {
const AVClass *class; /* Class for private options. */
+ char *adaptation_sets;
+ AdaptationSet *as;
+ int nb_as;
int window_size;
int extra_window_size;
int min_seg_duration;
@@ -87,7 +97,7 @@ typedef struct DASHContext {
int use_timeline;
int single_file;
OutputStream *streams;
- int has_video, has_audio;
+ int has_video;
int64_t last_duration;
int64_t total_duration;
char availability_start_time[100];
@@ -176,6 +186,16 @@ static void dash_free(AVFormatContext *s)
{
DASHContext *c = s->priv_data;
int i, j;
+
+ if (c->as) {
+ for (i = 0; i < c->nb_as; i++) {
+ if (&c->as[i].metadata)
+ av_dict_free(&c->as[i].metadata);
+ }
+ av_freep(&c->as);
+ c->nb_as = 0;
+ }
+
if (!c->streams)
return;
for (i = 0; i < s->nb_streams; i++) {
@@ -436,12 +456,144 @@ static void format_date_now(char *buf, int size)
}
}

+static int write_adaptation_set(AVFormatContext *s, AVIOContext *out, int as_index)
+{
+ DASHContext *c = s->priv_data;
+ AdaptationSet *as = &c->as[as_index];
+ int i;
+
+ avio_printf(out, "\t\t<AdaptationSet id=\"%s\" contentType=\"%s\" segmentAlignment=\"true\" bitstreamSwitching=\"true\">\n",
+ as->id, as->media_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio");
+
+ for (i = 0; i < s->nb_streams; i++) {
+ OutputStream *os = &c->streams[i];
+
+ if (os->as_idx - 1 != as_index)
+ continue;
+
+ if (as->media_type == AVMEDIA_TYPE_VIDEO) {
+ avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"video/mp4\" codecs=\"%s\"%s width=\"%d\" height=\"%d\">\n",
+ i, os->codec_str, os->bandwidth_str, s->streams[i]->codecpar->width, s->streams[i]->codecpar->height);
+ } else {
+ avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"audio/mp4\" codecs=\"%s\"%s audioSamplingRate=\"%d\">\n",
+ i, os->codec_str, os->bandwidth_str, s->streams[i]->codecpar->sample_rate);
+ avio_printf(out, "\t\t\t\t<AudioChannelConfiguration schemeIdUri=\"urn:mpeg:dash:23003:3:audio_channel_configuration:2011\" value=\"%d\" />\n",
+ s->streams[i]->codecpar->channels);
+ }
+ output_segment_list(os, out, c);
+ avio_printf(out, "\t\t\t</Representation>\n");
+ }
+ avio_printf(out, "\t\t</AdaptationSet>\n");
+
+ return 0;
+}
+
+static int parse_adaptation_sets(AVFormatContext *s)
+{
+ DASHContext *c = s->priv_data;
+ char *p = c->adaptation_sets;
+ char *q;
+ enum { new_set, parse_id, parsing_streams } state;
+ int i;
+
+ /* default: one AdaptationSet for each stream */
+ if (!p) {
+ void *mem = av_mallocz(sizeof(*c->as) * s->nb_streams);
+ if (!mem)
+ return AVERROR(ENOMEM);
+ c->as = mem;
+ c->nb_as = s->nb_streams;
+
+ for (i = 0; i < s->nb_streams; i++) {
+ AdaptationSet *as = &c->as[i];
+ OutputStream *os = &c->streams[i];
+ snprintf(as->id, sizeof(as->id), "%d", i);
+ as->metadata = NULL;
+ as->media_type = s->streams[i]->codecpar->codec_type;
+ os->as_idx = i + 1;
+ }
+ return 0;
+ }
+
+ /* syntax id=0,streams=0,1,2 id=1,streams=3,4 and so on */
+ state = new_set;
+ while (p < c->adaptation_sets + strlen(c->adaptation_sets)) {
+ if (*p == ' ') {
+ continue;
+ } else if (state == new_set && !strncmp(p, "id=", 3)) {
+ AdaptationSet *as;
+ void *mem = av_realloc(c->as, sizeof(*c->as) * (c->nb_as + 1));
+ if (!mem)
+ return AVERROR(ENOMEM);
+ c->as = mem;
+ ++c->nb_as;
+
+ as = &c->as[c->nb_as - 1];
+ as->metadata = NULL;
+ as->media_type = AVMEDIA_TYPE_UNKNOWN;
+
+ p += 3; // consume "id="
+ q = as->id;
+ while (*p != ',') *q++ = *p++;
+ *q = 0;
+ p++;
+ state = parse_id;
+ } else if (state == parse_id && !strncmp(p, "streams=", 8)) {
+ p += 8; // consume "streams="
+ state = parsing_streams;
+ } else if (state == parsing_streams) {
+ struct AdaptationSet *as = &c->as[c->nb_as - 1];
+ OutputStream *os;
+ char *stream_identifier;
+
+ q = p;
+ while (*q != '\0' && *q != ',' && *q != ' ') q++;
+ stream_identifier = av_strndup(p, q - p);
+
+ i = strtol(stream_identifier, NULL, 10);
+ if (i < 0 || i >= s->nb_streams) {
+ av_log(s, AV_LOG_ERROR, "Selected stream \"%s\" not found!\n", stream_identifier);
+ return -1;
+ }
+
+ os = &c->streams[i];
+ if (as->media_type == AVMEDIA_TYPE_UNKNOWN) {
+ as->media_type = s->streams[i]->codecpar->codec_type;
+ } else if (as->media_type != s->streams[i]->codecpar->codec_type) {
+ av_log(s, AV_LOG_ERROR, "Mixing codec types within an AdaptationSet is not allowed\n");
+ return -1;
+ } else if (os->as_idx) {
+ av_log(s, AV_LOG_ERROR, "Assigning a stream to more than one AdaptationSet is not allowed\n");
+ return -1;
+ }
+ os->as_idx = c->nb_as;
+ av_free(stream_identifier);
+
+ if (*q == '\0') break;
+ if (*q == ' ') state = new_set;
+ p = ++q;
+ } else {
+ return -1;
+ }
+ }
+
+ /* check for unassigned streams */
+ for (i = 0; i < s->nb_streams; i++) {
+ OutputStream *os = &c->streams[i];
+ if (!os->as_idx) {
+ av_log(s, AV_LOG_ERROR, "Stream %d is not mapped to an AdaptationSet\n", i);
+ return -1;
+ }
+ }
+ return 0;
+}
+
static int write_manifest(AVFormatContext *s, int final)
{
DASHContext *c = s->priv_data;
AVIOContext *out;
char temp_filename[1024];
- int ret, i, as_id = 0;
+ int ret, i;
AVDictionaryEntry *title = av_dict_get(s->metadata, "title", NULL, 0);

snprintf(temp_filename, sizeof(temp_filename), "%s.tmp", s->filename);
@@ -506,32 +658,9 @@ static int write_manifest(AVFormatContext *s, int final)
avio_printf(out, "\t<Period id=\"0\" start=\"PT0.0S\">\n");
}

- if (c->has_video) {
- avio_printf(out, "\t\t<AdaptationSet id=\"%d\" contentType=\"video\" segmentAlignment=\"true\" bitstreamSwitching=\"true\">\n", as_id++);
- for (i = 0; i < s->nb_streams; i++) {
- AVStream *st = s->streams[i];
- OutputStream *os = &c->streams[i];
- if (st->codecpar->codec_type != AVMEDIA_TYPE_VIDEO)
- continue;
- avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"video/mp4\" codecs=\"%s\"%s width=\"%d\" height=\"%d\">\n", i, os->codec_str, os->bandwidth_str, st->codecpar->width, st->codecpar->height);
- output_segment_list(&c->streams[i], out, c);
- avio_printf(out, "\t\t\t</Representation>\n");
- }
- avio_printf(out, "\t\t</AdaptationSet>\n");
- }
- if (c->has_audio) {
- avio_printf(out, "\t\t<AdaptationSet id=\"%d\" contentType=\"audio\" segmentAlignment=\"true\" bitstreamSwitching=\"true\">\n", as_id++);
- for (i = 0; i < s->nb_streams; i++) {
- AVStream *st = s->streams[i];
- OutputStream *os = &c->streams[i];
- if (st->codecpar->codec_type != AVMEDIA_TYPE_AUDIO)
- continue;
- avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"audio/mp4\" codecs=\"%s\"%s audioSamplingRate=\"%d\">\n", i, os->codec_str, os->bandwidth_str, st->codecpar->sample_rate);
- avio_printf(out, "\t\t\t\t<AudioChannelConfiguration schemeIdUri=\"urn:mpeg:dash:23003:3:audio_channel_configuration:2011\" value=\"%d\" />\n", st->codecpar->channels);
- output_segment_list(&c->streams[i], out, c);
- avio_printf(out, "\t\t\t</Representation>\n");
- }
- avio_printf(out, "\t\t</AdaptationSet>\n");
+ for (i = 0; i < c->nb_as; i++) {
+ if ((ret = write_adaptation_set(s, out, i)) < 0)
+ return ret;
}
avio_printf(out, "\t</Period>\n");
avio_printf(out, "</MPD>\n");
@@ -579,6 +708,9 @@ static int dash_write_header(AVFormatContext *s)
goto fail;
}

+ if ((ret = parse_adaptation_sets(s)) < 0)
+ goto fail;
+
for (i = 0; i < s->nb_streams; i++) {
OutputStream *os = &c->streams[i];
AVFormatContext *ctx;
@@ -669,8 +801,6 @@ static int dash_write_header(AVFormatContext *s)
s->avoid_negative_ts = ctx->avoid_negative_ts;
if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
c->has_video = 1;
- else if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
- c->has_audio = 1;

set_codec_str(s, st->codecpar, os->codec_str, sizeof(os->codec_str));
os->first_pts = AV_NOPTS_VALUE;
@@ -997,6 +1127,7 @@ static int dash_write_trailer(AVFormatContext *s)
#define OFFSET(x) offsetof(DASHContext, x)
#define E AV_OPT_FLAG_ENCODING_PARAM
static const AVOption options[] = {
+ { "adaptation_sets", "Adaptation sets. Syntax: id=0,streams=0,1,2 id=1,streams=3,4 and so on", OFFSET(adaptation_sets), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
{ "window_size", "number of segments kept in the manifest", OFFSET(window_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, E },
{ "extra_window_size", "number of segments kept outside of the manifest before removing from disk", OFFSET(extra_window_size), AV_OPT_TYPE_INT, { .i64 = 5 }, 0, INT_MAX, E },
{ "min_seg_duration", "minimum segment duration (in microseconds)", OFFSET(min_seg_duration), AV_OPT_TYPE_INT64, { .i64 = 5000000 }, 0, INT_MAX, E },
--
2.10.2
Martin Storsjö
2017-01-27 13:39:30 UTC
Permalink
Post by Peter Große
This patch is based on the stream assignment code in webmdashenc.
This muxer isn't available in libav, so the reference doesn't really work
in context here.
Post by Peter Große
* Default to one AdaptationSet per stream
Previously all mapped streams of a media type (video, audio) where assigned
to a single AdaptationSet. Using the DASH live profile it is mandatory, that
the segments of all representations are aligned, which is currently not
enforced. This leads to problems when using video streams with different
key frame intervals. So to play safe, default to one AdaptationSet per stream,
unless overwritten by explicit assignment
Is there any way to easily get it back to the original behaviour, e.g.
tell it to group all audio tracks together and all video tracks together,
without having to manually specify the stream numbers?
Post by Peter Große
* Make sure all streams are assigned to exactly one AdaptationSet
---
libavformat/dashenc.c | 193 ++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 162 insertions(+), 31 deletions(-)
diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 1b12d4f..c561ad1 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -24,6 +24,7 @@
#include <unistd.h>
#endif
+#include "libavutil/avutil.h"
#include "libavutil/avstring.h"
#include "libavutil/eval.h"
#include "libavutil/intreadwrite.h"
@@ -58,9 +59,15 @@ typedef struct Segment {
int n;
} Segment;
+typedef struct AdaptationSet {
+ char id[10];
+ enum AVMediaType media_type;
+ AVDictionary *metadata;
+} AdaptationSet;
+
typedef struct OutputStream {
AVFormatContext *ctx;
- int ctx_inited;
+ int ctx_inited, as_idx;
uint8_t iobuf[32768];
AVIOContext *out;
int duration_written;
@@ -79,6 +86,9 @@ typedef struct OutputStream {
typedef struct DASHContext {
const AVClass *class; /* Class for private options. */
+ char *adaptation_sets;
+ AdaptationSet *as;
+ int nb_as;
int window_size;
int extra_window_size;
int min_seg_duration;
@@ -87,7 +97,7 @@ typedef struct DASHContext {
int use_timeline;
int single_file;
OutputStream *streams;
- int has_video, has_audio;
+ int has_video;
int64_t last_duration;
int64_t total_duration;
char availability_start_time[100];
@@ -176,6 +186,16 @@ static void dash_free(AVFormatContext *s)
{
DASHContext *c = s->priv_data;
int i, j;
+
+ if (c->as) {
+ for (i = 0; i < c->nb_as; i++) {
+ if (&c->as[i].metadata)
+ av_dict_free(&c->as[i].metadata);
+ }
+ av_freep(&c->as);
+ c->nb_as = 0;
+ }
+
if (!c->streams)
return;
for (i = 0; i < s->nb_streams; i++) {
@@ -436,12 +456,144 @@ static void format_date_now(char *buf, int size)
}
}
+static int write_adaptation_set(AVFormatContext *s, AVIOContext *out, int as_index)
+{
+ DASHContext *c = s->priv_data;
+ AdaptationSet *as = &c->as[as_index];
+ int i;
+
+ avio_printf(out, "\t\t<AdaptationSet id=\"%s\" contentType=\"%s\" segmentAlignment=\"true\" bitstreamSwitching=\"true\">\n",
+ as->id, as->media_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio");
+
+ for (i = 0; i < s->nb_streams; i++) {
+ OutputStream *os = &c->streams[i];
+
+ if (os->as_idx - 1 != as_index)
+ continue;
+
+ if (as->media_type == AVMEDIA_TYPE_VIDEO) {
+ avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"video/mp4\" codecs=\"%s\"%s width=\"%d\" height=\"%d\">\n",
+ i, os->codec_str, os->bandwidth_str, s->streams[i]->codecpar->width, s->streams[i]->codecpar->height);
+ } else {
+ avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"audio/mp4\" codecs=\"%s\"%s audioSamplingRate=\"%d\">\n",
+ i, os->codec_str, os->bandwidth_str, s->streams[i]->codecpar->sample_rate);
+ avio_printf(out, "\t\t\t\t<AudioChannelConfiguration schemeIdUri=\"urn:mpeg:dash:23003:3:audio_channel_configuration:2011\" value=\"%d\" />\n",
+ s->streams[i]->codecpar->channels);
+ }
+ output_segment_list(os, out, c);
+ avio_printf(out, "\t\t\t</Representation>\n");
+ }
+ avio_printf(out, "\t\t</AdaptationSet>\n");
+
+ return 0;
+}
+
+static int parse_adaptation_sets(AVFormatContext *s)
+{
+ DASHContext *c = s->priv_data;
+ char *p = c->adaptation_sets;
+ char *q;
+ enum { new_set, parse_id, parsing_streams } state;
+ int i;
+
+ /* default: one AdaptationSet for each stream */
+ if (!p) {
+ void *mem = av_mallocz(sizeof(*c->as) * s->nb_streams);
+ if (!mem)
+ return AVERROR(ENOMEM);
+ c->as = mem;
+ c->nb_as = s->nb_streams;
+
+ for (i = 0; i < s->nb_streams; i++) {
+ AdaptationSet *as = &c->as[i];
+ OutputStream *os = &c->streams[i];
+ snprintf(as->id, sizeof(as->id), "%d", i);
+ as->metadata = NULL;
+ as->media_type = s->streams[i]->codecpar->codec_type;
+ os->as_idx = i + 1;
+ }
+ return 0;
+ }
+
+ /* syntax id=0,streams=0,1,2 id=1,streams=3,4 and so on */
+ state = new_set;
+ while (p < c->adaptation_sets + strlen(c->adaptation_sets)) {
Isn't this equivalent to "while (*p) {"?
Post by Peter Große
+ if (*p == ' ') {
+ continue;
If this ever happens, wouldn't you end up in an infinite loop?
Post by Peter Große
+ } else if (state == new_set && !strncmp(p, "id=", 3)) {
Instead of strncmp, you can also use av_strstart to avoid having to
hardcode the number of chars.
Post by Peter Große
+ AdaptationSet *as;
+ void *mem = av_realloc(c->as, sizeof(*c->as) * (c->nb_as + 1));
+ if (!mem)
+ return AVERROR(ENOMEM);
+ c->as = mem;
+ ++c->nb_as;
+
+ as = &c->as[c->nb_as - 1];
+ as->metadata = NULL;
maybe memset(as, 0, sizeof(*as))? Then you'd reduce the risk of missing to
initialize a new field here, if AdaptationSet is expanded later.
Post by Peter Große
+ as->media_type = AVMEDIA_TYPE_UNKNOWN;
+
+ p += 3; // consume "id="
av_strstart can avoid hardcoding this number here
Post by Peter Große
+ q = as->id;
+ while (*p != ',') *q++ = *p++;
Keep normal indentation for the assignment.

The loop lacks a condition for *p == '\0', so unless there's a trailing
comma somewhere, you'll run out of bounds and crash.

It also lacks a check for the buffer size of as->id.

Something like this could also work:

n = strcspn(p, ",");
snprintf(as->id, sizeof(as->id), "%.*s", (int)n, p);
p += n;
if (*p)
p++;
Post by Peter Große
+ *q = 0;
+ p++;
+ state = parse_id;
+ } else if (state == parse_id && !strncmp(p, "streams=", 8)) {
+ p += 8; // consume "streams="
+ state = parsing_streams;
+ } else if (state == parsing_streams) {
+ struct AdaptationSet *as = &c->as[c->nb_as - 1];
+ OutputStream *os;
+ char *stream_identifier;
+
+ q = p;
+ while (*q != '\0' && *q != ',' && *q != ' ') q++;
+ stream_identifier = av_strndup(p, q - p);
This should also be doable with strcspn(p, " ,"). And copy it into a local
stack buffer (e.g. with snprintf as above, to get bounds checking of that
buffer for free) instead of a strdup of a couple-bytes string.
Post by Peter Große
+
+ i = strtol(stream_identifier, NULL, 10);
+ if (i < 0 || i >= s->nb_streams) {
+ av_log(s, AV_LOG_ERROR, "Selected stream \"%s\" not found!\n", stream_identifier);
If you hit this case, you leak stream_identifier (unless you make it a
local stack buffer as I suggested).
Post by Peter Große
+ return -1;
Please return proper error codes, e.g. AVERROR(EINVAL) here.

// Martin
Peter Große
2017-01-27 18:15:48 UTC
Permalink
On Fri, 27 Jan 2017 15:39:30 +0200 (EET)
Post by Martin Storsjö
Post by Peter Große
This patch is based on the stream assignment code in webmdashenc.
This muxer isn't available in libav, so the reference doesn't really work
in context here.
True, I thought I changed to something like "This patch is based on the stream
assignment code of a webm dash muxer by Vignesh Venkatasubramanian".

So far only a big part of the parse_adaption_set function is copied over, the
rest is basically my own code.
But it looks like I'll also have to make changes to that part (regarding your
comments below), so I might not need the reference anymore?!
Post by Martin Storsjö
Post by Peter Große
* Default to one AdaptationSet per stream
Previously all mapped streams of a media type (video, audio) where
assigned to a single AdaptationSet. Using the DASH live profile it is
mandatory, that the segments of all representations are aligned, which is
currently not enforced. This leads to problems when using video streams
with different key frame intervals. So to play safe, default to one
AdaptationSet per stream, unless overwritten by explicit assignment
Is there any way to easily get it back to the original behaviour, e.g.
tell it to group all audio tracks together and all video tracks together,
without having to manually specify the stream numbers?
Post by Peter Große
* Make sure all streams are assigned to exactly one AdaptationSet
---
libavformat/dashenc.c | 193
++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 162
insertions(+), 31 deletions(-)
diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 1b12d4f..c561ad1 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -24,6 +24,7 @@
#include <unistd.h>
#endif
+#include "libavutil/avutil.h"
#include "libavutil/avstring.h"
#include "libavutil/eval.h"
#include "libavutil/intreadwrite.h"
@@ -58,9 +59,15 @@ typedef struct Segment {
int n;
} Segment;
+typedef struct AdaptationSet {
+ char id[10];
+ enum AVMediaType media_type;
+ AVDictionary *metadata;
+} AdaptationSet;
+
typedef struct OutputStream {
AVFormatContext *ctx;
- int ctx_inited;
+ int ctx_inited, as_idx;
uint8_t iobuf[32768];
AVIOContext *out;
int duration_written;
@@ -79,6 +86,9 @@ typedef struct OutputStream {
typedef struct DASHContext {
const AVClass *class; /* Class for private options. */
+ char *adaptation_sets;
+ AdaptationSet *as;
+ int nb_as;
int window_size;
int extra_window_size;
int min_seg_duration;
@@ -87,7 +97,7 @@ typedef struct DASHContext {
int use_timeline;
int single_file;
OutputStream *streams;
- int has_video, has_audio;
+ int has_video;
int64_t last_duration;
int64_t total_duration;
char availability_start_time[100];
@@ -176,6 +186,16 @@ static void dash_free(AVFormatContext *s)
{
DASHContext *c = s->priv_data;
int i, j;
+
+ if (c->as) {
+ for (i = 0; i < c->nb_as; i++) {
+ if (&c->as[i].metadata)
+ av_dict_free(&c->as[i].metadata);
+ }
+ av_freep(&c->as);
+ c->nb_as = 0;
+ }
+
if (!c->streams)
return;
for (i = 0; i < s->nb_streams; i++) {
@@ -436,12 +456,144 @@ static void format_date_now(char *buf, int size)
}
}
+static int write_adaptation_set(AVFormatContext *s, AVIOContext *out, int
as_index) +{
+ DASHContext *c = s->priv_data;
+ AdaptationSet *as = &c->as[as_index];
+ int i;
+
+ avio_printf(out, "\t\t<AdaptationSet id=\"%s\" contentType=\"%s\"
segmentAlignment=\"true\" bitstreamSwitching=\"true\">\n",
"audio"); +
+ for (i = 0; i < s->nb_streams; i++) {
+ OutputStream *os = &c->streams[i];
+
+ if (os->as_idx - 1 != as_index)
+ continue;
+
+ if (as->media_type == AVMEDIA_TYPE_VIDEO) {
+ avio_printf(out, "\t\t\t<Representation id=\"%d\"
mimeType=\"video/mp4\" codecs=\"%s\"%s width=\"%d\" height=\"%d\">\n",
+ i, os->codec_str, os->bandwidth_str,
s->streams[i]->codecpar->width, s->streams[i]->codecpar->height);
+ } else {
+ avio_printf(out, "\t\t\t<Representation id=\"%d\"
mimeType=\"audio/mp4\" codecs=\"%s\"%s audioSamplingRate=\"%d\">\n",
+ i, os->codec_str, os->bandwidth_str,
s->streams[i]->codecpar->sample_rate);
+ avio_printf(out, "\t\t\t\t<AudioChannelConfiguration
schemeIdUri=\"urn:mpeg:dash:23003:3:audio_channel_configuration:2011\"
value=\"%d\" />\n",
+ s->streams[i]->codecpar->channels);
+ }
+ output_segment_list(os, out, c);
+ avio_printf(out, "\t\t\t</Representation>\n");
+ }
+ avio_printf(out, "\t\t</AdaptationSet>\n");
+
+ return 0;
+}
+
+static int parse_adaptation_sets(AVFormatContext *s)
+{
+ DASHContext *c = s->priv_data;
+ char *p = c->adaptation_sets;
+ char *q;
+ enum { new_set, parse_id, parsing_streams } state;
+ int i;
+
+ /* default: one AdaptationSet for each stream */
+ if (!p) {
+ void *mem = av_mallocz(sizeof(*c->as) * s->nb_streams);
+ if (!mem)
+ return AVERROR(ENOMEM);
+ c->as = mem;
+ c->nb_as = s->nb_streams;
+
+ for (i = 0; i < s->nb_streams; i++) {
+ AdaptationSet *as = &c->as[i];
+ OutputStream *os = &c->streams[i];
+ snprintf(as->id, sizeof(as->id), "%d", i);
+ as->metadata = NULL;
+ as->media_type = s->streams[i]->codecpar->codec_type;
+ os->as_idx = i + 1;
+ }
+ return 0;
+ }
+
+ /* syntax id=0,streams=0,1,2 id=1,streams=3,4 and so on */
+ state = new_set;
+ while (p < c->adaptation_sets + strlen(c->adaptation_sets)) {
Isn't this equivalent to "while (*p) {"?
Right.
Post by Martin Storsjö
Post by Peter Große
+ if (*p == ' ') {
+ continue;
If this ever happens, wouldn't you end up in an infinite loop?
Also right.
Post by Martin Storsjö
Post by Peter Große
+ } else if (state == new_set && !strncmp(p, "id=", 3)) {
Instead of strncmp, you can also use av_strstart to avoid having to
hardcode the number of chars.
ok.
Post by Martin Storsjö
Post by Peter Große
+ AdaptationSet *as;
+ void *mem = av_realloc(c->as, sizeof(*c->as) * (c->nb_as + 1));
+ if (!mem)
+ return AVERROR(ENOMEM);
+ c->as = mem;
+ ++c->nb_as;
+
+ as = &c->as[c->nb_as - 1];
+ as->metadata = NULL;
maybe memset(as, 0, sizeof(*as))? Then you'd reduce the risk of missing to
initialize a new field here, if AdaptationSet is expanded later.
Should do the trick, yes.
Post by Martin Storsjö
Post by Peter Große
+ as->media_type = AVMEDIA_TYPE_UNKNOWN;
+
+ p += 3; // consume "id="
av_strstart can avoid hardcoding this number here
ok.
Post by Martin Storsjö
Post by Peter Große
+ q = as->id;
+ while (*p != ',') *q++ = *p++;
Keep normal indentation for the assignment.
The loop lacks a condition for *p == '\0', so unless there's a trailing
comma somewhere, you'll run out of bounds and crash.
It also lacks a check for the buffer size of as->id.
n = strcspn(p, ",");
snprintf(as->id, sizeof(as->id), "%.*s", (int)n, p);
p += n;
if (*p)
p++;
Right.
Post by Martin Storsjö
Post by Peter Große
+ *q = 0;
+ p++;
+ state = parse_id;
+ } else if (state == parse_id && !strncmp(p, "streams=", 8)) {
+ p += 8; // consume "streams="
+ state = parsing_streams;
+ } else if (state == parsing_streams) {
+ struct AdaptationSet *as = &c->as[c->nb_as - 1];
+ OutputStream *os;
+ char *stream_identifier;
+
+ q = p;
+ while (*q != '\0' && *q != ',' && *q != ' ') q++;
+ stream_identifier = av_strndup(p, q - p);
This should also be doable with strcspn(p, " ,"). And copy it into a local
stack buffer (e.g. with snprintf as above, to get bounds checking of that
buffer for free) instead of a strdup of a couple-bytes string.
ok.
Post by Martin Storsjö
Post by Peter Große
+
+ i = strtol(stream_identifier, NULL, 10);
+ if (i < 0 || i >= s->nb_streams) {
+ av_log(s, AV_LOG_ERROR, "Selected stream \"%s\" not
found!\n", stream_identifier);
If you hit this case, you leak stream_identifier (unless you make it a
local stack buffer as I suggested).
Will fix that.
Post by Martin Storsjö
Post by Peter Große
+ return -1;
Please return proper error codes, e.g. AVERROR(EINVAL) here.
ok.

Thanks for the comments ;)

Regards
Peter
Martin Storsjö
2017-01-27 20:28:19 UTC
Permalink
Post by Peter Große
On Fri, 27 Jan 2017 15:39:30 +0200 (EET)
Post by Martin Storsjö
Post by Peter Große
This patch is based on the stream assignment code in webmdashenc.
This muxer isn't available in libav, so the reference doesn't really work
in context here.
True, I thought I changed to something like "This patch is based on the stream
assignment code of a webm dash muxer by Vignesh Venkatasubramanian".
So far only a big part of the parse_adaption_set function is copied over, the
rest is basically my own code.
But it looks like I'll also have to make changes to that part (regarding your
comments below), so I might not need the reference anymore?!
It might be ok to drop the reference altogether. Or perhaps just add
something like "originally based partially on code by V.V."? That way, you
have something like proper attribution, given the amount of code taken
(i.e. much inspiration but not necessarily much literal lines).

// Martin
Martin Storsjö
2017-01-27 13:41:44 UTC
Permalink
Post by Peter Große
This patch is based on the stream assignment code in webmdashenc.
* Default to one AdaptationSet per stream
Previously all mapped streams of a media type (video, audio) where assigned
to a single AdaptationSet. Using the DASH live profile it is mandatory, that
the segments of all representations are aligned, which is currently not
enforced. This leads to problems when using video streams with different
key frame intervals. So to play safe, default to one AdaptationSet per stream,
unless overwritten by explicit assignment
* Make sure all streams are assigned to exactly one AdaptationSet
---
libavformat/dashenc.c | 193 ++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 162 insertions(+), 31 deletions(-)
diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 1b12d4f..c561ad1 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -24,6 +24,7 @@
#include <unistd.h>
#endif
+#include "libavutil/avutil.h"
#include "libavutil/avstring.h"
#include "libavutil/eval.h"
#include "libavutil/intreadwrite.h"
@@ -58,9 +59,15 @@ typedef struct Segment {
int n;
} Segment;
+typedef struct AdaptationSet {
+ char id[10];
+ enum AVMediaType media_type;
+ AVDictionary *metadata;
+} AdaptationSet;
Oh, I also forgot to say; as far as I can see (on a sloppy read-through),
metadata is unused in this patch. If that's really true, skip it here and
only add it in the next patch where it is used. That makes this patch
slightly less confusing.

// Martin
Peter Große
2017-01-26 23:25:16 UTC
Permalink
The dash_write function drops data, if no IOContext is initialized.
This might happen when a subordinate muxer calls avio_flush().
Using a dynamic buffer fixes that.

Signed-off-by: Peter Große <***@friiks.de>
---
libavformat/dashenc.c | 87 +++++++++++++++++++++++++++++----------------------
1 file changed, 50 insertions(+), 37 deletions(-)

diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index a0c7811..86b454e 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -68,11 +68,10 @@ typedef struct AdaptationSet {
typedef struct OutputStream {
AVFormatContext *ctx;
int ctx_inited, as_idx;
- uint8_t iobuf[32768];
AVIOContext *out;
int duration_written;
char initfile[1024];
- int64_t init_start_pos;
+ int64_t init_start_pos, pos;
int init_range_length;
int nb_segments, segments_size, segment_index;
Segment **segments;
@@ -108,14 +107,6 @@ typedef struct DASHContext {
const char *utc_timing_url;
} DASHContext;

-static int dash_write(void *opaque, uint8_t *buf, int buf_size)
-{
- OutputStream *os = opaque;
- if (os->out)
- avio_write(os->out, buf, buf_size);
- return buf_size;
-}
-
// RFC 6381
static void set_codec_str(AVFormatContext *s, AVCodecParameters *par,
char *str, int size)
@@ -182,6 +173,29 @@ static void set_codec_str(AVFormatContext *s, AVCodecParameters *par,
}
}

+static int flush_dynbuf(OutputStream *os, int *range_length)
+{
+ uint8_t *buffer;
+ int ret;
+
+ if (!os->ctx->pb) {
+ return AVERROR(EINVAL);
+ }
+
+ // flush
+ av_write_frame(os->ctx, NULL);
+ avio_flush(os->ctx->pb);
+
+ // write out to file
+ *range_length = avio_close_dyn_buf(os->ctx->pb, &buffer);
+ avio_write(os->out, buffer, *range_length);
+ av_free(buffer);
+
+ // re-open buffer
+ ret = avio_open_dyn_buf(&os->ctx->pb);
+ return ret;
+}
+
static void dash_free(AVFormatContext *s)
{
DASHContext *c = s->priv_data;
@@ -203,7 +217,7 @@ static void dash_free(AVFormatContext *s)
if (os->ctx && os->ctx_inited)
av_write_trailer(os->ctx);
if (os->ctx && os->ctx->pb)
- av_free(os->ctx->pb);
+ ffio_free_dyn_buf(&os->ctx->pb);
ff_format_io_close(s, &os->out);
if (os->ctx)
avformat_free_context(os->ctx);
@@ -689,7 +703,7 @@ static int dict_copy_entry(AVDictionary **dst, const AVDictionary *src, const ch
static int dash_write_header(AVFormatContext *s)
{
DASHContext *c = s->priv_data;
- int ret = 0, i;
+ int ret = 0, i, range_length;
AVOutputFormat *oformat;
char *ptr;
char basename[1024];
@@ -786,11 +800,9 @@ static int dash_write_header(AVFormatContext *s)
st->time_base = s->streams[i]->time_base;
ctx->avoid_negative_ts = s->avoid_negative_ts;

- ctx->pb = avio_alloc_context(os->iobuf, sizeof(os->iobuf), AVIO_FLAG_WRITE, os, NULL, dash_write, NULL);
- if (!ctx->pb) {
- ret = AVERROR(ENOMEM);
+ if ((ret = avio_open_dyn_buf(&ctx->pb)) < 0)
goto fail;
- }
+ ctx->pb->seekable = 0;

if (c->single_file) {
if (c->single_file_name)
@@ -807,11 +819,17 @@ static int dash_write_header(AVFormatContext *s)
os->init_start_pos = 0;

av_dict_set(&opts, "movflags", "frag_custom+dash+delay_moov", 0);
- if ((ret = avformat_write_header(ctx, &opts)) < 0) {
- goto fail;
- }
+ if ((ret = avformat_write_header(ctx, &opts)) < 0)
+ goto fail;
+
+ if ((ret = flush_dynbuf(os, &range_length)) < 0)
+ goto fail;
+
+ if (!c->single_file)
+ ff_format_io_close(s, &os->out);
+
+ os->pos = os->init_range_length = range_length;
os->ctx_inited = 1;
- avio_flush(ctx->pb);
av_dict_free(&opts);

av_log(s, AV_LOG_VERBOSE, "Representation %d init segment will be written to: %s\n", i, filename);
@@ -946,7 +964,6 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
for (i = 0; i < s->nb_streams; i++) {
OutputStream *os = &c->streams[i];
char filename[1024] = "", full_path[1024], temp_path[1024];
- int64_t start_pos;
int range_length, index_length = 0;

if (!os->duration_written)
@@ -964,15 +981,6 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
continue;
}

- if (!os->init_range_length) {
- av_write_frame(os->ctx, NULL);
- os->init_range_length = avio_tell(os->ctx->pb);
- if (!c->single_file)
- ff_format_io_close(s, &os->out);
- }
-
- start_pos = avio_tell(os->ctx->pb);
-
if (!c->single_file) {
dash_fill_tmpl_params(filename, sizeof(filename), c->media_seg_name, i, os->segment_index, os->bit_rate, os->start_pts);
snprintf(full_path, sizeof(full_path), "%s%s", c->dirname, filename);
@@ -980,18 +988,16 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
ret = s->io_open(s, &os->out, temp_path, AVIO_FLAG_WRITE, NULL);
if (ret < 0)
break;
- write_styp(os->ctx->pb);
} else {
snprintf(full_path, sizeof(full_path), "%s%s", c->dirname, os->initfile);
}

- av_write_frame(os->ctx, NULL);
- avio_flush(os->ctx->pb);
- os->duration_written = 0;
+ ret = flush_dynbuf(os, &range_length);
+ if (ret < 0)
+ break;

- range_length = avio_tell(os->ctx->pb) - start_pos;
if (c->single_file) {
- find_index_range(s, full_path, start_pos, &index_length);
+ find_index_range(s, full_path, os->pos, &index_length);
} else {
ff_format_io_close(s, &os->out);
ret = ff_rename(temp_path, full_path);
@@ -1009,8 +1015,15 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
}
}

- add_segment(os, filename, os->start_pts, os->max_pts - os->start_pts, start_pos, range_length, index_length);
+ add_segment(os, filename, os->start_pts, os->max_pts - os->start_pts, os->pos, range_length, index_length);
av_log(s, AV_LOG_VERBOSE, "Representation %d media segment %d written to: %s\n", i, os->segment_index, full_path);
+
+ // set new position
+ os->duration_written = 0;
+ os->pos += range_length;
+
+ // write chunk header
+ write_styp(os->ctx->pb);
}

if (c->window_size || (final && c->remove_at_exit)) {
--
2.10.2
Martin Storsjö
2017-01-27 14:02:23 UTC
Permalink
Post by Peter Große
The dash_write function drops data, if no IOContext is initialized.
This might happen when a subordinate muxer calls avio_flush().
Using a dynamic buffer fixes that.
---
libavformat/dashenc.c | 87 +++++++++++++++++++++++++++++----------------------
1 file changed, 50 insertions(+), 37 deletions(-)
This patch at least needs a mention of what muxer would do that.

Currently, the only subordinate muxer you can use is the mp4 one, and that
one (with the configured options) doesn't flush except when told to.
Post by Peter Große
diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index a0c7811..86b454e 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -68,11 +68,10 @@ typedef struct AdaptationSet {
typedef struct OutputStream {
AVFormatContext *ctx;
int ctx_inited, as_idx;
- uint8_t iobuf[32768];
AVIOContext *out;
int duration_written;
char initfile[1024];
- int64_t init_start_pos;
+ int64_t init_start_pos, pos;
int init_range_length;
int nb_segments, segments_size, segment_index;
Segment **segments;
@@ -108,14 +107,6 @@ typedef struct DASHContext {
const char *utc_timing_url;
} DASHContext;
-static int dash_write(void *opaque, uint8_t *buf, int buf_size)
-{
- OutputStream *os = opaque;
- if (os->out)
- avio_write(os->out, buf, buf_size);
- return buf_size;
-}
-
// RFC 6381
static void set_codec_str(AVFormatContext *s, AVCodecParameters *par,
char *str, int size)
@@ -182,6 +173,29 @@ static void set_codec_str(AVFormatContext *s, AVCodecParameters *par,
}
}
+static int flush_dynbuf(OutputStream *os, int *range_length)
+{
+ uint8_t *buffer;
+ int ret;
+
+ if (!os->ctx->pb) {
+ return AVERROR(EINVAL);
+ }
+
+ // flush
+ av_write_frame(os->ctx, NULL);
+ avio_flush(os->ctx->pb);
+
+ // write out to file
+ *range_length = avio_close_dyn_buf(os->ctx->pb, &buffer);
+ avio_write(os->out, buffer, *range_length);
+ av_free(buffer);
+
+ // re-open buffer
+ ret = avio_open_dyn_buf(&os->ctx->pb);
+ return ret;
The intermediate variable feels a little superfluous here
Post by Peter Große
+}
+
static void dash_free(AVFormatContext *s)
{
DASHContext *c = s->priv_data;
@@ -203,7 +217,7 @@ static void dash_free(AVFormatContext *s)
if (os->ctx && os->ctx_inited)
av_write_trailer(os->ctx);
if (os->ctx && os->ctx->pb)
- av_free(os->ctx->pb);
+ ffio_free_dyn_buf(&os->ctx->pb);
ff_format_io_close(s, &os->out);
if (os->ctx)
avformat_free_context(os->ctx);
@@ -689,7 +703,7 @@ static int dict_copy_entry(AVDictionary **dst, const AVDictionary *src, const ch
static int dash_write_header(AVFormatContext *s)
{
DASHContext *c = s->priv_data;
- int ret = 0, i;
+ int ret = 0, i, range_length;
AVOutputFormat *oformat;
char *ptr;
char basename[1024];
@@ -786,11 +800,9 @@ static int dash_write_header(AVFormatContext *s)
st->time_base = s->streams[i]->time_base;
ctx->avoid_negative_ts = s->avoid_negative_ts;
- ctx->pb = avio_alloc_context(os->iobuf, sizeof(os->iobuf), AVIO_FLAG_WRITE, os, NULL, dash_write, NULL);
- if (!ctx->pb) {
- ret = AVERROR(ENOMEM);
+ if ((ret = avio_open_dyn_buf(&ctx->pb)) < 0)
goto fail;
- }
+ ctx->pb->seekable = 0;
if (c->single_file) {
if (c->single_file_name)
@@ -807,11 +819,17 @@ static int dash_write_header(AVFormatContext *s)
os->init_start_pos = 0;
av_dict_set(&opts, "movflags", "frag_custom+dash+delay_moov", 0);
- if ((ret = avformat_write_header(ctx, &opts)) < 0) {
- goto fail;
- }
+ if ((ret = avformat_write_header(ctx, &opts)) < 0)
+ goto fail;
+
+ if ((ret = flush_dynbuf(os, &range_length)) < 0)
+ goto fail;
+
+ if (!c->single_file)
+ ff_format_io_close(s, &os->out);
+
+ os->pos = os->init_range_length = range_length;
This isn't correct. See c5e7ea13d2d4da0c5da91973a547afff6fe9e011. We
intentionally defer writing the init segment until we have passed the
first segment of packets into the muxer, to handle the cases where the
init segments depend on knowledge from the actual first few packets.

(Proper edit lists for handling b-frame delay needs to know the timestamp
of the first packet, and for AC3, the moov box takes a copy of the full
first packet.)

For using the webm muxer, you probably might need to have this behaviour
conditional on the kind of muxer.
Post by Peter Große
os->ctx_inited = 1;
- avio_flush(ctx->pb);
av_dict_free(&opts);
av_log(s, AV_LOG_VERBOSE, "Representation %d init segment will be written to: %s\n", i, filename);
@@ -946,7 +964,6 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
for (i = 0; i < s->nb_streams; i++) {
OutputStream *os = &c->streams[i];
char filename[1024] = "", full_path[1024], temp_path[1024];
- int64_t start_pos;
int range_length, index_length = 0;
if (!os->duration_written)
@@ -964,15 +981,6 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
continue;
}
- if (!os->init_range_length) {
- av_write_frame(os->ctx, NULL);
- os->init_range_length = avio_tell(os->ctx->pb);
- if (!c->single_file)
- ff_format_io_close(s, &os->out);
- }
-
- start_pos = avio_tell(os->ctx->pb);
-
if (!c->single_file) {
dash_fill_tmpl_params(filename, sizeof(filename), c->media_seg_name, i, os->segment_index, os->bit_rate, os->start_pts);
snprintf(full_path, sizeof(full_path), "%s%s", c->dirname, filename);
@@ -980,18 +988,16 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
ret = s->io_open(s, &os->out, temp_path, AVIO_FLAG_WRITE, NULL);
if (ret < 0)
break;
- write_styp(os->ctx->pb);
} else {
snprintf(full_path, sizeof(full_path), "%s%s", c->dirname, os->initfile);
}
- av_write_frame(os->ctx, NULL);
- avio_flush(os->ctx->pb);
- os->duration_written = 0;
+ ret = flush_dynbuf(os, &range_length);
+ if (ret < 0)
+ break;
- range_length = avio_tell(os->ctx->pb) - start_pos;
if (c->single_file) {
- find_index_range(s, full_path, start_pos, &index_length);
+ find_index_range(s, full_path, os->pos, &index_length);
} else {
ff_format_io_close(s, &os->out);
ret = ff_rename(temp_path, full_path);
@@ -1009,8 +1015,15 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
}
}
- add_segment(os, filename, os->start_pts, os->max_pts - os->start_pts, start_pos, range_length, index_length);
+ add_segment(os, filename, os->start_pts, os->max_pts - os->start_pts, os->pos, range_length, index_length);
av_log(s, AV_LOG_VERBOSE, "Representation %d media segment %d written to: %s\n", i, os->segment_index, full_path);
+
+ // set new position
+ os->duration_written = 0;
+ os->pos += range_length;
+
+ // write chunk header
+ write_styp(os->ctx->pb);
Doesn't this miss writing the styp header for the first segment?

// Martin
Peter Große
2017-01-27 18:38:28 UTC
Permalink
On Fri, 27 Jan 2017 16:02:23 +0200 (EET)
Post by Martin Storsjö
Post by Peter Große
The dash_write function drops data, if no IOContext is initialized.
This might happen when a subordinate muxer calls avio_flush().
Using a dynamic buffer fixes that.
This patch at least needs a mention of what muxer would do that.
Currently, the only subordinate muxer you can use is the mp4 one, and that
one (with the configured options) doesn't flush except when told to.
Ah ok.
Post by Martin Storsjö
Post by Peter Große
+ // re-open buffer
+ ret = avio_open_dyn_buf(&os->ctx->pb);
+ return ret;
The intermediate variable feels a little superfluous here
Agreed.
Post by Martin Storsjö
Post by Peter Große
av_dict_set(&opts, "movflags", "frag_custom+dash+delay_moov", 0);
- if ((ret = avformat_write_header(ctx, &opts)) < 0) {
- goto fail;
- }
+ if ((ret = avformat_write_header(ctx, &opts)) < 0)
+ goto fail;
+
+ if ((ret = flush_dynbuf(os, &range_length)) < 0)
+ goto fail;
+
+ if (!c->single_file)
+ ff_format_io_close(s, &os->out);
+
+ os->pos = os->init_range_length = range_length;
This isn't correct. See c5e7ea13d2d4da0c5da91973a547afff6fe9e011. We
intentionally defer writing the init segment until we have passed the
first segment of packets into the muxer, to handle the cases where the
init segments depend on knowledge from the actual first few packets.
(Proper edit lists for handling b-frame delay needs to know the timestamp
of the first packet, and for AC3, the moov box takes a copy of the full
first packet.)
For using the webm muxer, you probably might need to have this behaviour
conditional on the kind of muxer.
So how do you avoid spilling actual packets into the init segment?
Is it that av_write_frame only writes to the file what was flushed before
using avio_flush?

This code resulted from dealing with matroskaenc having its own idea of when to
flush and start new clusters.

So my first idea would to the move the "writing out the init segment" code to a
function and call it conditionally depending on the muxer.
If webm, call it in write_header. Then in dash_flush call it, if
os->init_range_length is zero.
Post by Martin Storsjö
Post by Peter Große
+ // write chunk header
+ write_styp(os->ctx->pb);
Doesn't this miss writing the styp header for the first segment?
Yes. In earlier versions of the code dash_flush was called with the init
segment still to be written. So it made sense to only write it after the first
one.

Regards
Peter
Martin Storsjö
2017-01-27 20:36:26 UTC
Permalink
Post by Peter Große
On Fri, 27 Jan 2017 16:02:23 +0200 (EET)
Post by Martin Storsjö
Post by Peter Große
av_dict_set(&opts, "movflags", "frag_custom+dash+delay_moov", 0);
- if ((ret = avformat_write_header(ctx, &opts)) < 0) {
- goto fail;
- }
+ if ((ret = avformat_write_header(ctx, &opts)) < 0)
+ goto fail;
+
+ if ((ret = flush_dynbuf(os, &range_length)) < 0)
+ goto fail;
+
+ if (!c->single_file)
+ ff_format_io_close(s, &os->out);
+
+ os->pos = os->init_range_length = range_length;
This isn't correct. See c5e7ea13d2d4da0c5da91973a547afff6fe9e011. We
intentionally defer writing the init segment until we have passed the
first segment of packets into the muxer, to handle the cases where the
init segments depend on knowledge from the actual first few packets.
(Proper edit lists for handling b-frame delay needs to know the timestamp
of the first packet, and for AC3, the moov box takes a copy of the full
first packet.)
For using the webm muxer, you probably might need to have this behaviour
conditional on the kind of muxer.
So how do you avoid spilling actual packets into the init segment?
Is it that av_write_frame only writes to the file what was flushed before
using avio_flush?
This relies on the fact that in frag_custom mode, the muxer itself will
never spontaneously flush any output, it only does that when explicitly
flushed (with av_write_frame(NULL)). When you have the delay_moov flag
added, it won't write anything at all during av_write_header, and will
write the init segment during the first av_write_frame(NULL). So if
changing existing code, if you add delay_moov, you need to add one extra
flush to get the init segment. (If you flush earlier, the moov will be
based more on guesses and less on actual data.)
Post by Peter Große
This code resulted from dealing with matroskaenc having its own idea of when to
flush and start new clusters.
So my first idea would to the move the "writing out the init segment" code to a
function and call it conditionally depending on the muxer.
If webm, call it in write_header. Then in dash_flush call it, if
os->init_range_length is zero.
Yeah, that sounds sensible. Or rather invert the condition - instead of
if (webm), do if (!mp4), since it's specific to the mp4+delay_moov combo.

// Martin;
Peter Große
2017-01-26 23:25:17 UTC
Permalink
Use webm muxer for VP8, VP9 and Opus codec, mp4 muxer otherwise.
Also copy stream metadata to output stream.

Signed-off-by: Peter Große <***@friiks.de>
---
libavformat/dashenc.c | 68 ++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 54 insertions(+), 14 deletions(-)

diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 86b454e..24665cd 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -113,6 +113,23 @@ static void set_codec_str(AVFormatContext *s, AVCodecParameters *par,
{
const AVCodecTag *tags[2] = { NULL, NULL };
uint32_t tag;
+
+ // common Webm codecs are not part of RFC 6381
+ switch (par->codec_id) {
+ case AV_CODEC_ID_VP8:
+ snprintf(str, size, "vp8");
+ return;
+ case AV_CODEC_ID_VP9:
+ snprintf(str, size, "vp9");
+ return;
+ case AV_CODEC_ID_VORBIS:
+ snprintf(str, size, "vorbis");
+ return;
+ case AV_CODEC_ID_OPUS:
+ snprintf(str, size, "opus");
+ return;
+ }
+
if (par->codec_type == AVMEDIA_TYPE_VIDEO)
tags[0] = ff_codec_movvideo_tags;
else if (par->codec_type == AVMEDIA_TYPE_AUDIO)
@@ -495,11 +512,11 @@ static int write_adaptation_set(AVFormatContext *s, AVIOContext *out, int as_ind
continue;

if (as->media_type == AVMEDIA_TYPE_VIDEO) {
- avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"video/mp4\" codecs=\"%s\"%s width=\"%d\" height=\"%d\">\n",
- i, os->codec_str, os->bandwidth_str, s->streams[i]->codecpar->width, s->streams[i]->codecpar->height);
+ avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"video/%s\" codecs=\"%s\"%s width=\"%d\" height=\"%d\">\n",
+ i, os->ctx->oformat->name, os->codec_str, os->bandwidth_str, s->streams[i]->codecpar->width, s->streams[i]->codecpar->height);
} else {
- avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"audio/mp4\" codecs=\"%s\"%s audioSamplingRate=\"%d\">\n",
- i, os->codec_str, os->bandwidth_str, s->streams[i]->codecpar->sample_rate);
+ avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"audio/%s\" codecs=\"%s\"%s audioSamplingRate=\"%d\">\n",
+ i, os->ctx->oformat->name, os->codec_str, os->bandwidth_str, s->streams[i]->codecpar->sample_rate);
avio_printf(out, "\t\t\t\t<AudioChannelConfiguration schemeIdUri=\"urn:mpeg:dash:23003:3:audio_channel_configuration:2011\" value=\"%d\" />\n",
s->streams[i]->codecpar->channels);
}
@@ -700,11 +717,18 @@ static int dict_copy_entry(AVDictionary **dst, const AVDictionary *src, const ch
return 0;
}

+static int dict_set_int(AVDictionary **pm, const char *key, int64_t value, int flags)
+{
+ char valuestr[22];
+ snprintf(valuestr, sizeof(valuestr), "%"PRId64, value);
+ flags &= ~AV_DICT_DONT_STRDUP_VAL;
+ return av_dict_set(pm, key, valuestr, flags);
+}
+
static int dash_write_header(AVFormatContext *s)
{
DASHContext *c = s->priv_data;
int ret = 0, i, range_length;
- AVOutputFormat *oformat;
char *ptr;
char basename[1024];

@@ -727,12 +751,6 @@ static int dash_write_header(AVFormatContext *s)
if (ptr)
*ptr = '\0';

- oformat = av_guess_format("mp4", NULL, NULL);
- if (!oformat) {
- ret = AVERROR_MUXER_NOT_FOUND;
- goto fail;
- }
-
c->streams = av_mallocz(sizeof(*c->streams) * s->nb_streams);
if (!c->streams) {
ret = AVERROR(ENOMEM);
@@ -784,12 +802,25 @@ static int dash_write_header(AVFormatContext *s)
ret = AVERROR(ENOMEM);
goto fail;
}
+
+ // choose muxer based on codec: webm for VP8/9 and opus, mp4 otherwise
+ if (s->streams[i]->codecpar->codec_id == AV_CODEC_ID_VP8 ||
+ s->streams[i]->codecpar->codec_id == AV_CODEC_ID_VP9 ||
+ s->streams[i]->codecpar->codec_id == AV_CODEC_ID_OPUS) {
+ ctx->oformat = av_guess_format("webm", NULL, NULL);
+ } else {
+ ctx->oformat = av_guess_format("mp4", NULL, NULL);
+ }
+ if (!ctx->oformat) {
+ ret = AVERROR_MUXER_NOT_FOUND;
+ goto fail;
+ }
os->ctx = ctx;
- ctx->oformat = oformat;
ctx->interrupt_callback = s->interrupt_callback;
ctx->opaque = s->opaque;
ctx->io_close = s->io_close;
ctx->io_open = s->io_open;
+ av_dict_copy(&ctx->metadata, s->metadata, 0);

if (!(st = avformat_new_stream(ctx, NULL))) {
ret = AVERROR(ENOMEM);
@@ -798,7 +829,10 @@ static int dash_write_header(AVFormatContext *s)
avcodec_parameters_copy(st->codecpar, s->streams[i]->codecpar);
st->sample_aspect_ratio = s->streams[i]->sample_aspect_ratio;
st->time_base = s->streams[i]->time_base;
+ st->avg_frame_rate = s->streams[i]->avg_frame_rate;
ctx->avoid_negative_ts = s->avoid_negative_ts;
+ ctx->flags = s->flags;
+ ctx->max_delay = s->max_delay;

if ((ret = avio_open_dyn_buf(&ctx->pb)) < 0)
goto fail;
@@ -818,7 +852,12 @@ static int dash_write_header(AVFormatContext *s)
goto fail;
os->init_start_pos = 0;

- av_dict_set(&opts, "movflags", "frag_custom+dash+delay_moov", 0);
+ if (!strcmp(ctx->oformat->name, "mp4")) {
+ av_dict_set(&opts, "movflags", "frag_custom+dash+delay_moov", 0);
+ } else {
+ dict_set_int(&opts, "cluster_time_limit", c->min_seg_duration / 1000, 0);
+ dict_set_int(&opts, "cluster_size_limit", 5 * 1024 * 1024, 0); // set a large cluster size limit
+ }
if ((ret = avformat_write_header(ctx, &opts)) < 0)
goto fail;

@@ -1023,7 +1062,8 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
os->pos += range_length;

// write chunk header
- write_styp(os->ctx->pb);
+ if (!strcmp(os->ctx->oformat->name, "mp4"))
+ write_styp(os->ctx->pb);
}

if (c->window_size || (final && c->remove_at_exit)) {
--
2.10.2
Martin Storsjö
2017-01-27 14:09:06 UTC
Permalink
Post by Peter Große
Use webm muxer for VP8, VP9 and Opus codec, mp4 muxer otherwise.
Also copy stream metadata to output stream.
---
libavformat/dashenc.c | 68 ++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 54 insertions(+), 14 deletions(-)
diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 86b454e..24665cd 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -113,6 +113,23 @@ static void set_codec_str(AVFormatContext *s, AVCodecParameters *par,
{
const AVCodecTag *tags[2] = { NULL, NULL };
uint32_t tag;
+
+ // common Webm codecs are not part of RFC 6381
+ switch (par->codec_id) {
+ case AV_CODEC_ID_VP8:
+ snprintf(str, size, "vp8");
+ return;
+ case AV_CODEC_ID_VP9:
+ snprintf(str, size, "vp9");
+ return;
+ case AV_CODEC_ID_VORBIS:
+ snprintf(str, size, "vorbis");
+ return;
+ case AV_CODEC_ID_OPUS:
+ snprintf(str, size, "opus");
+ return;
+ }
+
Hmm, I'm pondering if it'd be worth to store these in some more compact
form, like a table, for all codecs which just have a single simple plain
string?
Post by Peter Große
if (par->codec_type == AVMEDIA_TYPE_VIDEO)
tags[0] = ff_codec_movvideo_tags;
else if (par->codec_type == AVMEDIA_TYPE_AUDIO)
@@ -495,11 +512,11 @@ static int write_adaptation_set(AVFormatContext *s, AVIOContext *out, int as_ind
continue;
if (as->media_type == AVMEDIA_TYPE_VIDEO) {
- avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"video/mp4\" codecs=\"%s\"%s width=\"%d\" height=\"%d\">\n",
- i, os->codec_str, os->bandwidth_str, s->streams[i]->codecpar->width, s->streams[i]->codecpar->height);
+ avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"video/%s\" codecs=\"%s\"%s width=\"%d\" height=\"%d\">\n",
+ i, os->ctx->oformat->name, os->codec_str, os->bandwidth_str, s->streams[i]->codecpar->width, s->streams[i]->codecpar->height);
} else {
- avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"audio/mp4\" codecs=\"%s\"%s audioSamplingRate=\"%d\">\n",
- i, os->codec_str, os->bandwidth_str, s->streams[i]->codecpar->sample_rate);
+ avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"audio/%s\" codecs=\"%s\"%s audioSamplingRate=\"%d\">\n",
+ i, os->ctx->oformat->name, os->codec_str, os->bandwidth_str, s->streams[i]->codecpar->sample_rate);
Using oformat->name here feels a little fragile. I think I'd rather have
that stored as a string somewhere in the context. (Not sure if it's better
to have it "flexible" to compose it as video/<string> and audio/<string>,
or store the full "video/mp4" etc as audioMime and videoMime.)
Post by Peter Große
avio_printf(out, "\t\t\t\t<AudioChannelConfiguration schemeIdUri=\"urn:mpeg:dash:23003:3:audio_channel_configuration:2011\" value=\"%d\" />\n",
s->streams[i]->codecpar->channels);
}
@@ -700,11 +717,18 @@ static int dict_copy_entry(AVDictionary **dst, const AVDictionary *src, const ch
return 0;
}
+static int dict_set_int(AVDictionary **pm, const char *key, int64_t value, int flags)
+{
+ char valuestr[22];
+ snprintf(valuestr, sizeof(valuestr), "%"PRId64, value);
+ flags &= ~AV_DICT_DONT_STRDUP_VAL;
+ return av_dict_set(pm, key, valuestr, flags);
+}
+
static int dash_write_header(AVFormatContext *s)
{
DASHContext *c = s->priv_data;
int ret = 0, i, range_length;
- AVOutputFormat *oformat;
char *ptr;
char basename[1024];
@@ -727,12 +751,6 @@ static int dash_write_header(AVFormatContext *s)
if (ptr)
*ptr = '\0';
- oformat = av_guess_format("mp4", NULL, NULL);
- if (!oformat) {
- ret = AVERROR_MUXER_NOT_FOUND;
- goto fail;
- }
-
c->streams = av_mallocz(sizeof(*c->streams) * s->nb_streams);
if (!c->streams) {
ret = AVERROR(ENOMEM);
@@ -784,12 +802,25 @@ static int dash_write_header(AVFormatContext *s)
ret = AVERROR(ENOMEM);
goto fail;
}
+
+ // choose muxer based on codec: webm for VP8/9 and opus, mp4 otherwise
+ if (s->streams[i]->codecpar->codec_id == AV_CODEC_ID_VP8 ||
+ s->streams[i]->codecpar->codec_id == AV_CODEC_ID_VP9 ||
+ s->streams[i]->codecpar->codec_id == AV_CODEC_ID_OPUS) {
+ ctx->oformat = av_guess_format("webm", NULL, NULL);
+ } else {
+ ctx->oformat = av_guess_format("mp4", NULL, NULL);
+ }
+ if (!ctx->oformat) {
+ ret = AVERROR_MUXER_NOT_FOUND;
+ goto fail;
+ }
os->ctx = ctx;
- ctx->oformat = oformat;
ctx->interrupt_callback = s->interrupt_callback;
ctx->opaque = s->opaque;
ctx->io_close = s->io_close;
ctx->io_open = s->io_open;
+ av_dict_copy(&ctx->metadata, s->metadata, 0);
Does the webm muxer need some specific metadata which we don't pass
through right now, or is it just for making sure that metadata ends up set
on the chained muxer and included in the stream? I'd at least want a
comment in the commit message explaining why this is necessary.
Post by Peter Große
if (!(st = avformat_new_stream(ctx, NULL))) {
ret = AVERROR(ENOMEM);
@@ -798,7 +829,10 @@ static int dash_write_header(AVFormatContext *s)
avcodec_parameters_copy(st->codecpar, s->streams[i]->codecpar);
st->sample_aspect_ratio = s->streams[i]->sample_aspect_ratio;
st->time_base = s->streams[i]->time_base;
+ st->avg_frame_rate = s->streams[i]->avg_frame_rate;
ctx->avoid_negative_ts = s->avoid_negative_ts;
+ ctx->flags = s->flags;
+ ctx->max_delay = s->max_delay;
Are these new settings strictly necessary for chained-webm? If not, I'd
rather add them in a separate commit afterwards, with an explanation on
what it helps for.

// Martin
Peter Große
2017-01-27 18:55:38 UTC
Permalink
On Fri, 27 Jan 2017 16:09:06 +0200 (EET)
Post by Martin Storsjö
Post by Peter Große
+ // common Webm codecs are not part of RFC 6381
+ switch (par->codec_id) {
+ case AV_CODEC_ID_VP8:
+ snprintf(str, size, "vp8");
+ return;
+ case AV_CODEC_ID_VP9:
+ snprintf(str, size, "vp9");
+ return;
+ case AV_CODEC_ID_VORBIS:
+ snprintf(str, size, "vorbis");
+ return;
+ case AV_CODEC_ID_OPUS:
+ snprintf(str, size, "opus");
+ return;
+ }
+
Hmm, I'm pondering if it'd be worth to store these in some more compact
form, like a table, for all codecs which just have a single simple plain
string?
Do you have some example code? I'm not sure if I get how to do tables in C.

Like with a struct?

struct codec_string {
int codec_id;
const char *codec_str;
};
struct codec_string codecs[] = {
{AV_CODEC_ID_VP8, "vp8"},
{AV_CODEC_ID_VP9, "vp9"},
...
};
Post by Martin Storsjö
Using oformat->name here feels a little fragile. I think I'd rather have
that stored as a string somewhere in the context. (Not sure if it's better
to have it "flexible" to compose it as video/<string> and audio/<string>,
or store the full "video/mp4" etc as audioMime and videoMime.)
I might opt for the short version, since I use the format name also for conditionals like

if (!strcmp(os->ctx->oformat->name, "mp4"))

Therefore maybe a char *format_name in the OutputStream struct?
Post by Martin Storsjö
Post by Peter Große
ctx->interrupt_callback = s->interrupt_callback;
ctx->opaque = s->opaque;
ctx->io_close = s->io_close;
ctx->io_open = s->io_open;
+ av_dict_copy(&ctx->metadata, s->metadata, 0);
Does the webm muxer need some specific metadata which we don't pass
through right now, or is it just for making sure that metadata ends up set
on the chained muxer and included in the stream? I'd at least want a
comment in the commit message explaining why this is necessary.
No specific metadata is needed. I can move this to a separate commit.
Post by Martin Storsjö
Post by Peter Große
if (!(st = avformat_new_stream(ctx, NULL))) {
ret = AVERROR(ENOMEM);
@@ -798,7 +829,10 @@ static int dash_write_header(AVFormatContext *s)
avcodec_parameters_copy(st->codecpar, s->streams[i]->codecpar);
st->sample_aspect_ratio = s->streams[i]->sample_aspect_ratio;
st->time_base = s->streams[i]->time_base;
+ st->avg_frame_rate = s->streams[i]->avg_frame_rate;
ctx->avoid_negative_ts = s->avoid_negative_ts;
+ ctx->flags = s->flags;
+ ctx->max_delay = s->max_delay;
Are these new settings strictly necessary for chained-webm? If not, I'd
rather add them in a separate commit afterwards, with an explanation on
what it helps for.
To be honest, I haven't checked. I just looked at what other muxers with subordinate muxers do when they initialize the context.

Regards
Peter
Martin Storsjö
2017-01-27 20:46:08 UTC
Permalink
Post by Peter Große
On Fri, 27 Jan 2017 16:09:06 +0200 (EET)
Post by Martin Storsjö
Post by Peter Große
+ // common Webm codecs are not part of RFC 6381
+ switch (par->codec_id) {
+ case AV_CODEC_ID_VP8:
+ snprintf(str, size, "vp8");
+ return;
+ case AV_CODEC_ID_VP9:
+ snprintf(str, size, "vp9");
+ return;
+ case AV_CODEC_ID_VORBIS:
+ snprintf(str, size, "vorbis");
+ return;
+ case AV_CODEC_ID_OPUS:
+ snprintf(str, size, "opus");
+ return;
+ }
+
Hmm, I'm pondering if it'd be worth to store these in some more compact
form, like a table, for all codecs which just have a single simple plain
string?
Do you have some example code? I'm not sure if I get how to do tables in C.
Like with a struct?
struct codec_string {
int codec_id;
const char *codec_str;
};
struct codec_string codecs[] = {
{AV_CODEC_ID_VP8, "vp8"},
{AV_CODEC_ID_VP9, "vp9"},
...
};
Yes, something like this. If favouring compact code (which we tend to do),
something like this would do:

static struct codec_string {
int id;
const char *str;
} codecs[] = {
{ AV_CODEC_ID_VP8, "vp8" },
...
{ 0, NULL }
};

int i;
for (i = 0; codecs[i].id; i++)
if (codecs[i].id == id)
return codecs[i].str;

All in all, the number of lines probably might end up higher than before,
but the bulk of the repetitive part is more compact, and adding another
entry is less annoying.
Post by Peter Große
Post by Martin Storsjö
Using oformat->name here feels a little fragile. I think I'd rather have
that stored as a string somewhere in the context. (Not sure if it's better
to have it "flexible" to compose it as video/<string> and audio/<string>,
or store the full "video/mp4" etc as audioMime and videoMime.)
I might opt for the short version, since I use the format name also for conditionals like
if (!strcmp(os->ctx->oformat->name, "mp4"))
Therefore maybe a char *format_name in the OutputStream struct?
That might be ok. Maybe a comment at the place where it's assigned, saying
that this will be used as part of a mime string (so future other formats
won't accidentally create bogus mime strings).
Post by Peter Große
Post by Martin Storsjö
Post by Peter Große
ctx->interrupt_callback = s->interrupt_callback;
ctx->opaque = s->opaque;
ctx->io_close = s->io_close;
ctx->io_open = s->io_open;
+ av_dict_copy(&ctx->metadata, s->metadata, 0);
Does the webm muxer need some specific metadata which we don't pass
through right now, or is it just for making sure that metadata ends up set
on the chained muxer and included in the stream? I'd at least want a
comment in the commit message explaining why this is necessary.
No specific metadata is needed. I can move this to a separate commit.
Post by Martin Storsjö
Post by Peter Große
if (!(st = avformat_new_stream(ctx, NULL))) {
ret = AVERROR(ENOMEM);
@@ -798,7 +829,10 @@ static int dash_write_header(AVFormatContext *s)
avcodec_parameters_copy(st->codecpar, s->streams[i]->codecpar);
st->sample_aspect_ratio = s->streams[i]->sample_aspect_ratio;
st->time_base = s->streams[i]->time_base;
+ st->avg_frame_rate = s->streams[i]->avg_frame_rate;
ctx->avoid_negative_ts = s->avoid_negative_ts;
+ ctx->flags = s->flags;
+ ctx->max_delay = s->max_delay;
Are these new settings strictly necessary for chained-webm? If not, I'd
rather add them in a separate commit afterwards, with an explanation on
what it helps for.
To be honest, I haven't checked. I just looked at what other muxers with subordinate muxers do when they initialize the context.
Ok - absolutely in another commit then, if at all.

In general it might seem useful to pass on as many fields as possible, but
so far, I've taken the inverse approach; only forward the ones I
absolutely know I need. I probably don't mind adding this with a commit
message with a good motivation though.

// Martin
Peter Große
2017-01-26 23:25:18 UTC
Permalink
Signed-off-by: Peter Große <***@friiks.de>
---
doc/muxers.texi | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 53 insertions(+)

diff --git a/doc/muxers.texi b/doc/muxers.texi
index 5430da7..95d8051 100644
--- a/doc/muxers.texi
+++ b/doc/muxers.texi
@@ -53,6 +53,59 @@ avconv -i INPUT -c:a pcm_u8 -c:v mpeg2video -f crc -

See also the @ref{framecrc} muxer.

+@anchor{dash}
+@section dash
+
+Dynamic Adaptive Streaming over HTTP (DASH) muxer that creates segments
+and manifest files according to the MPEG-DASH standard ISO/IEC 23009-1:2014.
+
+For more information see:
+
+@itemize @bullet
+@item
+WebM DASH Specification: @url{https://sites.google.com/a/webmproject.org/wiki/adaptive-streaming/webm-dash-specification}
+@item
+ISO DASH Specification: @url{http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip}
+@end itemize
+
+It creates an MPD manifest file and segment files for each stream.
+
+The segment filename might contain pre-defined identifiers used with SegmentTemplate
+as defined in section 5.3.9.4.4 of the standard. Available identifiers are "$RepresentationID$",
+"$Number$", "$Bandwidth$" and "$Time$".
+
+@example
+avconv -i in.nut -f dash
+@end example
+
+@table @option
+@item -min_seg_duration @var{seconds}
+Set the segment length in seconds.
+@item -window_size @var{size}
+Set the maximum number of segments kept in the manifest.
+@item -extra_window_size @var{size}
+Set the maximum number of segments kept outside of the manifest before removing from disk.
+@item -remove_at_exit @var{remove}
+Enable (1) or disable (0) removal of all segments when finished.
+@item -use_template @var{template}
+Enable (1) or disable (0) use of SegmentTemplate instead of SegmentList.
+@item -use_timeline @var{timeline}
+Enable (1) or disable (0) use of SegmentTimeline in SegmentTemplate.
+@item -single_file @var{single_file}
+Enable (1) or disable (0) storing all segments in one file, accessed using byte ranges.
+@item -single_file_name @var{file_name}
+DASH-templated name to be used for baseURL. Implies @var{single_file} set to "1".
+@item -init_seg_name @var{init_name}
+DASH-templated name to be used for the initialization segment. Default is "init-stream$RepresentationID$.m4s"
+@item -media_seg_name @var{segment_name}
+DASH-templated name to be used for the media segments. Default is "chunk-stream$RepresentationID$-$Number%05d$.m4s"
+@item -utc_timing_url @var{utc_url}
+URL of the page that will return the UTC timestamp in ISO format. Example: "https://time.akamai.com/?iso"
+@item -adaptation_sets @var{adaptation_sets}
+Assign streams to AdaptationSets. Syntax is "id=x,streams=a,b,c id=y,streams=d,e" with x and y being the IDs
+of the adaptation sets and a,b,c,d and e are the indices of the mapped streams.
+@end table
+
@anchor{framecrc}
@section framecrc
--
2.10.2
Martin Storsjö
2017-01-27 14:12:46 UTC
Permalink
Post by Peter Große
---
doc/muxers.texi | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 53 insertions(+)
diff --git a/doc/muxers.texi b/doc/muxers.texi
index 5430da7..95d8051 100644
--- a/doc/muxers.texi
+++ b/doc/muxers.texi
@@ -53,6 +53,59 @@ avconv -i INPUT -c:a pcm_u8 -c:v mpeg2video -f crc -
+
+Dynamic Adaptive Streaming over HTTP (DASH) muxer that creates segments
+and manifest files according to the MPEG-DASH standard ISO/IEC 23009-1:2014.
+
+
I'd rather have the actual ISO spec first :P
Post by Peter Große
+
+It creates an MPD manifest file and segment files for each stream.
+
+The segment filename might contain pre-defined identifiers used with SegmentTemplate
+as defined in section 5.3.9.4.4 of the standard. Available identifiers are "$RepresentationID$",
+"$Number$", "$Bandwidth$" and "$Time$".
+
+avconv -i in.nut -f dash
It might be of value to show a full testcase also, where you use multiple
quality streams for both audio and video.

I've used a command line like this locally for testing that:

./avconv -re -i <input> -map 0 -map 0 -acodec libfdk_aac -vcodec libx264
-b:v:0 800k -b:v:1 300k -s:v:1 320x170 -profile:v:1 baseline -profile:v:0
main -bf 1 -keyint_min 120 -g 120 -sc_threshold 0 -b_strategy 0
-use_timeline 1 -use_template 1 -window_size 5 -f dash -ar:a:1 22050
/path/to/out.mpd

This should afaik produce well-aligned keyframes in all substreams. Feel
free to pick through this and see what of it makes sense to include in an
example.

// Martin
Peter Große
2017-01-27 19:00:53 UTC
Permalink
On Fri, 27 Jan 2017 16:12:46 +0200 (EET)
Post by Martin Storsjö
Post by Peter Große
@url{https://sites.google.com/a/webmproject.org/wiki/adaptive-streaming/webm-dash-specification}
@url{http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip}
I'd rather have the actual ISO spec first :P
No problem ;)

As far as I get it they won't merge anytime soon. Google widely uses WebM DASH,
but MPEG alliance doesn't care about media containers which are not their own.
Post by Martin Storsjö
Post by Peter Große
+It creates a MPD manifest file and segment files for each stream.
+
+The segment filename might contain pre-defined identifiers used with
SegmentTemplate +as defined in section 5.3.9.4.4 of the standard. Available
identifiers are "$RepresentationID$", +"$Number$", "$Bandwidth$" and
"$Time$". +
+avconv -i in.nut -f dash
It might be of value to show a full testcase also, where you use multiple
quality streams for both audio and video.
Ah, I missed that. I planned to add a full example, but forgot about it.
Post by Martin Storsjö
./avconv -re -i <input> -map 0 -map 0 -acodec libfdk_aac -vcodec libx264
-b:v:0 800k -b:v:1 300k -s:v:1 320x170 -profile:v:1 baseline -profile:v:0
main -bf 1 -keyint_min 120 -g 120 -sc_threshold 0 -b_strategy 0
-use_timeline 1 -use_template 1 -window_size 5 -f dash -ar:a:1 22050
/path/to/out.mpd
This should afaik produce well-aligned keyframes in all substreams. Feel
free to pick through this and see what of it makes sense to include in an
example.
Will have a look.

Regards
Peter
Martin Storsjö
2017-01-27 12:25:54 UTC
Permalink
Post by Peter Große
Appends Z to timestamp to force ISO8601 datetime parsing as UTC.
Without Z, some browsers (Chrome) interpret the timestamp as
localtime and others (Firefox) interpret it as UTC.
---
libavformat/dashenc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index ce01860..2b27950 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -429,7 +429,7 @@ static void format_date_now(char *buf, int size)
struct tm *ptm, tmbuf;
ptm = gmtime_r(&t, &tmbuf);
if (ptm) {
- if (!strftime(buf, size, "%Y-%m-%dT%H:%M:%S", ptm))
+ if (!strftime(buf, size, "%Y-%m-%dT%H:%M:%SZ", ptm))
buf[0] = '\0';
}
}
--
2.10.2
LGTM, pushed

// Martin
Loading...