Discussion:
[PATCH 0/9] avcodec_decode_audio4
(too old to reply)
Justin Ruggles
2011-11-12 22:35:46 UTC
Permalink
Here is my attempt at updating the audio decoders to use get_buffer().

For decoders, there are several advantages.

1) decoding will not fail because of a user-provided buffer being too small.

2) the arbitrary AVCODEC_MAX_AUDIO_FRAME_SIZE limit will be removed.

3) decoders which have workarounds to send correct decoded output
even with a small output buffer will no longer have to do so.

4) planar support will be easier to implement

5) decoder output will more closely match what is used by libavfilter

I have provided an example of the new API implementation in the pcm decoders.
The patches for the remaining decoders will be sent after the API has been
discussed and agreed upon.

Thanks,
Justin

Justin Ruggles (9):
libavutil: add planar sample formats and av_sample_fmt_is_planar()
libavutil: add utility functions to simplify allocation of audio
buffers.
Add avcodec_decode_audio4().
avconv: use avcodec_decode_audio4() instead of
avcodec_decode_audio3()
avformat: use avcodec_decode_audio4() in avformat_find_stream_info()
avplay: use a separate buffer for playing silence
avplay: use avcodec_decode_audio4()
api-example: update to use avcodec_decode_audio4()
pcmdec: implement new audio decoding API

avconv.c | 61 +++++++---------
avplay.c | 47 +++++++-----
doc/APIchanges | 14 ++++
libavcodec/api-example.c | 18 ++--
libavcodec/avcodec.h | 108 ++++++++++++++++++++++++++-
libavcodec/pcm.c | 42 ++++++----
libavcodec/utils.c | 186 +++++++++++++++++++++++++++++++++++++++++-----
libavcodec/version.h | 5 +-
libavformat/utils.c | 29 ++++----
libavutil/avutil.h | 2 +-
libavutil/samplefmt.c | 103 ++++++++++++++++++++++++-
libavutil/samplefmt.h | 79 +++++++++++++++++++
12 files changed, 573 insertions(+), 121 deletions(-)
Justin Ruggles
2011-11-12 22:35:47 UTC
Permalink
---
doc/APIchanges | 3 +++
libavutil/avutil.h | 2 +-
libavutil/samplefmt.c | 23 ++++++++++++++++++-----
libavutil/samplefmt.h | 15 +++++++++++++++
4 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 5ef1fa5..4549415 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil: 2011-04-18

API changes, most recent first:

+2011-xx-xx - xxxxxxx - lavu 51.15.0
+ Add planar sample formats and av_sample_fmt_is_planar() to samplefmt.h.
+
2011-11-06 - ba04ecf - lavu 51.14.0
Add av_strcasecmp() and av_strncasecmp() to avstring.h.

diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index 5ad7034..4b226e0 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -40,7 +40,7 @@
#define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)

#define LIBAVUTIL_VERSION_MAJOR 51
-#define LIBAVUTIL_VERSION_MINOR 14
+#define LIBAVUTIL_VERSION_MINOR 15
#define LIBAVUTIL_VERSION_MICRO 0

#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
diff --git a/libavutil/samplefmt.c b/libavutil/samplefmt.c
index 5b0bfa0..d47c5e7 100644
--- a/libavutil/samplefmt.c
+++ b/libavutil/samplefmt.c
@@ -25,15 +25,21 @@
typedef struct SampleFmtInfo {
const char *name;
int bits;
+ int planar;
} SampleFmtInfo;

/** this table gives more information about formats */
static const SampleFmtInfo sample_fmt_info[AV_SAMPLE_FMT_NB] = {
- [AV_SAMPLE_FMT_U8] = { .name = "u8", .bits = 8 },
- [AV_SAMPLE_FMT_S16] = { .name = "s16", .bits = 16 },
- [AV_SAMPLE_FMT_S32] = { .name = "s32", .bits = 32 },
- [AV_SAMPLE_FMT_FLT] = { .name = "flt", .bits = 32 },
- [AV_SAMPLE_FMT_DBL] = { .name = "dbl", .bits = 64 },
+ [AV_SAMPLE_FMT_U8] = { .name = "u8", .bits = 8, .planar = 0 },
+ [AV_SAMPLE_FMT_S16] = { .name = "s16", .bits = 16, .planar = 0 },
+ [AV_SAMPLE_FMT_S32] = { .name = "s32", .bits = 32, .planar = 0 },
+ [AV_SAMPLE_FMT_FLT] = { .name = "flt", .bits = 32, .planar = 0 },
+ [AV_SAMPLE_FMT_DBL] = { .name = "dbl", .bits = 64, .planar = 0 },
+ [AV_SAMPLE_FMT_U8_PLANAR] = { .name = "u8p", .bits = 8, .planar = 1 },
+ [AV_SAMPLE_FMT_S16_PLANAR] = { .name = "s16p", .bits = 16, .planar = 1 },
+ [AV_SAMPLE_FMT_S32_PLANAR] = { .name = "s32p", .bits = 32, .planar = 1 },
+ [AV_SAMPLE_FMT_FLT_PLANAR] = { .name = "fltp", .bits = 32, .planar = 1 },
+ [AV_SAMPLE_FMT_DBL_PLANAR] = { .name = "dblp", .bits = 64, .planar = 1 },
};

const char *av_get_sample_fmt_name(enum AVSampleFormat sample_fmt)
@@ -79,3 +85,10 @@ int av_get_bits_per_sample_fmt(enum AVSampleFormat sample_fmt)
0 : sample_fmt_info[sample_fmt].bits;
}
#endif
+
+int av_sample_fmt_is_planar(enum AVSampleFormat sample_fmt)
+{
+ if (sample_fmt < 0 || sample_fmt >= AV_SAMPLE_FMT_NB)
+ return 0;
+ return sample_fmt_info[sample_fmt].planar;
+}
diff --git a/libavutil/samplefmt.h b/libavutil/samplefmt.h
index e382149..5dfd7a6 100644
--- a/libavutil/samplefmt.h
+++ b/libavutil/samplefmt.h
@@ -31,6 +31,13 @@ enum AVSampleFormat {
AV_SAMPLE_FMT_S32, ///< signed 32 bits
AV_SAMPLE_FMT_FLT, ///< float
AV_SAMPLE_FMT_DBL, ///< double
+
+ AV_SAMPLE_FMT_U8_PLANAR, ///< unsigned 8 bits, planar
+ AV_SAMPLE_FMT_S16_PLANAR, ///< signed 16 bits, planar
+ AV_SAMPLE_FMT_S32_PLANAR, ///< signed 32 bits, planar
+ AV_SAMPLE_FMT_FLT_PLANAR, ///< float, planar
+ AV_SAMPLE_FMT_DBL_PLANAR, ///< double, planar
+
AV_SAMPLE_FMT_NB ///< Number of sample formats. DO NOT USE if linking dynamically
};

@@ -77,4 +84,12 @@ int av_get_bits_per_sample_fmt(enum AVSampleFormat sample_fmt);
*/
int av_get_bytes_per_sample(enum AVSampleFormat sample_fmt);

+/**
+ * Check if the sample format is planar.
+ *
+ * @param sample_fmt the sample format to inspect
+ * @return 1 if the sample format is planar, 0 if it is interleaved
+ */
+int av_sample_fmt_is_planar(enum AVSampleFormat sample_fmt);
+
#endif /* AVUTIL_SAMPLEFMT_H */
--
1.7.1
Måns Rullgård
2011-11-12 22:51:22 UTC
Permalink
Post by Justin Ruggles
+ AV_SAMPLE_FMT_U8_PLANAR, ///< unsigned 8 bits, planar
+ AV_SAMPLE_FMT_S16_PLANAR, ///< signed 16 bits, planar
+ AV_SAMPLE_FMT_S32_PLANAR, ///< signed 32 bits, planar
+ AV_SAMPLE_FMT_FLT_PLANAR, ///< float, planar
+ AV_SAMPLE_FMT_DBL_PLANAR, ///< double, planar
Bikeshed: a simple P suffix (AV_SAMPLE_FMT_S16P etc) would be consistent
with pixel format names as well as being shorter.
--
Måns Rullgård
***@mansr.com
Ronald S. Bultje
2011-11-12 22:56:16 UTC
Permalink
Hi,
Post by Måns Rullgård
+    AV_SAMPLE_FMT_U8_PLANAR,   ///< unsigned 8 bits, planar
+    AV_SAMPLE_FMT_S16_PLANAR,  ///< signed 16 bits, planar
+    AV_SAMPLE_FMT_S32_PLANAR,  ///< signed 32 bits, planar
+    AV_SAMPLE_FMT_FLT_PLANAR,  ///< float, planar
+    AV_SAMPLE_FMT_DBL_PLANAR,  ///< double, planar
Bikeshed: a simple P suffix (AV_SAMPLE_FMT_S16P etc) would be consistent
with pixel format names as well as being shorter.
+1.

Ronald
Luca Barbato
2011-11-13 04:16:48 UTC
Permalink
Hi,
Post by Måns Rullgård
+ AV_SAMPLE_FMT_U8_PLANAR, ///< unsigned 8 bits, planar
+ AV_SAMPLE_FMT_S16_PLANAR, ///< signed 16 bits, planar
+ AV_SAMPLE_FMT_S32_PLANAR, ///< signed 32 bits, planar
+ AV_SAMPLE_FMT_FLT_PLANAR, ///< float, planar
+ AV_SAMPLE_FMT_DBL_PLANAR, ///< double, planar
Bikeshed: a simple P suffix (AV_SAMPLE_FMT_S16P etc) would be consistent
with pixel format names as well as being shorter.
+1.
+1
Justin Ruggles
2011-11-12 22:35:48 UTC
Permalink
Based on code by Stefano Sabatini.
---
doc/APIchanges | 4 ++
libavutil/avutil.h | 2 +-
libavutil/samplefmt.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++
libavutil/samplefmt.h | 64 +++++++++++++++++++++++++++++++++++++++
4 files changed, 149 insertions(+), 1 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 4549415..1063dad 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,10 @@ libavutil: 2011-04-18

API changes, most recent first:

+2011-xx-xx - xxxxxxx - lavu 51.16.0
+ Add av_samples_check_size(), av_samples_fill_arrays(), av_samples_alloc(),
+ and av_samples_free() to samplefmt.h.
+
2011-xx-xx - xxxxxxx - lavu 51.15.0
Add planar sample formats and av_sample_fmt_is_planar() to samplefmt.h.

diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index 4b226e0..436f79b 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -40,7 +40,7 @@
#define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)

#define LIBAVUTIL_VERSION_MAJOR 51
-#define LIBAVUTIL_VERSION_MINOR 15
+#define LIBAVUTIL_VERSION_MINOR 16
#define LIBAVUTIL_VERSION_MICRO 0

#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
diff --git a/libavutil/samplefmt.c b/libavutil/samplefmt.c
index d47c5e7..2bc2093 100644
--- a/libavutil/samplefmt.c
+++ b/libavutil/samplefmt.c
@@ -92,3 +92,83 @@ int av_sample_fmt_is_planar(enum AVSampleFormat sample_fmt)
return 0;
return sample_fmt_info[sample_fmt].planar;
}
+
+int av_samples_check_size(int nb_channels, int nb_samples,
+ enum AVSampleFormat sample_fmt, int align)
+{
+ int line_size;
+ int sample_size = av_get_bytes_per_sample(sample_fmt);
+ int planar = av_sample_fmt_is_planar(sample_fmt);
+
+ /* validate parameter ranges */
+ if (!sample_size || nb_samples <= 0 || nb_channels <= 0)
+ return AVERROR(EINVAL);
+
+ /* check for integer overflow in required buffer size */
+ if (nb_channels > INT_MAX / align ||
+ (int64_t)nb_channels * nb_samples > (INT_MAX - (align * nb_channels)) / sample_size)
+ return AVERROR(EINVAL);
+
+ line_size = planar ? FFALIGN(nb_samples * sample_size, align) :
+ FFALIGN(nb_samples * sample_size * nb_channels, align);
+
+ return planar ? line_size * nb_channels : line_size;
+}
+
+int av_samples_fill_arrays(uint8_t ***audio_data, int *linesize,
+ uint8_t *buf, int nb_channels, int nb_samples,
+ enum AVSampleFormat sample_fmt, int align)
+{
+ int ch, planar, buf_size, line_size;
+ uint8_t **pointers;
+
+ planar = av_sample_fmt_is_planar(sample_fmt);
+ buf_size = av_samples_check_size(nb_channels, nb_samples, sample_fmt, align);
+ if (buf_size < 0)
+ return buf_size;
+ line_size = planar ? buf_size / nb_channels : buf_size;
+
+ pointers = av_mallocz(sizeof(*pointers) * (planar ? nb_channels : 1));
+ if (!pointers)
+ return AVERROR(ENOMEM);
+ pointers[0] = buf;
+ for (ch = 1; planar && ch < nb_channels; ch++)
+ pointers[ch] = pointers[ch-1] + line_size;
+
+ *audio_data = pointers;
+ *linesize = line_size;
+
+ return 0;
+}
+
+int av_samples_alloc(uint8_t ***audio_data, int *linesize, int nb_channels,
+ int nb_samples, enum AVSampleFormat sample_fmt, int align)
+{
+ uint8_t *buf;
+ int size = av_samples_check_size(nb_channels, nb_samples, sample_fmt, align);
+ if (size < 0)
+ return size;
+
+ buf = av_mallocz(size);
+ if (!buf)
+ return AVERROR(ENOMEM);
+
+ size = av_samples_fill_arrays(audio_data, linesize, buf, nb_channels,
+ nb_samples, sample_fmt, align);
+ if (size < 0) {
+ av_free(buf);
+ return size;
+ }
+ return 0;
+}
+
+void av_samples_free(uint8_t **audio_data, int nb_channels,
+ enum AVSampleFormat sample_fmt)
+{
+ int ch;
+ int planar = av_sample_fmt_is_planar(sample_fmt);
+ av_free(audio_data[0]);
+ for (ch = 1; planar && ch < nb_channels; ch++)
+ av_free(audio_data[ch]);
+ av_freep(&audio_data);
+}
diff --git a/libavutil/samplefmt.h b/libavutil/samplefmt.h
index 5dfd7a6..1d130f3 100644
--- a/libavutil/samplefmt.h
+++ b/libavutil/samplefmt.h
@@ -92,4 +92,68 @@ int av_get_bytes_per_sample(enum AVSampleFormat sample_fmt);
*/
int av_sample_fmt_is_planar(enum AVSampleFormat sample_fmt);

+/**
+ * Ensure that the parameters are valid and the buffer size is not too large.
+ *
+ * @param nb_channels the number of channels
+ * @param nb_samples the number of samples in a single channel
+ * @param sample_fmt the sample format
+ * @return required buffer size, or negative error code on failure
+ */
+int av_samples_check_size(int nb_channels, int nb_samples,
+ enum AVSampleFormat sample_fmt, int align);
+
+/**
+ * Fill channel data pointers and linesize for samples with sample
+ * format sample_fmt.
+ *
+ * The pointers array is filled with the pointers to the samples data:
+ * for planar, set the start point of each channel's data within the buffer,
+ * for packed, set the start point of the entire buffer only.
+ *
+ * The linesize array is filled with the aligned size of each channel's data
+ * buffer for planar layout, or the aligned size of the buffer for all channels
+ * for packed layout.
+ *
+ * @param[out] audio_data array to be filled with the pointer for each channel
+ * @param[out] linesize calculated linesize
+ * @param buf the pointer to a buffer containing the samples
+ * @param nb_channels the number of channels
+ * @param nb_samples the number of samples in a single channel
+ * @param sample_fmt the sample format
+ * @param align buffer size alignment (1 = no alignment required)
+ * @return 0 on success or a negative error code on failure
+ */
+int av_samples_fill_arrays(uint8_t ***audio_data, int *linesize, uint8_t *buf,
+ int nb_channels, int nb_samples,
+ enum AVSampleFormat sample_fmt, int align);
+
+/**
+ * Allocate a samples buffer for nb_samples samples, and fill data pointers and
+ * linesize accordingly.
+ * The allocated samples buffer has to be freed by calling av_samples_free()
+ * with the same nb_channels and sample_fmt values.
+ *
+ * @param[out] audio_data array to be filled with the pointer for each channel
+ * @param[out] linesize aligned size for audio buffer(s)
+ * @param nb_channels number of audio channels
+ * @param nb_samples number of samples per channel
+ * @param align buffer size alignment (1 = no alignment required)
+ * @return 0 on success or a negative error code on failure
+ * @see av_samples_fill_arrays()
+ */
+int av_samples_alloc(uint8_t ***audio_data, int *linesize, int nb_channels,
+ int nb_samples, enum AVSampleFormat sample_fmt, int align);
+
+/**
+ * Free a samples buffer.
+ * This frees the array of pointers as well as the data.
+ *
+ * @param audio_data array with data pointers for each channel
+ * @param nb_channels number of audio channels
+ * @param sample_fmt the sample format
+ */
+void av_samples_free(uint8_t **audio_data, int nb_channels,
+ enum AVSampleFormat sample_fmt);
+
#endif /* AVUTIL_SAMPLEFMT_H */
--
1.7.1
Anton Khirnov
2011-11-13 17:03:55 UTC
Permalink
Post by Justin Ruggles
diff --git a/libavutil/samplefmt.h b/libavutil/samplefmt.h
index 5dfd7a6..1d130f3 100644
--- a/libavutil/samplefmt.h
+++ b/libavutil/samplefmt.h
@@ -92,4 +92,68 @@ int av_get_bytes_per_sample(enum AVSampleFormat sample_fmt);
*/
int av_sample_fmt_is_planar(enum AVSampleFormat sample_fmt);
+/**
+ * Ensure that the parameters are valid and the buffer size is not too large.
+ *
+ */
+int av_samples_check_size(int nb_channels, int nb_samples,
+ enum AVSampleFormat sample_fmt, int align);
I find the name and the doxy confusing. The main purpose of this
function is to calculate the required buffer size, right? But the
function name doesn't imply that and the description doesn't mention
this either.
Please expand/fix/rename.
--
Anton Khirnov
Justin Ruggles
2011-11-12 22:35:49 UTC
Permalink
Implement audio support in avcodec_default_get_buffer().
Deprecate avcodec_decode_audio3().

Note: This patch does not work by itself. All audio decoders must be converted
to use the new interface.
---
doc/APIchanges | 7 ++
libavcodec/avcodec.h | 108 +++++++++++++++++++++++++++++-
libavcodec/utils.c | 186 ++++++++++++++++++++++++++++++++++++++++++++-----
libavcodec/version.h | 5 +-
4 files changed, 285 insertions(+), 21 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 1063dad..0d41ea9 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,13 @@ libavutil: 2011-04-18

API changes, most recent first:

+2011-xx-xx - xxxxxxx - lavc 53.20.0
+ Add nb_samples field to AVFrame.
+ Deprecate AVCODEC_MAX_AUDIO_FRAME_SIZE.
+ Deprecate avcodec_decode_audio3() in favor of avcodec_decode_audio4().
+ avcodec_decode_audio4() writes output samples to an AVFrame, which allows
+ audio decoders to use get_buffer().
+
2011-xx-xx - xxxxxxx - lavu 51.16.0
Add av_samples_check_size(), av_samples_fill_arrays(), av_samples_alloc(),
and av_samples_free() to samplefmt.h.
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 7d506a1..de660cb 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -449,8 +449,10 @@ enum CodecID {
#define CH_LAYOUT_STEREO_DOWNMIX AV_CH_LAYOUT_STEREO_DOWNMIX
#endif

+#if FF_API_OLD_DECODE_AUDIO
/* in bytes */
#define AVCODEC_MAX_AUDIO_FRAME_SIZE 192000 // 1 second of 48khz 32bit audio
+#endif

/**
* Required number of additionally allocated bytes at the end of the input bitstream for decoding.
@@ -1154,6 +1156,26 @@ typedef struct AVFrame {
* - decoding: Set by libavcodec.
*/
void *thread_opaque;
+
+ /**
+ * number of audio samples (per channel) described by this frame
+ * - encoding: unused
+ * - decoding: Set by libavcodec
+ */
+ int nb_samples;
+
+ /**
+ * pointers to the audio data.
+ *
+ * For planar audio, each channel has a separate data pointer, and
+ * linesize[0] contains the size of each channel buffer.
+ * For packed audio, there is just one data pointer, and linesize[0]
+ * contains the total size of the buffer for all channels.
+ *
+ * encoding: unused
+ * decoding: set by libavcodec in AVCodecContext.get_buffer()
+ */
+ uint8_t **audio_data;
} AVFrame;

/**
@@ -2945,6 +2967,32 @@ typedef struct AVCodecContext {
#define AV_EF_BITSTREAM (1<<1)
#define AV_EF_BUFFER (1<<2)
#define AV_EF_EXPLODE (1<<3)
+
+#if FF_API_OLD_DECODE_AUDIO
+ /**
+ * NOT PART OF PUBLIC API
+ *
+ * User-allocated audio decoder output buffer.
+ * Used internally by libavcodec for backwards compatibility with
+ * avcodec_decode_audio3().
+ *
+ * @deprecated Deprecated because it will no longer be needed after the
+ * removal of avcodec_decode_audio3().
+ */
+ uint8_t *user_buffer;
+
+ /**
+ * NOT PART OF PUBLIC API
+ *
+ * Allocated size, in bytes, of AVCodecContext.user_buffer.
+ * Used internally by libavcodec for backwards compatibility with
+ * avcodec_decode_audio3().
+ *
+ * @deprecated Deprecated because it will no longer be needed after the
+ * removal of avcodec_decode_audio3().
+ */
+ int user_buffer_size;
+#endif
} AVCodecContext;

/**
@@ -3828,7 +3876,12 @@ int avcodec_open(AVCodecContext *avctx, AVCodec *codec);
*/
int avcodec_open2(AVCodecContext *avctx, AVCodec *codec, AVDictionary **options);

+#if FF_API_OLD_DECODE_AUDIO
/**
+ * Wrapper function which calls avcodec_decode_audio4.
+ *
+ * @deprecated Use avcodec_decode_audio4 instead.
+ *
* Decode the audio frame of size avpkt->size from avpkt->data into samples.
* Some decoders may support multiple frames in a single AVPacket, such
* decoders would then just decode the first frame. In this case,
@@ -3871,9 +3924,62 @@ int avcodec_open2(AVCodecContext *avctx, AVCodec *codec, AVDictionary **options)
* @return On error a negative value is returned, otherwise the number of bytes
* used or zero if no frame data was decompressed (used) from the input AVPacket.
*/
-int avcodec_decode_audio3(AVCodecContext *avctx, int16_t *samples,
+attribute_deprecated int avcodec_decode_audio3(AVCodecContext *avctx, int16_t *samples,
int *frame_size_ptr,
AVPacket *avpkt);
+#endif
+
+/**
+ * Decode the audio frame of size avpkt->size from avpkt->data into frame.
+ *
+ * Some decoders may support multiple frames in a single AVPacket. Such
+ * decoders would then just decode the first frame. In this case,
+ * avcodec_decode_audio4 has to be called again with an AVPacket containing
+ * the remaining data in order to decode the second frame, etc...
+ * If no frame could be output, got_frame_ptr is set to zero.
+ *
+ * @warning The input buffer, avpkt->data must be FF_INPUT_BUFFER_PADDING_SIZE
+ * larger than the actual read bytes because some optimized bitstream readers
+ * read 32 or 64 bits at once and could read over the end.
+ *
+ * @warning The end of the input buffer, avpkt->data, should be set to 0 to
+ * ensure that no overreading happens for damaged streams.
+ *
+ * @note You might have to align the input buffer, avpkt->data, and output
+ * buffer, frame->data[0]. The alignment requirements depend on the CPU: On
+ * some CPUs it isn't necessary at all, on others it won't work at all if not
+ * aligned and on others it will work but it will have an impact on performance.
+ *
+ * avpkt->data should have 4 byte alignment at minimum and frame->audio_data[]
+ * should be 32- or 16-byte-aligned unless the CPU doesn't need it.
+ * The avcodec_default_get_buffer() aligns the output buffer properly, but if
+ * the user overrides get_buffer() then alignment considerations should be
+ * taken into account.
+ *
+ * @param avctx the codec context
+ * @param[out] frame The AVFrame in which to store decoded audio samples.
+ * Use avcodec_alloc_frame to get an AVFrame, the codec will
+ * allocate memory for the actual sample buffer.
+ * With avcodec_default_get_buffer(), libavcodec frees/reuses the
+ * output buffer as it sees fit.
+ * With overridden get_buffer() (needs CODEC_CAP_DR1) the user
+ * decides into what buffer the decoder decodes. Unlike with
+ * video, audio decoders cannot use the buffer after returning,
+ * so they do not call release_buffer(), as it is assumed to be
+ * released immediately upon return.
+ * @param[out] got_frame_ptr Zero if no frame could be decoded, otherwise it is
+ * non-zero.
+ * @param[in] avpkt The input AVPacket containing the input buffer.
+ * You can create such packet with av_init_packet() and then setting
+ * data and size. Some decoders might also require additional fields
+ * to be set. All decoders are designed to use as few fields as
+ * possible though.
+ * @return A negative error code is returned if an error occurred during
+ *         decoding, otherwise the number of bytes consumed from the input
+ *         AVPacket is returned.
+ */
+int avcodec_decode_audio4(AVCodecContext *avctx, AVFrame *frame,
+ int *got_frame_ptr, AVPacket *avpkt);

/**
* Decode the video frame of size avpkt->size from avpkt->data into picture.
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index cd8dce9..3978a4d 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -40,11 +40,14 @@
#include "thread.h"
#include "audioconvert.h"
#include "internal.h"
+#include "mathops.h"
#include <stdlib.h>
#include <stdarg.h>
#include <limits.h>
#include <float.h>

+#define SANE_NB_CHANNELS 128U
+
static int volatile entangled_thread_counter=0;
static int (*ff_lockmgr_cb)(void **mutex, enum AVLockOp op);
static void *codec_mutex;
@@ -132,6 +135,10 @@ typedef struct InternalBuffer{
int linesize[4];
int width, height;
enum PixelFormat pix_fmt;
+ uint8_t **audio_data;
+ int channels;
+ int nb_samples;
+ enum AVSampleFormat sample_fmt;
}InternalBuffer;

#define INTERNAL_BUFFER_SIZE (32+1)
@@ -244,7 +251,88 @@ void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height){
*width=FFALIGN(*width, align);
}

-int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
+static int audio_get_buffer(AVCodecContext *avctx, AVFrame *frame)
+{
+ InternalBuffer *buf;
+ int buf_size, ret;
+
+ buf_size = av_samples_check_size(avctx->channels, frame->nb_samples,
+ avctx->sample_fmt, 32);
+ if (buf_size < 0)
+ return AVERROR(EINVAL);
+
+#if FF_API_OLD_DECODE_AUDIO
+ /* Check for use of user_buffer for backwards compatibility with
+ avcodec_decode_audio3() */
+ if (avctx->user_buffer) {
+ if (avctx->user_buffer_size < buf_size) {
+ av_log(avctx, AV_LOG_ERROR, "AVCodecContext.user_buffer_size is "
+ "too small for the requested frame. (%d < %d)\n",
+ avctx->user_buffer_size, buf_size);
+ return AVERROR(EINVAL);
+ }
+ frame->type = FF_BUFFER_TYPE_USER;
+ ret = av_samples_fill_arrays(&frame->audio_data, &frame->linesize[0],
+ avctx->user_buffer, avctx->channels,
+ frame->nb_samples, avctx->sample_fmt, 32);
+ if (ret)
+ return ret;
+
+ if (avctx->pkt) frame->pkt_pts = avctx->pkt->pts;
+ else frame->pkt_pts = AV_NOPTS_VALUE;
+ frame->reordered_opaque = avctx->reordered_opaque;
+
+ if (avctx->debug & FF_DEBUG_BUFFERS) {
+ av_log(avctx, AV_LOG_DEBUG, "default_get_buffer called on AVFrame %p, "
+ "AVCodecContext.user_buffer used\n", frame);
+ }
+ return 0;
+ }
+#endif
+ if (!avctx->internal_buffer) {
+ avctx->internal_buffer = av_mallocz(sizeof(InternalBuffer));
+ if (!avctx->internal_buffer)
+ return AVERROR(ENOMEM);
+ }
+ buf = avctx->internal_buffer;
+
+ if (buf->audio_data && buf->audio_data[0] &&
+ (buf->channels != avctx->channels ||
+ buf->nb_samples != frame->nb_samples ||
+ buf->sample_fmt != avctx->sample_fmt)) {
+ av_free(buf->audio_data[0]);
+ av_freep(&buf->audio_data);
+ }
+
+ if (!buf->audio_data) {
+ ret = av_samples_alloc(&buf->audio_data, &buf->linesize[0],
+ avctx->channels, frame->nb_samples,
+ avctx->sample_fmt, 32);
+ if (ret)
+ return ret;
+
+ buf->channels = avctx->channels;
+ buf->nb_samples = frame->nb_samples;
+ buf->sample_fmt = avctx->sample_fmt;
+ }
+ frame->type = FF_BUFFER_TYPE_INTERNAL;
+
+ frame->audio_data = buf->audio_data;
+ frame->linesize[0] = buf->linesize[0];
+
+ if (avctx->pkt) frame->pkt_pts = avctx->pkt->pts;
+ else frame->pkt_pts = AV_NOPTS_VALUE;
+ frame->reordered_opaque = avctx->reordered_opaque;
+
+ if (avctx->debug & FF_DEBUG_BUFFERS)
+ av_log(avctx, AV_LOG_DEBUG, "default_get_buffer called on frame %p, "
+ "internal audio buffer used\n", frame);
+
+ return 0;
+}
+
+static int video_get_buffer(AVCodecContext *s, AVFrame *pic)
+{
int i;
int w= s->width;
int h= s->height;
@@ -378,10 +466,24 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
return 0;
}

+int avcodec_default_get_buffer(AVCodecContext *avctx, AVFrame *frame)
+{
+ switch (avctx->codec_type) {
+ case AVMEDIA_TYPE_VIDEO:
+ return video_get_buffer(avctx, frame);
+ case AVMEDIA_TYPE_AUDIO:
+ return audio_get_buffer(avctx, frame);
+ default:
+ return -1;
+ }
+}
+
void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic){
int i;
InternalBuffer *buf, *last;

+ assert(s->codec_type == AVMEDIA_TYPE_VIDEO);
+
assert(pic->type==FF_BUFFER_TYPE_INTERNAL);
assert(s->internal_buffer_count);

@@ -413,6 +515,8 @@ int avcodec_default_reget_buffer(AVCodecContext *s, AVFrame *pic){
AVFrame temp_pic;
int i;

+ assert(s->codec_type == AVMEDIA_TYPE_VIDEO);
+
/* If no picture return a new buffer */
if(pic->data[0] == NULL) {
/* We will copy from buffer, so must be readable */
@@ -558,7 +662,6 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, AVCodec *codec, AVD
if (codec->decode)
av_freep(&avctx->subtitle_header);

-#define SANE_NB_CHANNELS 128U
if (avctx->channels > SANE_NB_CHANNELS) {
ret = AVERROR(EINVAL);
goto free_and_end;
@@ -755,11 +858,38 @@ int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *pi
return ret;
}

+#if FF_API_OLD_DECODE_AUDIO
int attribute_align_arg avcodec_decode_audio3(AVCodecContext *avctx, int16_t *samples,
int *frame_size_ptr,
AVPacket *avpkt)
{
- int ret;
+ AVFrame frame;
+ int ret, got_frame = 0;
+
+ assert(avctx->get_buffer == avcodec_default_get_buffer &&
+ avctx->release_buffer == avcodec_default_release_buffer);
+ avctx->user_buffer = (uint8_t *)samples;
+ avctx->user_buffer_size = *frame_size_ptr;
+
+ ret = avcodec_decode_audio4(avctx, &frame, &got_frame, avpkt);
+
+ if (ret >= 0 && got_frame) {
+ *frame_size_ptr = frame.linesize[0];
+ } else {
+ *frame_size_ptr = 0;
+ }
+ return ret;
+}
+#endif
+
+int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
+ AVFrame *frame,
+ int *got_frame_ptr,
+ AVPacket *avpkt)
+{
+ int ret = 0;
+
+ *got_frame_ptr = 0;

avctx->pkt = avpkt;

@@ -769,22 +899,11 @@ int attribute_align_arg avcodec_decode_audio3(AVCodecContext *avctx, int16_t *sa
}

if((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size){
- //FIXME remove the check below _after_ ensuring that all audio check that the available space is enough
- if(*frame_size_ptr < AVCODEC_MAX_AUDIO_FRAME_SIZE){
- av_log(avctx, AV_LOG_ERROR, "buffer smaller than AVCODEC_MAX_AUDIO_FRAME_SIZE\n");
- return -1;
- }
- if(*frame_size_ptr < FF_MIN_BUFFER_SIZE ||
- *frame_size_ptr < avctx->channels * avctx->frame_size * sizeof(int16_t)){
- av_log(avctx, AV_LOG_ERROR, "buffer %d too small\n", *frame_size_ptr);
- return -1;
+ ret = avctx->codec->decode(avctx, frame, got_frame_ptr, avpkt);
+ if (ret >= 0 && *got_frame_ptr) {
+ avctx->frame_number++;
+ frame->pkt_dts = avpkt->dts;
}
-
- ret = avctx->codec->decode(avctx, samples, frame_size_ptr, avpkt);
- avctx->frame_number++;
- }else{
- ret= 0;
- *frame_size_ptr=0;
}
return ret;
}
@@ -1108,7 +1227,8 @@ void avcodec_flush_buffers(AVCodecContext *avctx)
avctx->codec->flush(avctx);
}

-void avcodec_default_free_buffers(AVCodecContext *s){
+static void video_free_buffers(AVCodecContext *s)
+{
int i, j;

if(s->internal_buffer==NULL) return;
@@ -1127,6 +1247,34 @@ void avcodec_default_free_buffers(AVCodecContext *s){
s->internal_buffer_count=0;
}

+static void audio_free_buffers(AVCodecContext *avctx)
+{
+ InternalBuffer *buf;
+
+ if (avctx->internal_buffer == NULL)
+ return;
+
+ buf = (InternalBuffer *)avctx->internal_buffer;
+
+ av_samples_free(buf->audio_data, buf->channels, buf->sample_fmt);
+
+ av_freep(&avctx->internal_buffer);
+}
+
+void avcodec_default_free_buffers(AVCodecContext *avctx)
+{
+ switch (avctx->codec_type) {
+ case AVMEDIA_TYPE_VIDEO:
+ video_free_buffers(avctx);
+ break;
+ case AVMEDIA_TYPE_AUDIO:
+ audio_free_buffers(avctx);
+ break;
+ default:
+ break;
+ }
+}
+
#if FF_API_OLD_FF_PICT_TYPES
char av_get_pict_type_char(int pict_type){
return av_get_picture_type_char(pict_type);
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 756ad1d..95b95d2 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -21,7 +21,7 @@
#define AVCODEC_VERSION_H

#define LIBAVCODEC_VERSION_MAJOR 53
-#define LIBAVCODEC_VERSION_MINOR 19
+#define LIBAVCODEC_VERSION_MINOR 20
#define LIBAVCODEC_VERSION_MICRO 0

#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
@@ -104,5 +104,8 @@
#ifndef FF_API_PARSE_FRAME
#define FF_API_PARSE_FRAME (LIBAVCODEC_VERSION_MAJOR < 54)
#endif
+#ifndef FF_API_OLD_DECODE_AUDIO
+#define FF_API_OLD_DECODE_AUDIO (LIBAVCODEC_VERSION_MAJOR < 54)
+#endif

#endif /* AVCODEC_VERSION_H */
--
1.7.1
Måns Rullgård
2011-11-12 23:34:02 UTC
Permalink
Post by Justin Ruggles
Implement audio support in avcodec_default_get_buffer().
Deprecate avcodec_decode_audio3().
Note: This patch does not work by itself. All audio decoders must be converted
to use the new interface.
---
doc/APIchanges | 7 ++
libavcodec/avcodec.h | 108 +++++++++++++++++++++++++++++-
libavcodec/utils.c | 186 ++++++++++++++++++++++++++++++++++++++++++++-----
libavcodec/version.h | 5 +-
4 files changed, 285 insertions(+), 21 deletions(-)
[...]
Post by Justin Ruggles
@@ -1154,6 +1156,26 @@ typedef struct AVFrame {
* - decoding: Set by libavcodec.
*/
void *thread_opaque;
+
+ /**
+ * number of audio samples (per channel) described by this frame
+ * - encoding: unused
+ * - decoding: Set by libavcodec
+ */
+ int nb_samples;
+
+ /**
+ * pointers to the audio data.
+ *
+ * For planar audio, each channel has a separate data pointer, and
+ * linesize[0] contains the size of each channel buffer.
+ * For packed audio, there is just one data pointer, and linesize[0]
+ * contains the total size of the buffer for all channels.
+ *
+ * encoding: unused
+ * decoding: set by libavcodec in AVCodecContext.get_buffer()
+ */
+ uint8_t **audio_data;
} AVFrame;
The mini-allocation for the pointers is annoying in almost every way
possible.
Post by Justin Ruggles
/**
@@ -2945,6 +2967,32 @@ typedef struct AVCodecContext {
#define AV_EF_BITSTREAM (1<<1)
#define AV_EF_BUFFER (1<<2)
#define AV_EF_EXPLODE (1<<3)
+
+#if FF_API_OLD_DECODE_AUDIO
+ /**
+ * NOT PART OF PUBLIC API
+ *
+ * User-allocated audio decoder output buffer.
+ * Used internally by libavcodec for backwards compatibility with
+ * avcodec_decode_audio3().
+ *
+ * removal of avcodec_decode_audio3().
+ */
+ uint8_t *user_buffer;
+
+ /**
+ * NOT PART OF PUBLIC API
+ *
+ * Allocated size, in bytes, of AVCodecContext.user_buffer.
+ * Used internally by libavcodec for backwards compatibility with
+ * avcodec_decode_audio3().
+ *
+ * removal of avcodec_decode_audio3().
+ */
+ int user_buffer_size;
+#endif
} AVCodecContext;
This is going to cause compatibility hell and probably a host of other
nightmares.
Post by Justin Ruggles
+/**
+ * Decode the audio frame of size avpkt->size from avpkt->data into frame.
+ *
+ * Some decoders may support multiple frames in a single AVPacket. Such
+ * decoders would then just decode the first frame. In this case,
+ * avcodec_decode_audio4 has to be called again with an AVPacket containing
+ * the remaining data in order to decode the second frame, etc...
+ * If no frame could be output, got_frame_ptr is set to zero.
+ *
+ * larger than the actual read bytes because some optimized bitstream readers
+ * read 32 or 64 bits at once and could read over the end.
+ *
+ * ensure that no overreading happens for damaged streams.
+ *
+ * buffer, frame->data[0]. The alignment requirements depend on the CPU: On
+ * some CPUs it isn't necessary at all, on others it won't work at all if not
+ * aligned and on others it will work but it will have an impact on performance.
+ *
+ * avpkt->data should have 4 byte alignment at minimum and frame->audio_data[]
+ * should be 32- or 16-byte-aligned unless the CPU doesn't need it.
+ * The avcodec_default_get_buffer() aligns the output buffer properly, but if
+ * the user overrides get_buffer() then alignment considerations should be
+ * taken into account.
A bit too much copied from the avcodec_decode_video2 comment, I think.
Post by Justin Ruggles
+ * Use avcodec_alloc_frame to get an AVFrame, the codec will
+ * allocate memory for the actual sample buffer.
+ * With avcodec_default_get_buffer(), libavcodec frees/reuses the
+ * output buffer as it sees fit.
+ * With overridden get_buffer() (needs CODEC_CAP_DR1) the user
+ * decides into what buffer the decoder decodes. Unlike with
+ * video, audio decoders cannot use the buffer after returning,
+ * so they do not call release_buffer(), as it is assumed to be
+ * released immediately upon return.
+ * non-zero.
+ * You can create such packet with av_init_packet() and then setting
+ * data and size. Some decoders might also require additional fields
+ * to be set. All decoders are designed to use as few fields as
+ * possible though.
+ * decoding, otherwise the number of bytes consumed from the input
+ * AVPacket is returned.
+ */
+int avcodec_decode_audio4(AVCodecContext *avctx, AVFrame *frame,
+ int *got_frame_ptr, AVPacket *avpkt);
/**
* Decode the video frame of size avpkt->size from avpkt->data into picture.
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index cd8dce9..3978a4d 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -40,11 +40,14 @@
#include "thread.h"
#include "audioconvert.h"
#include "internal.h"
+#include "mathops.h"
#include <stdlib.h>
#include <stdarg.h>
#include <limits.h>
#include <float.h>
+#define SANE_NB_CHANNELS 128U
+
static int volatile entangled_thread_counter=0;
static int (*ff_lockmgr_cb)(void **mutex, enum AVLockOp op);
static void *codec_mutex;
@@ -132,6 +135,10 @@ typedef struct InternalBuffer{
int linesize[4];
int width, height;
enum PixelFormat pix_fmt;
+ uint8_t **audio_data;
+ int channels;
+ int nb_samples;
+ enum AVSampleFormat sample_fmt;
}InternalBuffer;
#define INTERNAL_BUFFER_SIZE (32+1)
@@ -244,7 +251,88 @@ void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height){
*width=FFALIGN(*width, align);
}
-int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
+static int audio_get_buffer(AVCodecContext *avctx, AVFrame *frame)
+{
+ InternalBuffer *buf;
+ int buf_size, ret;
+
+ buf_size = av_samples_check_size(avctx->channels, frame->nb_samples,
+ avctx->sample_fmt, 32);
+ if (buf_size < 0)
+ return AVERROR(EINVAL);
+
+#if FF_API_OLD_DECODE_AUDIO
+ /* Check for use of user_buffer for backwards compatibility with
+ avcodec_decode_audio3() */
+ if (avctx->user_buffer) {
+ if (avctx->user_buffer_size < buf_size) {
+ av_log(avctx, AV_LOG_ERROR, "AVCodecContext.user_buffer_size is "
+ "too small for the requested frame. (%d < %d)\n",
+ avctx->user_buffer_size, buf_size);
+ return AVERROR(EINVAL);
+ }
+ frame->type = FF_BUFFER_TYPE_USER;
+ ret = av_samples_fill_arrays(&frame->audio_data, &frame->linesize[0],
+ avctx->user_buffer, avctx->channels,
+ frame->nb_samples, avctx->sample_fmt, 32);
+ if (ret)
+ return ret;
+
+ if (avctx->pkt) frame->pkt_pts = avctx->pkt->pts;
+ else frame->pkt_pts = AV_NOPTS_VALUE;
+ frame->reordered_opaque = avctx->reordered_opaque;
+
+ if (avctx->debug & FF_DEBUG_BUFFERS) {
+ av_log(avctx, AV_LOG_DEBUG, "default_get_buffer called on AVFrame %p, "
+ "AVCodecContext.user_buffer used\n", frame);
+ }
+ return 0;
+ }
+#endif
+ if (!avctx->internal_buffer) {
+ avctx->internal_buffer = av_mallocz(sizeof(InternalBuffer));
+ if (!avctx->internal_buffer)
+ return AVERROR(ENOMEM);
+ }
+ buf = avctx->internal_buffer;
+
+ if (buf->audio_data && buf->audio_data[0] &&
+ (buf->channels != avctx->channels ||
+ buf->nb_samples != frame->nb_samples ||
+ buf->sample_fmt != avctx->sample_fmt)) {
+ av_free(buf->audio_data[0]);
+ av_freep(&buf->audio_data);
+ }
+
+ if (!buf->audio_data) {
+ ret = av_samples_alloc(&buf->audio_data, &buf->linesize[0],
+ avctx->channels, frame->nb_samples,
+ avctx->sample_fmt, 32);
+ if (ret)
+ return ret;
+
+ buf->channels = avctx->channels;
+ buf->nb_samples = frame->nb_samples;
+ buf->sample_fmt = avctx->sample_fmt;
+ }
+ frame->type = FF_BUFFER_TYPE_INTERNAL;
+
+ frame->audio_data = buf->audio_data;
+ frame->linesize[0] = buf->linesize[0];
+
+ if (avctx->pkt) frame->pkt_pts = avctx->pkt->pts;
+ else frame->pkt_pts = AV_NOPTS_VALUE;
+ frame->reordered_opaque = avctx->reordered_opaque;
+
+ if (avctx->debug & FF_DEBUG_BUFFERS)
+ av_log(avctx, AV_LOG_DEBUG, "default_get_buffer called on frame %p, "
+ "internal audio buffer used\n", frame);
+
+ return 0;
+}
The complexity of those get_buffer functions always puzzles me, but
that's a debate for another day.
Post by Justin Ruggles
+static int video_get_buffer(AVCodecContext *s, AVFrame *pic)
+{
int i;
int w= s->width;
int h= s->height;
@@ -378,10 +466,24 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
return 0;
}
+int avcodec_default_get_buffer(AVCodecContext *avctx, AVFrame *frame)
+{
+ switch (avctx->codec_type) {
+ return video_get_buffer(avctx, frame);
+ return audio_get_buffer(avctx, frame);
+ return -1;
+ }
+}
+
void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic){
int i;
InternalBuffer *buf, *last;
+ assert(s->codec_type == AVMEDIA_TYPE_VIDEO);
What releases audio frames?
--
Måns Rullgård
***@mansr.com
Justin Ruggles
2011-11-13 16:40:21 UTC
Permalink
Post by Måns Rullgård
Post by Justin Ruggles
Implement audio support in avcodec_default_get_buffer().
Deprecate avcodec_decode_audio3().
Note: This patch does not work by itself. All audio decoders must be converted
to use the new interface.
---
doc/APIchanges | 7 ++
libavcodec/avcodec.h | 108 +++++++++++++++++++++++++++++-
libavcodec/utils.c | 186 ++++++++++++++++++++++++++++++++++++++++++++-----
libavcodec/version.h | 5 +-
4 files changed, 285 insertions(+), 21 deletions(-)
[...]
Post by Justin Ruggles
@@ -1154,6 +1156,26 @@ typedef struct AVFrame {
* - decoding: Set by libavcodec.
*/
void *thread_opaque;
+
+ /**
+ * number of audio samples (per channel) described by this frame
+ * - encoding: unused
+ * - decoding: Set by libavcodec
+ */
+ int nb_samples;
+
+ /**
+ * pointers to the audio data.
+ *
+ * For planar audio, each channel has a separate data pointer, and
+ * linesize[0] contains the size of each channel buffer.
+ * For packed audio, there is just one data pointer, and linesize[0]
+ * contains the total size of the buffer for all channels.
+ *
+ * encoding: unused
+ * decoding: set by libavcodec in AVCodecContext.get_buffer()
+ */
+ uint8_t **audio_data;
} AVFrame;
The mini-allocation for the pointers is annoying in almost every way
possible.
It is annoying, yes. But the general agreement on IRC was that people
wanted no limitation on the maximum number of channels.
Post by Måns Rullgård
Post by Justin Ruggles
/**
@@ -2945,6 +2967,32 @@ typedef struct AVCodecContext {
#define AV_EF_BITSTREAM (1<<1)
#define AV_EF_BUFFER (1<<2)
#define AV_EF_EXPLODE (1<<3)
+
+#if FF_API_OLD_DECODE_AUDIO
+ /**
+ * NOT PART OF PUBLIC API
+ *
+ * User-allocated audio decoder output buffer.
+ * Used internally by libavcodec for backwards compatibility with
+ * avcodec_decode_audio3().
+ *
+ * removal of avcodec_decode_audio3().
+ */
+ uint8_t *user_buffer;
+
+ /**
+ * NOT PART OF PUBLIC API
+ *
+ * Allocated size, in bytes, of AVCodecContext.user_buffer.
+ * Used internally by libavcodec for backwards compatibility with
+ * avcodec_decode_audio3().
+ *
+ * removal of avcodec_decode_audio3().
+ */
+ int user_buffer_size;
+#endif
} AVCodecContext;
This is going to cause compatibility hell and probably a host of other
nightmares.
In what way? The only other way I could come up with to keep
compatibility with avcodec_decode_audio3() is to memcpy from the
get_buffer() data into the user-supplied data.
Post by Måns Rullgård
Post by Justin Ruggles
+/**
+ * Decode the audio frame of size avpkt->size from avpkt->data into frame.
+ *
+ * Some decoders may support multiple frames in a single AVPacket. Such
+ * decoders would then just decode the first frame. In this case,
+ * avcodec_decode_audio4 has to be called again with an AVPacket containing
+ * the remaining data in order to decode the second frame, etc...
+ * If no frame could be output, got_frame_ptr is set to zero.
+ *
+ * larger than the actual read bytes because some optimized bitstream readers
+ * read 32 or 64 bits at once and could read over the end.
+ *
+ * ensure that no overreading happens for damaged streams.
+ *
+ * buffer, frame->data[0]. The alignment requirements depend on the CPU: On
+ * some CPUs it isn't necessary at all, on others it won't work at all if not
+ * aligned and on others it will work but it will have an impact on performance.
+ *
+ * avpkt->data should have 4 byte alignment at minimum and frame->audio_data[]
+ * should be 32- or 16-byte-aligned unless the CPU doesn't need it.
+ * The avcodec_default_get_buffer() aligns the output buffer properly, but if
+ * the user overrides get_buffer() then alignment considerations should be
+ * taken into account.
A bit too much copied from the avcodec_decode_video2 comment, I think.
yeah, I know. Where would you suggest we put the notes and warnings that
are common to both in a way that would make sense in the doxygen
documentation? Diego, any suggestions here?
Post by Måns Rullgård
Post by Justin Ruggles
+ * Use avcodec_alloc_frame to get an AVFrame, the codec will
+ * allocate memory for the actual sample buffer.
+ * With avcodec_default_get_buffer(), libavcodec frees/reuses the
+ * output buffer as it sees fit.
+ * With overridden get_buffer() (needs CODEC_CAP_DR1) the user
+ * decides into what buffer the decoder decodes. Unlike with
+ * video, audio decoders cannot use the buffer after returning,
+ * so they do not call release_buffer(), as it is assumed to be
+ * released immediately upon return.
+ * non-zero.
+ * You can create such packet with av_init_packet() and then setting
+ * data and size. Some decoders might also require additional fields
+ * to be set. All decoders are designed to use as few fields as
+ * possible though.
+ * decoding, otherwise the number of bytes consumed from the input
+ * AVPacket is returned.
+ */
+int avcodec_decode_audio4(AVCodecContext *avctx, AVFrame *frame,
+ int *got_frame_ptr, AVPacket *avpkt);
/**
* Decode the video frame of size avpkt->size from avpkt->data into picture.
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index cd8dce9..3978a4d 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -40,11 +40,14 @@
#include "thread.h"
#include "audioconvert.h"
#include "internal.h"
+#include "mathops.h"
#include <stdlib.h>
#include <stdarg.h>
#include <limits.h>
#include <float.h>
+#define SANE_NB_CHANNELS 128U
+
static int volatile entangled_thread_counter=0;
static int (*ff_lockmgr_cb)(void **mutex, enum AVLockOp op);
static void *codec_mutex;
@@ -132,6 +135,10 @@ typedef struct InternalBuffer{
int linesize[4];
int width, height;
enum PixelFormat pix_fmt;
+ uint8_t **audio_data;
+ int channels;
+ int nb_samples;
+ enum AVSampleFormat sample_fmt;
}InternalBuffer;
#define INTERNAL_BUFFER_SIZE (32+1)
@@ -244,7 +251,88 @@ void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height){
*width=FFALIGN(*width, align);
}
-int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
+static int audio_get_buffer(AVCodecContext *avctx, AVFrame *frame)
+{
+ InternalBuffer *buf;
+ int buf_size, ret;
+
+ buf_size = av_samples_check_size(avctx->channels, frame->nb_samples,
+ avctx->sample_fmt, 32);
+ if (buf_size < 0)
+ return AVERROR(EINVAL);
+
+#if FF_API_OLD_DECODE_AUDIO
+ /* Check for use of user_buffer for backwards compatibility with
+ avcodec_decode_audio3() */
+ if (avctx->user_buffer) {
+ if (avctx->user_buffer_size < buf_size) {
+ av_log(avctx, AV_LOG_ERROR, "AVCodecContext.user_buffer_size is "
+ "too small for the requested frame. (%d < %d)\n",
+ avctx->user_buffer_size, buf_size);
+ return AVERROR(EINVAL);
+ }
+ frame->type = FF_BUFFER_TYPE_USER;
+ ret = av_samples_fill_arrays(&frame->audio_data, &frame->linesize[0],
+ avctx->user_buffer, avctx->channels,
+ frame->nb_samples, avctx->sample_fmt, 32);
+ if (ret)
+ return ret;
+
+ if (avctx->pkt) frame->pkt_pts = avctx->pkt->pts;
+ else frame->pkt_pts = AV_NOPTS_VALUE;
+ frame->reordered_opaque = avctx->reordered_opaque;
+
+ if (avctx->debug & FF_DEBUG_BUFFERS) {
+ av_log(avctx, AV_LOG_DEBUG, "default_get_buffer called on AVFrame %p, "
+ "AVCodecContext.user_buffer used\n", frame);
+ }
+ return 0;
+ }
+#endif
+ if (!avctx->internal_buffer) {
+ avctx->internal_buffer = av_mallocz(sizeof(InternalBuffer));
+ if (!avctx->internal_buffer)
+ return AVERROR(ENOMEM);
+ }
+ buf = avctx->internal_buffer;
+
+ if (buf->audio_data && buf->audio_data[0] &&
+ (buf->channels != avctx->channels ||
+ buf->nb_samples != frame->nb_samples ||
+ buf->sample_fmt != avctx->sample_fmt)) {
+ av_free(buf->audio_data[0]);
+ av_freep(&buf->audio_data);
+ }
+
+ if (!buf->audio_data) {
+ ret = av_samples_alloc(&buf->audio_data, &buf->linesize[0],
+ avctx->channels, frame->nb_samples,
+ avctx->sample_fmt, 32);
+ if (ret)
+ return ret;
+
+ buf->channels = avctx->channels;
+ buf->nb_samples = frame->nb_samples;
+ buf->sample_fmt = avctx->sample_fmt;
+ }
+ frame->type = FF_BUFFER_TYPE_INTERNAL;
+
+ frame->audio_data = buf->audio_data;
+ frame->linesize[0] = buf->linesize[0];
+
+ if (avctx->pkt) frame->pkt_pts = avctx->pkt->pts;
+ else frame->pkt_pts = AV_NOPTS_VALUE;
+ frame->reordered_opaque = avctx->reordered_opaque;
+
+ if (avctx->debug & FF_DEBUG_BUFFERS)
+ av_log(avctx, AV_LOG_DEBUG, "default_get_buffer called on frame %p, "
+ "internal audio buffer used\n", frame);
+
+ return 0;
+}
The complexity of those get_buffer functions always puzzles me, but
that's a debate for another day.
Well, I can at least change it to only keep track of the size instead of
all the relevant params.
Post by Måns Rullgård
Post by Justin Ruggles
+static int video_get_buffer(AVCodecContext *s, AVFrame *pic)
+{
int i;
int w= s->width;
int h= s->height;
@@ -378,10 +466,24 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
return 0;
}
+int avcodec_default_get_buffer(AVCodecContext *avctx, AVFrame *frame)
+{
+ switch (avctx->codec_type) {
+ return video_get_buffer(avctx, frame);
+ return audio_get_buffer(avctx, frame);
+ return -1;
+ }
+}
+
void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic){
int i;
InternalBuffer *buf, *last;
+ assert(s->codec_type == AVMEDIA_TYPE_VIDEO);
What releases audio frames?
It's in the documentation. Audio frames are considered released after
decode() returns, and the decoder cannot reuse the buffer in subsequent
calls. I got requests to have get_buffer()-provided buffers only be used
for the next sequential output data so that the user can have direct
decoding into a contiguous buffer. Not that it would always work anyway
due to alignment, but it would work in most cases.

We could have it call release_buffer() in avcodec_decode_audio4() after
decode() returns. We just need to guarantee the behavior in the API in
order for it to be useful to users.

Cheers,
Justin
Måns Rullgård
2011-11-13 17:41:54 UTC
Permalink
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
Implement audio support in avcodec_default_get_buffer().
Deprecate avcodec_decode_audio3().
Note: This patch does not work by itself. All audio decoders must be converted
to use the new interface.
---
doc/APIchanges | 7 ++
libavcodec/avcodec.h | 108 +++++++++++++++++++++++++++++-
libavcodec/utils.c | 186 ++++++++++++++++++++++++++++++++++++++++++++-----
libavcodec/version.h | 5 +-
4 files changed, 285 insertions(+), 21 deletions(-)
[...]
Post by Justin Ruggles
@@ -1154,6 +1156,26 @@ typedef struct AVFrame {
* - decoding: Set by libavcodec.
*/
void *thread_opaque;
+
+ /**
+ * number of audio samples (per channel) described by this frame
+ * - encoding: unused
+ * - decoding: Set by libavcodec
+ */
+ int nb_samples;
+
+ /**
+ * pointers to the audio data.
+ *
+ * For planar audio, each channel has a separate data pointer, and
+ * linesize[0] contains the size of each channel buffer.
+ * For packed audio, there is just one data pointer, and linesize[0]
+ * contains the total size of the buffer for all channels.
+ *
+ * encoding: unused
+ * decoding: set by libavcodec in AVCodecContext.get_buffer()
+ */
+ uint8_t **audio_data;
} AVFrame;
The mini-allocation for the pointers is annoying in almost every way
possible.
It is annoying, yes. But the general agreement on IRC
So all the sane people have left IRC.
Post by Justin Ruggles
was that people wanted no limitation on the maximum number of
channels.
A C99 flexible array member at the end of the struct would solve that
problem. Alternatively (and/or additionally) the data[4] array could be
used (and possibly extended to, say, 8 entries) when it is large enough.
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
/**
@@ -2945,6 +2967,32 @@ typedef struct AVCodecContext {
#define AV_EF_BITSTREAM (1<<1)
#define AV_EF_BUFFER (1<<2)
#define AV_EF_EXPLODE (1<<3)
+
+#if FF_API_OLD_DECODE_AUDIO
+ /**
+ * NOT PART OF PUBLIC API
+ *
+ * User-allocated audio decoder output buffer.
+ * Used internally by libavcodec for backwards compatibility with
+ * avcodec_decode_audio3().
+ *
+ * removal of avcodec_decode_audio3().
+ */
+ uint8_t *user_buffer;
+
+ /**
+ * NOT PART OF PUBLIC API
+ *
+ * Allocated size, in bytes, of AVCodecContext.user_buffer.
+ * Used internally by libavcodec for backwards compatibility with
+ * avcodec_decode_audio3().
+ *
+ * removal of avcodec_decode_audio3().
+ */
+ int user_buffer_size;
+#endif
} AVCodecContext;
This is going to cause compatibility hell and probably a host of other
nightmares.
In what way?
What happens if we need to add more fields before these get removed?
Not to mention it's bloody ugly.
Post by Justin Ruggles
The only other way I could come up with to keep compatibility with
avcodec_decode_audio3() is to memcpy from the get_buffer() data into
the user-supplied data.
That would probably be acceptable as a temporary compatibility solution.

A more general solution would be to add an opaque pointer to a struct
AVCodecInteral or whatever that we could do whatever we want to without
compatibility concerns at all. There are already a few other fields in
AVCodecContext that could be moved to such a hidden struct. We might as
well fix this properly once and for all instead of making it worse.
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
+/**
+ * Decode the audio frame of size avpkt->size from avpkt->data into frame.
+ *
+ * Some decoders may support multiple frames in a single AVPacket. Such
+ * decoders would then just decode the first frame. In this case,
+ * avcodec_decode_audio4 has to be called again with an AVPacket containing
+ * the remaining data in order to decode the second frame, etc...
+ * If no frame could be output, got_frame_ptr is set to zero.
+ *
+ * larger than the actual read bytes because some optimized bitstream readers
+ * read 32 or 64 bits at once and could read over the end.
+ *
+ * ensure that no overreading happens for damaged streams.
+ *
+ * buffer, frame->data[0]. The alignment requirements depend on the CPU: On
+ * some CPUs it isn't necessary at all, on others it won't work at all if not
+ * aligned and on others it will work but it will have an impact on performance.
+ *
+ * avpkt->data should have 4 byte alignment at minimum and frame->audio_data[]
+ * should be 32- or 16-byte-aligned unless the CPU doesn't need it.
+ * The avcodec_default_get_buffer() aligns the output buffer properly, but if
+ * the user overrides get_buffer() then alignment considerations should be
+ * taken into account.
A bit too much copied from the avcodec_decode_video2 comment, I think.
yeah, I know. Where would you suggest we put the notes and warnings that
are common to both in a way that would make sense in the doxygen
documentation? Diego, any suggestions here?
Some of the copied bits are plain wrong for this function. At least
update the field names to match those actually used.
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
+static int video_get_buffer(AVCodecContext *s, AVFrame *pic)
+{
int i;
int w= s->width;
int h= s->height;
@@ -378,10 +466,24 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
return 0;
}
+int avcodec_default_get_buffer(AVCodecContext *avctx, AVFrame *frame)
+{
+ switch (avctx->codec_type) {
+ return video_get_buffer(avctx, frame);
+ return audio_get_buffer(avctx, frame);
+ return -1;
+ }
+}
+
void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic){
int i;
InternalBuffer *buf, *last;
+ assert(s->codec_type == AVMEDIA_TYPE_VIDEO);
What releases audio frames?
It's in the documentation. Audio frames are considered released after
decode() returns, and the decoder cannot reuse the buffer in subsequent
calls. I got requests to have get_buffer()-provided buffers only be used
for the next sequential output data so that the user can have direct
decoding into a contiguous buffer. Not that it would always work anyway
due to alignment, but it would work in most cases.
We could have it call release_buffer() in avcodec_decode_audio4() after
decode() returns. We just need to guarantee the behavior in the API in
order for it to be useful to users.
What happens if an input packet doesn't complete an output frame?
What if an input packet results in multiple output frames?
--
Måns Rullgård
***@mansr.com
Janne Grunau
2011-11-13 18:23:54 UTC
Permalink
Post by Måns Rullgård
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
Implement audio support in avcodec_default_get_buffer().
Deprecate avcodec_decode_audio3().
Note: This patch does not work by itself. All audio decoders must be converted
to use the new interface.
---
@@ -1154,6 +1156,26 @@ typedef struct AVFrame {
[...]
Post by Måns Rullgård
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
+ */
+ uint8_t **audio_data;
} AVFrame;
The mini-allocation for the pointers is annoying in almost every way
possible.
It is annoying, yes. But the general agreement on IRC
was that people wanted no limitation on the maximum number of
channels.
A C99 flexible array member at the end of the struct would solve that
problem. Alternatively (and/or additionally) the data[4] array could be
used (and possibly extended to, say, 8 entries) when it is large enough.
Having a flexible array member would require a major bump for adding
fields to AVFrame. It is a pity that data[] has only four entries.
If it had at least 8 entries (i.e. covering the majority of the cases)
we could have allocated array of pointers for the uncommon cases. It
still sucks to have two different api depending on the number of
channels. So I don't see a good solution to this problem.

How far away is the next libavcodec major bump? It might be easier to
find a solution if we can change AVFrame as we like.
Post by Måns Rullgård
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
/**
@@ -2945,6 +2967,32 @@ typedef struct AVCodecContext {
#define AV_EF_BITSTREAM (1<<1)
#define AV_EF_BUFFER (1<<2)
#define AV_EF_EXPLODE (1<<3)
+
+#if FF_API_OLD_DECODE_AUDIO
+ /**
+ * NOT PART OF PUBLIC API
+ *
+ * User-allocated audio decoder output buffer.
+ * Used internally by libavcodec for backwards compatibility with
+ * avcodec_decode_audio3().
+ *
+ * removal of avcodec_decode_audio3().
+ */
+ uint8_t *user_buffer;
+
+ /**
+ * NOT PART OF PUBLIC API
+ *
+ * Allocated size, in bytes, of AVCodecContext.user_buffer.
+ * Used internally by libavcodec for backwards compatibility with
+ * avcodec_decode_audio3().
+ *
+ * removal of avcodec_decode_audio3().
+ */
+ int user_buffer_size;
+#endif
} AVCodecContext;
This is going to cause compatibility hell and probably a host of other
nightmares.
In what way?
What happens if we need to add more fields before these get removed?
How is that going to be a problem? I don't see how these fields differ
from any other field we remove after a major bump. But I agree that it
is preferable to add a struct for internal stuff and not exposing
internals needlessly.

Janne
Måns Rullgård
2011-11-13 18:28:43 UTC
Permalink
Post by Janne Grunau
Post by Måns Rullgård
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
Implement audio support in avcodec_default_get_buffer().
Deprecate avcodec_decode_audio3().
Note: This patch does not work by itself. All audio decoders must be converted
to use the new interface.
---
@@ -1154,6 +1156,26 @@ typedef struct AVFrame {
[...]
Post by Måns Rullgård
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
+ */
+ uint8_t **audio_data;
} AVFrame;
The mini-allocation for the pointers is annoying in almost every way
possible.
It is annoying, yes. But the general agreement on IRC
was that people wanted no limitation on the maximum number of
channels.
A C99 flexible array member at the end of the struct would solve that
problem. Alternatively (and/or additionally) the data[4] array could be
used (and possibly extended to, say, 8 entries) when it is large enough.
Having a flexible array member would require a major bump for adding
fields to AVFrame.
True.
Post by Janne Grunau
It is a pity that data[] has only four entries.
So extend it on the next version bump.
Post by Janne Grunau
If it had at least 8 entries (i.e. covering the majority of the cases)
we could have allocated array of pointers for the uncommon cases. It
still sucks to have two different api depending on the number of
channels. So I don't see a good solution to this problem.
There won't be two APIs. Simply set frame.audio_data = frame.data if
the number of channels is small enough. You'll need to be a little
careful when freeing a frame, that's all.
--
Måns Rullgård
***@mansr.com
Justin Ruggles
2011-11-13 19:07:02 UTC
Permalink
Post by Måns Rullgård
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
Implement audio support in avcodec_default_get_buffer().
Deprecate avcodec_decode_audio3().
Note: This patch does not work by itself. All audio decoders must be converted
to use the new interface.
---
doc/APIchanges | 7 ++
libavcodec/avcodec.h | 108 +++++++++++++++++++++++++++++-
libavcodec/utils.c | 186 ++++++++++++++++++++++++++++++++++++++++++++-----
libavcodec/version.h | 5 +-
4 files changed, 285 insertions(+), 21 deletions(-)
[...]
Post by Justin Ruggles
@@ -1154,6 +1156,26 @@ typedef struct AVFrame {
* - decoding: Set by libavcodec.
*/
void *thread_opaque;
+
+ /**
+ * number of audio samples (per channel) described by this frame
+ * - encoding: unused
+ * - decoding: Set by libavcodec
+ */
+ int nb_samples;
+
+ /**
+ * pointers to the audio data.
+ *
+ * For planar audio, each channel has a separate data pointer, and
+ * linesize[0] contains the size of each channel buffer.
+ * For packed audio, there is just one data pointer, and linesize[0]
+ * contains the total size of the buffer for all channels.
+ *
+ * encoding: unused
+ * decoding: set by libavcodec in AVCodecContext.get_buffer()
+ */
+ uint8_t **audio_data;
} AVFrame;
The mini-allocation for the pointers is annoying in almost every way
possible.
It is annoying, yes. But the general agreement on IRC
So all the sane people have left IRC.
Post by Justin Ruggles
was that people wanted no limitation on the maximum number of
channels.
A C99 flexible array member at the end of the struct would solve that
problem. Alternatively (and/or additionally) the data[4] array could be
used (and possibly extended to, say, 8 entries) when it is large enough.
Ok, using data[] when possible instead of allocating the pointers does
make sense.
Post by Måns Rullgård
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
/**
@@ -2945,6 +2967,32 @@ typedef struct AVCodecContext {
#define AV_EF_BITSTREAM (1<<1)
#define AV_EF_BUFFER (1<<2)
#define AV_EF_EXPLODE (1<<3)
+
+#if FF_API_OLD_DECODE_AUDIO
+ /**
+ * NOT PART OF PUBLIC API
+ *
+ * User-allocated audio decoder output buffer.
+ * Used internally by libavcodec for backwards compatibility with
+ * avcodec_decode_audio3().
+ *
+ * removal of avcodec_decode_audio3().
+ */
+ uint8_t *user_buffer;
+
+ /**
+ * NOT PART OF PUBLIC API
+ *
+ * Allocated size, in bytes, of AVCodecContext.user_buffer.
+ * Used internally by libavcodec for backwards compatibility with
+ * avcodec_decode_audio3().
+ *
+ * removal of avcodec_decode_audio3().
+ */
+ int user_buffer_size;
+#endif
} AVCodecContext;
This is going to cause compatibility hell and probably a host of other
nightmares.
In what way?
What happens if we need to add more fields before these get removed?
Not to mention it's bloody ugly.
They're still part of the ABI, even if they're not meant to be used
directly by users. So no problems with more fields being added. and
yeah, it is ugly. I like the AVCodecInternal idea.
Post by Måns Rullgård
Post by Justin Ruggles
The only other way I could come up with to keep compatibility with
avcodec_decode_audio3() is to memcpy from the get_buffer() data into
the user-supplied data.
That would probably be acceptable as a temporary compatibility solution.
A more general solution would be to add an opaque pointer to a struct
AVCodecInteral or whatever that we could do whatever we want to without
compatibility concerns at all. There are already a few other fields in
AVCodecContext that could be moved to such a hidden struct. We might as
well fix this properly once and for all instead of making it worse.
I'll try to implement it with AVCodecInternal so we can make it work
cleanly without the extra memcpy().
Post by Måns Rullgård
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
+/**
+ * Decode the audio frame of size avpkt->size from avpkt->data into frame.
+ *
+ * Some decoders may support multiple frames in a single AVPacket. Such
+ * decoders would then just decode the first frame. In this case,
+ * avcodec_decode_audio4 has to be called again with an AVPacket containing
+ * the remaining data in order to decode the second frame, etc...
+ * If no frame could be output, got_frame_ptr is set to zero.
+ *
+ * larger than the actual read bytes because some optimized bitstream readers
+ * read 32 or 64 bits at once and could read over the end.
+ *
+ * ensure that no overreading happens for damaged streams.
+ *
+ * buffer, frame->data[0]. The alignment requirements depend on the CPU: On
+ * some CPUs it isn't necessary at all, on others it won't work at all if not
+ * aligned and on others it will work but it will have an impact on performance.
+ *
+ * avpkt->data should have 4 byte alignment at minimum and frame->audio_data[]
+ * should be 32- or 16-byte-aligned unless the CPU doesn't need it.
+ * The avcodec_default_get_buffer() aligns the output buffer properly, but if
+ * the user overrides get_buffer() then alignment considerations should be
+ * taken into account.
A bit too much copied from the avcodec_decode_video2 comment, I think.
yeah, I know. Where would you suggest we put the notes and warnings that
are common to both in a way that would make sense in the doxygen
documentation? Diego, any suggestions here?
Some of the copied bits are plain wrong for this function. At least
update the field names to match those actually used.
I did miss changing one data to audio_data. That was just an oversight.
And I'm guessing you mean the warning about setting the padding to 0,
which I suppose isn't necessary for audio.
Post by Måns Rullgård
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
+static int video_get_buffer(AVCodecContext *s, AVFrame *pic)
+{
int i;
int w= s->width;
int h= s->height;
@@ -378,10 +466,24 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
return 0;
}
+int avcodec_default_get_buffer(AVCodecContext *avctx, AVFrame *frame)
+{
+ switch (avctx->codec_type) {
+ return video_get_buffer(avctx, frame);
+ return audio_get_buffer(avctx, frame);
+ return -1;
+ }
+}
+
void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic){
int i;
InternalBuffer *buf, *last;
+ assert(s->codec_type == AVMEDIA_TYPE_VIDEO);
What releases audio frames?
It's in the documentation. Audio frames are considered released after
decode() returns, and the decoder cannot reuse the buffer in subsequent
calls. I got requests to have get_buffer()-provided buffers only be used
for the next sequential output data so that the user can have direct
decoding into a contiguous buffer. Not that it would always work anyway
due to alignment, but it would work in most cases.
We could have it call release_buffer() in avcodec_decode_audio4() after
decode() returns. We just need to guarantee the behavior in the API in
order for it to be useful to users.
What happens if an input packet doesn't complete an output frame?
What if an input packet results in multiple output frames?
If the input packet doesn't complete an output frame, the decoder
doesn't output anything. We require full packets; that's what parsers
are for. If some oddball codec has a full packet that only gives a
partial frame, the decoder can use an internal buffer for either the
complete-but-not-really packet or partially decoded frame and then send
the decoded data back only when it has a full frame. We only have 1
decoder that I know of right now (shorten) that is like this
because no parser is possible. It copies packet data to an internal
buffer until it has what it thinks is enough for a frame.

If the input packet results in multiple output frames, the decoder has
several choices:

1) it can request a large enough buffer for all the frames and send them
all at once. we have several decoders which do this.

2) it can only decode part of the packet and send one frame of output.
then the remaining packet data is fed in the next call.

3) it can copy the whole packet locally and decode only one frame at a
time per decode() call (e.g. used for speex where frame boundaries are
not byte-aligned)

4) it can decode the whole packet into a private buffer and only output
one frame at a time to the user. this shouldn't really be necessary, but
it still works.

-Justin
Måns Rullgård
2011-11-13 20:02:54 UTC
Permalink
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
+ uint8_t **audio_data;
} AVFrame;
The mini-allocation for the pointers is annoying in almost every way
possible.
It is annoying, yes. But the general agreement on IRC
So all the sane people have left IRC.
Post by Justin Ruggles
was that people wanted no limitation on the maximum number of
channels.
A C99 flexible array member at the end of the struct would solve that
problem. Alternatively (and/or additionally) the data[4] array could be
used (and possibly extended to, say, 8 entries) when it is large enough.
Ok, using data[] when possible instead of allocating the pointers does
make sense.
Let's do that.
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
+#if FF_API_OLD_DECODE_AUDIO
+ /**
+ * NOT PART OF PUBLIC API
+ *
+ * User-allocated audio decoder output buffer.
+ * Used internally by libavcodec for backwards compatibility with
+ * avcodec_decode_audio3().
+ *
+ * removal of avcodec_decode_audio3().
+ */
+ uint8_t *user_buffer;
+
+ /**
+ * NOT PART OF PUBLIC API
+ *
+ * Allocated size, in bytes, of AVCodecContext.user_buffer.
+ * Used internally by libavcodec for backwards compatibility with
+ * avcodec_decode_audio3().
+ *
+ * removal of avcodec_decode_audio3().
+ */
+ int user_buffer_size;
+#endif
} AVCodecContext;
This is going to cause compatibility hell and probably a host of other
nightmares.
The only other way I could come up with to keep compatibility with
avcodec_decode_audio3() is to memcpy from the get_buffer() data into
the user-supplied data.
That would probably be acceptable as a temporary compatibility solution.
A more general solution would be to add an opaque pointer to a struct
AVCodecInteral or whatever that we could do whatever we want to without
compatibility concerns at all. There are already a few other fields in
AVCodecContext that could be moved to such a hidden struct. We might as
well fix this properly once and for all instead of making it worse.
I'll try to implement it with AVCodecInternal so we can make it work
cleanly without the extra memcpy().
Great.
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
Post by Måns Rullgård
Post by Justin Ruggles
void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic){
int i;
InternalBuffer *buf, *last;
+ assert(s->codec_type == AVMEDIA_TYPE_VIDEO);
What releases audio frames?
It's in the documentation. Audio frames are considered released after
decode() returns, and the decoder cannot reuse the buffer in subsequent
calls. I got requests to have get_buffer()-provided buffers only be used
for the next sequential output data so that the user can have direct
decoding into a contiguous buffer. Not that it would always work anyway
due to alignment, but it would work in most cases.
We could have it call release_buffer() in avcodec_decode_audio4() after
decode() returns. We just need to guarantee the behavior in the API in
order for it to be useful to users.
What happens if an input packet doesn't complete an output frame?
What if an input packet results in multiple output frames?
If the input packet doesn't complete an output frame, the decoder
doesn't output anything. We require full packets; that's what parsers
are for. If some oddball codec has a full packet that only gives a
partial frame, the decoder can use an internal buffer for either the
complete-but-not-really packet or partially decoded frame and then send
the decoded data back only when it has a full frame. We only have 1
decoder that I know of right now (shorten) that is like this
because no parser is possible. It copies packet data to an internal
buffer until it has what it thinks is enough for a frame.
If the input packet results in multiple output frames, the decoder has
1) it can request a large enough buffer for all the frames and send them
all at once. we have several decoders which do this.
2) it can only decode part of the packet and send one frame of output.
then the remaining packet data is fed in the next call.
3) it can copy the whole packet locally and decode only one frame at a
time per decode() call (e.g. used for speex where frame boundaries are
not byte-aligned)
4) it can decode the whole packet into a private buffer and only output
one frame at a time to the user. this shouldn't really be necessary, but
it still works.
I guess this can be summed up as, if avcodec_decode_audio4() returned a
frame at all, the buffer is immediately released. Allowing a decoder to
get_buffer() in one decode() call and return the frame in a later call
might be useful and should not cause problems, so I think we should
include this possibility.
--
Måns Rullgård
***@mansr.com
Uoti Urpala
2011-11-13 22:13:06 UTC
Permalink
Post by Måns Rullgård
I guess this can be summed up as, if avcodec_decode_audio4() returned a
frame at all, the buffer is immediately released. Allowing a decoder to
get_buffer() in one decode() call and return the frame in a later call
might be useful and should not cause problems, so I think we should
include this possibility.
It would cause problems. For video one frame is a natural unit. For
audio the corresponding unit is one sample for each channel (though
since sample rates are much higher for audio individual processing of
each unit is not practical). One codec output packet is not a natural
unit for any further audio processing. The normal way to use decoded
audio packets is to join the samples from adjacent packets in a single
buffer (and possibly cut them again into units sized appropriately for
further processing). Thus the natural location to write output is the
current write position in a larger buffer. Giving a codec the freedom to
allocate output space in random order or to keep a portion of the buffer
locked even after the decode() call returns would make things a lot more
complex and clumsy. If this actually gives a significant efficiency
benefit with some future audio codec a new API extension can be added
for it; until then code should not need to deal with such complexities.
Måns Rullgård
2011-11-13 22:47:58 UTC
Permalink
Post by Uoti Urpala
Post by Måns Rullgård
I guess this can be summed up as, if avcodec_decode_audio4() returned a
frame at all, the buffer is immediately released. Allowing a decoder to
get_buffer() in one decode() call and return the frame in a later call
might be useful and should not cause problems, so I think we should
include this possibility.
It would cause problems. For video one frame is a natural unit. For
audio the corresponding unit is one sample for each channel (though
since sample rates are much higher for audio individual processing of
each unit is not practical). One codec output packet is not a natural
unit for any further audio processing. The normal way to use decoded
audio packets is to join the samples from adjacent packets in a single
buffer (and possibly cut them again into units sized appropriately for
further processing). Thus the natural location to write output is the
current write position in a larger buffer. Giving a codec the freedom to
allocate output space in random order or to keep a portion of the buffer
locked even after the decode() call returns would make things a lot more
complex and clumsy. If this actually gives a significant efficiency
benefit with some future audio codec a new API extension can be added
for it; until then code should not need to deal with such complexities.
You deliberately misconstrue what I say, as always.

For the benefit of others who might otherwise be confused by your
ramblings, I did not at all suggest holding onto a buffer after it has
been returned. What I did suggest is to allow one decode() call to
request a buffer which is then filled over some number of following
calls. The proposed API already has a provision for not returning any
decoded data from a decode() call, so nothing really changes. I am
merely saying we should make it explicit that get_buffer() may be called
from a decode() invocation prior to the one actually completing the
decode. This would be useful, for instance, with a hypothetical codec
storing left and right channels in separate packets.
--
Måns Rullgård
***@mansr.com
Uoti Urpala
2011-11-14 00:17:14 UTC
Permalink
Post by Måns Rullgård
You deliberately misconstrue what I say, as always.
You take things personally as usual, and your claims are factually
inaccurate again.
Post by Måns Rullgård
For the benefit of others who might otherwise be confused by your
ramblings, I did not at all suggest holding onto a buffer after it has
been returned. What I did suggest is to allow one decode() call to
request a buffer which is then filled over some number of following
calls. The proposed API already has a provision for not returning any
decoded data from a decode() call, so nothing really changes. I am
If you limit the buffer-locking behavior to "reserve output space, read
multiple input packets, return output" then that doesn't change much in
the case where the caller can immediately provide the needed extra
packets. However, it does make things more complicated if the caller
can't treat the steps until getting the output as one atomic decode
operation due to not having all the required input packets immediately
available. Then the locked state is exposed, and the caller can't for
example freely take a portion of the decoded data from the beginning of
its buffer and memmove() the rest.
Måns Rullgård
2011-11-14 00:51:44 UTC
Permalink
Post by Uoti Urpala
Post by Måns Rullgård
For the benefit of others who might otherwise be confused by your
ramblings, I did not at all suggest holding onto a buffer after it has
been returned. What I did suggest is to allow one decode() call to
request a buffer which is then filled over some number of following
calls. The proposed API already has a provision for not returning any
decoded data from a decode() call, so nothing really changes. I am
If you limit the buffer-locking behavior to "reserve output space, read
multiple input packets, return output" then that doesn't change much in
the case where the caller can immediately provide the needed extra
packets. However, it does make things more complicated if the caller
can't treat the steps until getting the output as one atomic decode
operation due to not having all the required input packets immediately
available. Then the locked state is exposed, and the caller can't for
example freely take a portion of the decoded data from the beginning of
its buffer and memmove() the rest.
The part of my message you so conveniently snipped gave an example of a
situation where the full buffer would be required immediately, yet none
of it would be usable until after several decode() calls. Besides, what
locks are you talking about?

If your main interest is "winning" arguments, I suggest sticking to your
own mailing lists, where I'm sure you'll meet very little opposition.
--
Måns Rullgård
***@mansr.com
Uoti Urpala
2011-11-14 02:08:41 UTC
Permalink
Post by Måns Rullgård
Post by Uoti Urpala
If you limit the buffer-locking behavior to "reserve output space, read
multiple input packets, return output" then that doesn't change much in
the case where the caller can immediately provide the needed extra
packets. However, it does make things more complicated if the caller
can't treat the steps until getting the output as one atomic decode
operation due to not having all the required input packets immediately
available. Then the locked state is exposed, and the caller can't for
example freely take a portion of the decoded data from the beginning of
its buffer and memmove() the rest.
The part of my message you so conveniently snipped gave an example of a
situation where the full buffer would be required immediately, yet none
of it would be usable until after several decode() calls.
You first claimed that this behavior would make no difference for the
use of the API. That claim is false as described above. You then gave an
example where the behavior could be useful involving a "hypothetical
codec". It's possible to construct such hypothetical cases, but it's
also easy to construct cases which would require more general buffer
allocation/freeing behavior, including allocating multiple buffers in
advance and freeing them long after the audio has been returned. Thus
the ability to construct such a hypothetical example is not real
evidence of the API requirements being beneficial. Your claims about the
costs of such requirements were wrong, and claims about benefits
hypothetical and unsubstantiated.
Post by Måns Rullgård
Besides, what
locks are you talking about?
The output memory being locked "for future libavcodec output", as
opposed to each decode call writing samples (possibly zero) to the
specified memory location and the caller being free to shuffle buffers
between any two calls.
Luca Barbato
2011-11-14 02:01:17 UTC
Permalink
Post by Uoti Urpala
Post by Måns Rullgård
You deliberately misconstrue what I say, as always.
You take things personally as usual, and your claims are factually
inaccurate again.
Sometimes your technical concerns get taken as personal since your way
to deliver information tends to be dry and some words might be taken as harsh.
Post by Uoti Urpala
If you limit the buffer-locking behavior to "reserve output space, read
multiple input packets, return output" then that doesn't change much in
the case where the caller can immediately provide the needed extra
packets. However, it does make things more complicated if the caller
can't treat the steps until getting the output as one atomic decode
operation due to not having all the required input packets immediately
available. Then the locked state is exposed, and the caller can't for
example freely take a portion of the decoded data from the beginning of
its buffer and memmove() the rest.
I'm a bit lost here, could you try to provide some examples? I'd like to
have this new api as nice to use as possible, so we need as much input
as possible and you have some good direct experience with mplayer2 in
this regard.

lu
Ronald S. Bultje
2011-11-14 01:11:56 UTC
Permalink
Hi,
Post by Måns Rullgård
Post by Uoti Urpala
Post by Måns Rullgård
I guess this can be summed up as, if avcodec_decode_audio4() returned a
frame at all, the buffer is immediately released.  Allowing a decoder to
get_buffer() in one decode() call and return the frame in a later call
might be useful and should not cause problems, so I think we should
include this possibility.
It would cause problems. For video one frame is a natural unit. For
audio the corresponding unit is one sample for each channel (though
since sample rates are much higher for audio individual processing of
each unit is not practical). One codec output packet is not a natural
unit for any further audio processing. The normal way to use decoded
audio packets is to join the samples from adjacent packets in a single
buffer (and possibly cut them again into units sized appropriately for
further processing). Thus the natural location to write output is the
current write position in a larger buffer. Giving a codec the freedom to
allocate output space in random order or to keep a portion of the buffer
locked even after the decode() call returns would make things a lot more
complex and clumsy. If this is actually gives a significant efficiency
benefit with some future audio codec a new API extension can be added
for it; until then code should not need to deal with such complexities.
You deliberately misconstrue what I say, as always.
For the benefit of others who might otherwise be confused by your
ramblings, I did not at all suggest holding onto a buffer after it has
been returned.  What I did suggest is to allow one decode() call to
request a buffer which is then filled over some number of following
calls.  The proposed API already has a provision for not returning any
decoded data from a decode() call, so nothing really changes.  I am
merely saying we should make it explicit that get_buffer() may be called
from a decode() invocation prior to the one actually completing the
decode.  This would be useful, for instance, with a hypothetical codec
storing left and right channels in separate packets.
I think this is for all practical purposes already a requirement for
multithreading anyway, so I see absolutely no problem in adding this
requirement.

Ronald
Uoti Urpala
2011-11-14 02:42:45 UTC
Permalink
Post by Ronald S. Bultje
Post by Måns Rullgård
been returned. What I did suggest is to allow one decode() call to
request a buffer which is then filled over some number of following
calls. The proposed API already has a provision for not returning any
I think this is for all practical purposes already a requirement for
multithreading anyway, so I see absolutely no problem in adding this
requirement.
I don't think that matches multithreading requirements. I assume you're
thinking of some system which reads multiple packets, decodes each in a
separate thread, and then returns data from all at once (or perhaps you
misunderstood what Måns was proposing - he was specifically talking
about a case where a decode() call allocates a buffer but does not
output anything, possibly a number of subsequent calls don't return
anything either, and then everything fed in so far is returned in that
single buffer area allocated earlier).
1) You might not know the total data size after reading just the first
packet, while if you read multiple packets before deciding how big a
buffer to allocate then you might just as well return data in the same
call too (since you can now have enough packets to do the actual
decoding immediately).
2) The main reason I could see for allocating output storage in advance
of writing full output (read/allocate/write/read/write/output vs
read/read/allocate/write/write/output) is that you would not need to
keep all input packets buffered in memory simultaneously (you'd decode
each, then that could be freed before you move to the next part). But if
the packets are being decoded simultaneously then you need to have
them all in memory anyway.
3) Requiring one big buffer for multithreading output would be
inconsistent with the assumptions Justin Ruggles had, where alignment
requirements could mean that decoding multiple consecutive packets to a
single output buffer might require an extra memcpy() from aligned
decoding to unaligned output.

I think it's not obvious what kind of API threaded audio decoding should
have in general. Also, is it really needed (for most decoding
applications I'd say no, but probably there are some uses when you'd
want more speed)? If it's really seriously supported, should there be
better coordination between the thread pools for video and audio
decoding?

Justin Ruggles
2011-11-12 22:35:50 UTC
Permalink
---
avconv.c | 61 ++++++++++++++++++++++++++-----------------------------------
1 files changed, 26 insertions(+), 35 deletions(-)

diff --git a/avconv.c b/avconv.c
index eb7df9a..9e734f8 100644
--- a/avconv.c
+++ b/avconv.c
@@ -137,8 +137,6 @@ static uint8_t *audio_buf;
static uint8_t *audio_out;
static unsigned int allocated_audio_out_size, allocated_audio_buf_size;

-static void *samples;
-
#define DEFAULT_PASS_LOGFILENAME_PREFIX "av2pass"

typedef struct InputStream {
@@ -539,7 +537,6 @@ void exit_program(int ret)
av_free(audio_buf);
av_free(audio_out);
allocated_audio_buf_size= allocated_audio_out_size= 0;
- av_free(samples);

#if CONFIG_AVFILTER
avfilter_uninit();
@@ -730,11 +727,10 @@ static void write_frame(AVFormatContext *s, AVPacket *pkt, AVCodecContext *avctx
static void do_audio_out(AVFormatContext *s,
OutputStream *ost,
InputStream *ist,
- unsigned char *buf, int size)
+ AVFrame *decoded_frame)
{
uint8_t *buftmp;
int64_t audio_out_size, audio_buf_size;
- int64_t allocated_for_size= size;

int size_out, frame_bytes, ret, resample_changed;
AVCodecContext *enc= ost->st->codec;
@@ -742,6 +738,9 @@ static void do_audio_out(AVFormatContext *s,
int osize = av_get_bytes_per_sample(enc->sample_fmt);
int isize = av_get_bytes_per_sample(dec->sample_fmt);
const int coded_bps = av_get_bits_per_sample(enc->codec->id);
+ uint8_t *buf = decoded_frame->audio_data[0];
+ int size = decoded_frame->nb_samples * dec->channels * isize;
+ int64_t allocated_for_size = size;

need_realloc:
audio_buf_size= (allocated_for_size + isize*dec->channels - 1) / (isize*dec->channels);
@@ -1536,7 +1535,6 @@ static int output_packet(InputStream *ist, int ist_index,
int ret = 0, i;
int got_output;
void *buffer_to_free = NULL;
- static unsigned int samples_size= 0;
AVSubtitle subtitle, *subtitle_to_free;
int64_t pkt_pts = AV_NOPTS_VALUE;
#if CONFIG_AVFILTER
@@ -1567,8 +1565,8 @@ static int output_packet(InputStream *ist, int ist_index,

//while we have more to decode or while the decoder did output something on EOF
while (avpkt.size > 0 || (!pkt && got_output)) {
- uint8_t *data_buf, *decoded_data_buf;
- int data_size, decoded_data_size;
+ uint8_t *data_buf;
+ int data_size;
AVFrame *decoded_frame, *filtered_frame;
handle_eof:
ist->pts= ist->next_pts;
@@ -1580,38 +1578,29 @@ static int output_packet(InputStream *ist, int ist_index,

/* decode the packet if needed */
decoded_frame = filtered_frame = NULL;
- decoded_data_buf = NULL; /* fail safe */
- decoded_data_size= 0;
data_buf = avpkt.data;
data_size = avpkt.size;
subtitle_to_free = NULL;
if (ist->decoding_needed) {
switch(ist->st->codec->codec_type) {
case AVMEDIA_TYPE_AUDIO:{
- if(pkt && samples_size < FFMAX(pkt->size * bps, AVCODEC_MAX_AUDIO_FRAME_SIZE)) {
- samples_size = FFMAX(pkt->size * bps, AVCODEC_MAX_AUDIO_FRAME_SIZE);
- av_free(samples);
- samples= av_malloc(samples_size);
- }
- decoded_data_size= samples_size;
+ if (!(decoded_frame = avcodec_alloc_frame()))
+ return AVERROR(ENOMEM);
/* XXX: could avoid copy if PCM 16 bits with same
endianness as CPU */
- ret = avcodec_decode_audio3(ist->st->codec, samples, &decoded_data_size,
- &avpkt);
+ ret = avcodec_decode_audio4(ist->st->codec, decoded_frame,
+ &got_output, &avpkt);
if (ret < 0)
- return ret;
+ goto fail;
avpkt.data += ret;
avpkt.size -= ret;
data_size = ret;
- got_output = decoded_data_size > 0;
- /* Some bug in mpeg audio decoder gives */
- /* decoded_data_size < 0, it seems they are overflows */
if (!got_output) {
/* no audio frame */
+ av_freep(&decoded_frame);
continue;
}
- decoded_data_buf = (uint8_t *)samples;
- ist->next_pts += ((int64_t)AV_TIME_BASE/bps * decoded_data_size) /
+ ist->next_pts += ((int64_t)AV_TIME_BASE/bps * decoded_frame->linesize[0]) /
(ist->st->codec->sample_rate * ist->st->codec->channels);
break;}
case AVMEDIA_TYPE_VIDEO:
@@ -1678,11 +1667,13 @@ static int output_packet(InputStream *ist, int ist_index,
// preprocess audio (volume)
if (ist->st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
if (audio_volume != 256) {
+ int nb_samples = decoded_frame->linesize[0] /
+ av_get_bytes_per_sample(ist->st->codec->sample_fmt);
switch (ist->st->codec->sample_fmt) {
case AV_SAMPLE_FMT_U8:
{
- uint8_t *volp = samples;
- for (i = 0; i < (decoded_data_size / sizeof(*volp)); i++) {
+ uint8_t *volp = (uint8_t *)decoded_frame->data[0];
+ for (i = 0; i < nb_samples; i++) {
int v = (((*volp - 128) * audio_volume + 128) >> 8) + 128;
*volp++ = av_clip_uint8(v);
}
@@ -1690,8 +1681,8 @@ static int output_packet(InputStream *ist, int ist_index,
}
case AV_SAMPLE_FMT_S16:
{
- int16_t *volp = samples;
- for (i = 0; i < (decoded_data_size / sizeof(*volp)); i++) {
+ int16_t *volp = (int16_t *)decoded_frame->data[0];
+ for (i = 0; i < nb_samples; i++) {
int v = ((*volp) * audio_volume + 128) >> 8;
*volp++ = av_clip_int16(v);
}
@@ -1699,8 +1690,8 @@ static int output_packet(InputStream *ist, int ist_index,
}
case AV_SAMPLE_FMT_S32:
{
- int32_t *volp = samples;
- for (i = 0; i < (decoded_data_size / sizeof(*volp)); i++) {
+ int32_t *volp = (int32_t *)decoded_frame->data[0];
+ for (i = 0; i < nb_samples; i++) {
int64_t v = (((int64_t)*volp * audio_volume + 128) >> 8);
*volp++ = av_clipl_int32(v);
}
@@ -1708,18 +1699,18 @@ static int output_packet(InputStream *ist, int ist_index,
}
case AV_SAMPLE_FMT_FLT:
{
- float *volp = samples;
+ float *volp = (float *)decoded_frame->data[0];
float scale = audio_volume / 256.f;
- for (i = 0; i < (decoded_data_size / sizeof(*volp)); i++) {
+ for (i = 0; i < nb_samples; i++) {
*volp++ *= scale;
}
break;
}
case AV_SAMPLE_FMT_DBL:
{
- double *volp = samples;
+ double *volp = (double *)decoded_frame->data[0];
double scale = audio_volume / 256.;
- for (i = 0; i < (decoded_data_size / sizeof(*volp)); i++) {
+ for (i = 0; i < nb_samples; i++) {
*volp++ *= scale;
}
break;
@@ -1794,7 +1785,7 @@ static int output_packet(InputStream *ist, int ist_index,
av_assert0(ist->decoding_needed);
switch(ost->st->codec->codec_type) {
case AVMEDIA_TYPE_AUDIO:
- do_audio_out(os, ost, ist, decoded_data_buf, decoded_data_size);
+ do_audio_out(os, ost, ist, decoded_frame);
break;
case AVMEDIA_TYPE_VIDEO:
#if CONFIG_AVFILTER
--
1.7.1
Justin Ruggles
2011-11-12 22:35:51 UTC
Permalink
---
libavformat/utils.c | 29 +++++++++++++++--------------
1 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/libavformat/utils.c b/libavformat/utils.c
index c2beeae..51d8805 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -2124,10 +2124,10 @@ static int has_decode_delay_been_guessed(AVStream *st)

static int try_decode_frame(AVStream *st, AVPacket *avpkt, AVDictionary **options)
{
- int16_t *samples;
AVCodec *codec;
int got_picture, data_size, ret=0;
AVFrame picture;
+ AVPacket pkt = *avpkt;

if(!st->codec->codec){
codec = avcodec_find_decoder(st->codec->codec_id);
@@ -2138,28 +2138,29 @@ static int try_decode_frame(AVStream *st, AVPacket *avpkt, AVDictionary **option
return ret;
}

- if(!has_codec_parameters(st->codec) || !has_decode_delay_been_guessed(st) ||
- (!st->codec_info_nb_frames && st->codec->codec->capabilities & CODEC_CAP_CHANNEL_CONF)) {
+ while (pkt.size > 0 && ret >= 0 &&
+ (!has_codec_parameters(st->codec) ||
+ !has_decode_delay_been_guessed(st) ||
+ (!st->codec_info_nb_frames && st->codec->codec->capabilities & CODEC_CAP_CHANNEL_CONF))) {
+ got_picture = 0;
+ avcodec_get_frame_defaults(&picture);
switch(st->codec->codec_type) {
case AVMEDIA_TYPE_VIDEO:
- avcodec_get_frame_defaults(&picture);
ret = avcodec_decode_video2(st->codec, &picture,
- &got_picture, avpkt);
- if (got_picture)
- st->info->nb_decoded_frames++;
+ &got_picture, &pkt);
break;
case AVMEDIA_TYPE_AUDIO:
- data_size = FFMAX(avpkt->size, AVCODEC_MAX_AUDIO_FRAME_SIZE);
- samples = av_malloc(data_size);
- if (!samples)
- goto fail;
- ret = avcodec_decode_audio3(st->codec, samples,
- &data_size, avpkt);
- av_free(samples);
+ ret = avcodec_decode_audio4(st->codec, &picture, &got_picture, &pkt);
break;
default:
break;
}
+ if (ret >= 0) {
+ if (got_picture)
+ st->info->nb_decoded_frames++;
+ pkt.data += ret;
+ pkt.size -= ret;
+ }
}
fail:
return ret;
--
1.7.1
Justin Ruggles
2011-11-12 22:35:52 UTC
Permalink
---
avplay.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/avplay.c b/avplay.c
index 01c0f11..b5e54fe 100644
--- a/avplay.c
+++ b/avplay.c
@@ -157,6 +157,7 @@ typedef struct VideoState {
compensation */
DECLARE_ALIGNED(16,uint8_t,audio_buf1)[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2];
DECLARE_ALIGNED(16,uint8_t,audio_buf2)[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2];
+ uint8_t silence_buf[SDL_AUDIO_BUFFER_SIZE];
uint8_t *audio_buf;
unsigned int audio_buf_size; /* in bytes */
int audio_buf_index; /* in bytes */
@@ -2129,9 +2130,8 @@ static void sdl_audio_callback(void *opaque, Uint8 *stream, int len)
audio_size = audio_decode_frame(is, &pts);
if (audio_size < 0) {
/* if error, just output silence */
- is->audio_buf = is->audio_buf1;
- is->audio_buf_size = 1024;
- memset(is->audio_buf, 0, is->audio_buf_size);
+ is->audio_buf = is->silence_buf;
+ is->audio_buf_size = SDL_AUDIO_BUFFER_SIZE;
} else {
if (is->show_audio)
update_sample_display(is, (int16_t *)is->audio_buf, audio_size);
--
1.7.1
Justin Ruggles
2011-11-12 22:35:53 UTC
Permalink
---
avplay.c | 41 +++++++++++++++++++++++++----------------
1 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/avplay.c b/avplay.c
index b5e54fe..a33b183 100644
--- a/avplay.c
+++ b/avplay.c
@@ -153,12 +153,9 @@ typedef struct VideoState {
AVStream *audio_st;
PacketQueue audioq;
int audio_hw_buf_size;
- /* samples output by the codec. we reserve more space for avsync
- compensation */
- DECLARE_ALIGNED(16,uint8_t,audio_buf1)[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2];
- DECLARE_ALIGNED(16,uint8_t,audio_buf2)[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2];
uint8_t silence_buf[SDL_AUDIO_BUFFER_SIZE];
uint8_t *audio_buf;
+ uint8_t *audio_buf1;
unsigned int audio_buf_size; /* in bytes */
int audio_buf_index; /* in bytes */
AVPacket audio_pkt_temp;
@@ -2010,21 +2007,21 @@ static int audio_decode_frame(VideoState *is, double *pts_ptr)
AVPacket *pkt_temp = &is->audio_pkt_temp;
AVPacket *pkt = &is->audio_pkt;
AVCodecContext *dec= is->audio_st->codec;
- int n, len1, data_size;
+ int n, len1, data_size, got_frame;
double pts;
int new_packet = 0;
int flush_complete = 0;
+ AVFrame *decoded_frame = NULL;

for(;;) {
/* NOTE: the audio packet can contain several frames */
while (pkt_temp->size > 0 || (!pkt_temp->data && new_packet)) {
+ if (!(decoded_frame = avcodec_alloc_frame()))
+ return AVERROR(ENOMEM);
if (flush_complete)
break;
new_packet = 0;
- data_size = sizeof(is->audio_buf1);
- len1 = avcodec_decode_audio3(dec,
- (int16_t *)is->audio_buf1, &data_size,
- pkt_temp);
+ len1 = avcodec_decode_audio4(dec, decoded_frame, &got_frame, pkt_temp);
if (len1 < 0) {
/* if error, we skip the frame */
pkt_temp->size = 0;
@@ -2034,12 +2031,15 @@ static int audio_decode_frame(VideoState *is, double *pts_ptr)
pkt_temp->data += len1;
pkt_temp->size -= len1;

- if (data_size <= 0) {
+ if (!got_frame) {
/* stop sending empty packets if the decoder is finished */
if (!pkt_temp->data && dec->codec->capabilities & CODEC_CAP_DELAY)
flush_complete = 1;
+ av_freep(&decoded_frame);
continue;
}
+ data_size = decoded_frame->nb_samples * dec->channels *
+ av_get_bytes_per_sample(dec->sample_fmt);

if (dec->sample_fmt != is->audio_src_fmt) {
if (is->reformat_ctx)
@@ -2056,21 +2056,27 @@ static int audio_decode_frame(VideoState *is, double *pts_ptr)
}

if (is->reformat_ctx) {
- const void *ibuf[6]= {is->audio_buf1};
- void *obuf[6]= {is->audio_buf2};
+ const void *ibuf[6]= {decoded_frame->audio_data[0]};
+ void *obuf[6];
int istride[6]= {av_get_bytes_per_sample(dec->sample_fmt)};
int ostride[6]= {2};
int len= data_size/istride[0];
+ obuf[0] = av_realloc(is->audio_buf1, FFALIGN(len * ostride[0], 32));
+ if (!obuf[0]) {
+ av_freep(&decoded_frame);
+ return AVERROR(ENOMEM);
+ }
+ is->audio_buf1 = obuf[0];
if (av_audio_convert(is->reformat_ctx, obuf, ostride, ibuf, istride, len)<0) {
printf("av_audio_convert() failed\n");
break;
}
- is->audio_buf= is->audio_buf2;
+ is->audio_buf = is->audio_buf1;
/* FIXME: existing code assume that data_size equals framesize*channels*2
remove this legacy cruft */
data_size= len*2;
}else{
- is->audio_buf= is->audio_buf1;
+ is->audio_buf = decoded_frame->audio_data[0];
}

/* if no pts, then compute it */
@@ -2088,8 +2094,10 @@ static int audio_decode_frame(VideoState *is, double *pts_ptr)
last_clock = is->audio_clock;
}
#endif
+ av_freep(&decoded_frame);
return data_size;
}
+ av_freep(&decoded_frame);

/* free the current packet */
if (pkt->data)
@@ -2106,8 +2114,7 @@ static int audio_decode_frame(VideoState *is, double *pts_ptr)
if (pkt->data == flush_pkt.data)
avcodec_flush_buffers(dec);

- pkt_temp->data = pkt->data;
- pkt_temp->size = pkt->size;
+ *pkt_temp = *pkt;

/* if update the audio clock with the pts */
if (pkt->pts != AV_NOPTS_VALUE) {
@@ -2275,6 +2282,8 @@ static void stream_component_close(VideoState *is, int stream_index)
if (is->reformat_ctx)
av_audio_convert_free(is->reformat_ctx);
is->reformat_ctx = NULL;
+ av_freep(&is->audio_buf1);
+ is->audio_buf = NULL;

if (is->rdft) {
av_rdft_end(is->rdft);
--
1.7.1
Justin Ruggles
2011-11-12 22:35:54 UTC
Permalink
---
libavcodec/api-example.c | 18 +++++++++---------
1 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/libavcodec/api-example.c b/libavcodec/api-example.c
index ec71b0d..c0e2614 100644
--- a/libavcodec/api-example.c
+++ b/libavcodec/api-example.c
@@ -118,9 +118,8 @@ static void audio_decode_example(const char *outfilename, const char *filename)
{
AVCodec *codec;
AVCodecContext *c= NULL;
- int out_size, len;
+ int len;
FILE *f, *outfile;
- uint8_t *outbuf;
uint8_t inbuf[AUDIO_INBUF_SIZE + FF_INPUT_BUFFER_PADDING_SIZE];
AVPacket avpkt;

@@ -143,8 +142,6 @@ static void audio_decode_example(const char *outfilename, const char *filename)
exit(1);
}

- outbuf = malloc(AVCODEC_MAX_AUDIO_FRAME_SIZE);
-
f = fopen(filename, "rb");
if (!f) {
fprintf(stderr, "could not open %s\n", filename);
@@ -161,15 +158,19 @@ static void audio_decode_example(const char *outfilename, const char *filename)
avpkt.size = fread(inbuf, 1, AUDIO_INBUF_SIZE, f);

while (avpkt.size > 0) {
- out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
- len = avcodec_decode_audio3(c, (short *)outbuf, &out_size, &avpkt);
+ AVFrame decoded_frame;
+ int got_frame = 0;
+
+ len = avcodec_decode_audio4(c, &decoded_frame, &got_frame, &avpkt);
if (len < 0) {
fprintf(stderr, "Error while decoding\n");
exit(1);
}
- if (out_size > 0) {
+ if (got_frame) {
/* if a frame has been decoded, output it */
- fwrite(outbuf, 1, out_size, outfile);
+ int data_size = decoded_frame.nb_samples * c->channels *
+ av_get_bytes_per_sample(c->sample_fmt);
+ fwrite(decoded_frame.audio_data[0], 1, data_size, outfile);
}
avpkt.size -= len;
avpkt.data += len;
@@ -189,7 +190,6 @@ static void audio_decode_example(const char *outfilename, const char *filename)

fclose(outfile);
fclose(f);
- free(outbuf);

avcodec_close(c);
av_free(c);
--
1.7.1
Justin Ruggles
2011-11-12 22:35:55 UTC
Permalink
---
libavcodec/pcm.c | 42 +++++++++++++++++++++++++-----------------
1 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/libavcodec/pcm.c b/libavcodec/pcm.c
index c9eb543..0297f11 100644
--- a/libavcodec/pcm.c
+++ b/libavcodec/pcm.c
@@ -192,6 +192,7 @@ static int pcm_encode_frame(AVCodecContext *avctx,
}

typedef struct PCMDecode {
+ AVFrame frame;
short table[256];
} PCMDecode;

@@ -223,6 +224,9 @@ static av_cold int pcm_decode_init(AVCodecContext * avctx)
if (avctx->sample_fmt == AV_SAMPLE_FMT_S32)
avctx->bits_per_raw_sample = av_get_bits_per_sample(avctx->codec->id);

+ avcodec_get_frame_defaults(&s->frame);
+ avctx->coded_frame = &s->frame;
+
return 0;
}

@@ -243,22 +247,20 @@ static av_cold int pcm_decode_init(AVCodecContext * avctx)
dst += size / 8; \
}

-static int pcm_decode_frame(AVCodecContext *avctx,
- void *data, int *data_size,
- AVPacket *avpkt)
+static int pcm_decode_frame(AVCodecContext *avctx, void *data,
+ int *got_frame_ptr, AVPacket *avpkt)
{
const uint8_t *src = avpkt->data;
int buf_size = avpkt->size;
PCMDecode *s = avctx->priv_data;
- int sample_size, c, n, out_size;
+ int sample_size, c, n, out_size, ret, samples_per_block;
uint8_t *samples;
int32_t *dst_int32_t;

- samples = data;
-
sample_size = av_get_bits_per_sample(avctx->codec_id)/8;

/* av_get_bits_per_sample returns 0 for CODEC_ID_PCM_DVD */
+ samples_per_block = 1;
if (CODEC_ID_PCM_DVD == avctx->codec_id) {
if (avctx->bits_per_coded_sample != 20 &&
avctx->bits_per_coded_sample != 24) {
@@ -266,10 +268,13 @@ static int pcm_decode_frame(AVCodecContext *avctx,
return AVERROR(EINVAL);
}
/* 2 samples are interleaved per block in PCM_DVD */
+ samples_per_block = 2;
sample_size = avctx->bits_per_coded_sample * 2 / 8;
- } else if (avctx->codec_id == CODEC_ID_PCM_LXF)
+ } else if (avctx->codec_id == CODEC_ID_PCM_LXF) {
/* we process 40-bit blocks per channel for LXF */
+ samples_per_block = 2;
sample_size = 5;
+ }

if (sample_size == 0) {
av_log(avctx, AV_LOG_ERROR, "Invalid sample_size\n");
@@ -288,14 +293,13 @@ static int pcm_decode_frame(AVCodecContext *avctx,

n = buf_size/sample_size;

- out_size = n * av_get_bytes_per_sample(avctx->sample_fmt);
- if (avctx->codec_id == CODEC_ID_PCM_DVD ||
- avctx->codec_id == CODEC_ID_PCM_LXF)
- out_size *= 2;
- if (*data_size < out_size) {
- av_log(avctx, AV_LOG_ERROR, "output buffer too small\n");
- return AVERROR(EINVAL);
+ /* get output buffer */
+ s->frame.nb_samples = n * samples_per_block / avctx->channels;
+ if ((ret = avctx->get_buffer(avctx, &s->frame)) < 0) {
+ av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+ return ret;
}
+ samples = (short *)s->frame.audio_data[0];

switch(avctx->codec->id) {
case CODEC_ID_PCM_U32LE:
@@ -402,7 +406,7 @@ static int pcm_decode_frame(AVCodecContext *avctx,
case CODEC_ID_PCM_DVD:
{
const uint8_t *src8;
- dst_int32_t = data;
+ dst_int32_t = (int32_t *)s->frame.audio_data[0];
n /= avctx->channels;
switch (avctx->bits_per_coded_sample) {
case 20:
@@ -435,7 +439,7 @@ static int pcm_decode_frame(AVCodecContext *avctx,
{
int i;
const uint8_t *src8;
- dst_int32_t = data;
+ dst_int32_t = (int32_t *)s->frame.audio_data[0];
n /= avctx->channels;
//unpack and de-planerize
for (i = 0; i < n; i++) {
@@ -457,7 +461,10 @@ static int pcm_decode_frame(AVCodecContext *avctx,
default:
return -1;
}
- *data_size = out_size;
+
+ *got_frame_ptr = 1;
+ *(AVFrame *)data = s->frame;
+
return buf_size;
}

@@ -486,6 +493,7 @@ AVCodec ff_ ## name_ ## _decoder = { \
.priv_data_size = sizeof(PCMDecode), \
.init = pcm_decode_init, \
.decode = pcm_decode_frame, \
+ .capabilities = CODEC_CAP_DR1, \
.sample_fmts = (const enum AVSampleFormat[]){sample_fmt_,AV_SAMPLE_FMT_NONE}, \
.long_name = NULL_IF_CONFIG_SMALL(long_name_), \
}
--
1.7.1
Continue reading on narkive:
Loading...