Discussion:
[PATCH 1/2] libavcodec: Add more AAC profiles
(too old to reply)
Martin Storsjö
2012-07-11 12:38:53 UTC
Permalink
TODO: Are the mpeg2 ones official or specific to the fraunhofer
encoder?
---
libavcodec/avcodec.h | 4 ++++
libavcodec/options_table.h | 4 ++++
2 files changed, 8 insertions(+)

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index a1103e9..0c962cc 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2714,6 +2714,10 @@ typedef struct AVCodecContext {
#define FF_PROFILE_AAC_LOW 1
#define FF_PROFILE_AAC_SSR 2
#define FF_PROFILE_AAC_LTP 3
+#define FF_PROFILE_AAC_HE 4
+#define FF_PROFILE_AAC_HE_V2 28
+#define FF_PROFILE_AAC_LD 22
+#define FF_PROFILE_AAC_ELD 38

#define FF_PROFILE_DTS 20
#define FF_PROFILE_DTS_ES 30
diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
index 7f5b643..4f903cc 100644
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h
@@ -316,6 +316,10 @@ static const AVOption options[]={
{"aac_low", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_AAC_LOW }, INT_MIN, INT_MAX, A|E, "profile"},
{"aac_ssr", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_AAC_SSR }, INT_MIN, INT_MAX, A|E, "profile"},
{"aac_ltp", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_AAC_LTP }, INT_MIN, INT_MAX, A|E, "profile"},
+{"aac_he", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_AAC_HE }, INT_MIN, INT_MAX, A|E, "profile"},
+{"aac_he_v2", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_AAC_HE_V2 }, INT_MIN, INT_MAX, A|E, "profile"},
+{"aac_ld", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_AAC_LD }, INT_MIN, INT_MAX, A|E, "profile"},
+{"aac_eld", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_AAC_ELD }, INT_MIN, INT_MAX, A|E, "profile"},
{"dts", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_DTS }, INT_MIN, INT_MAX, A|E, "profile"},
{"dts_es", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_DTS_ES }, INT_MIN, INT_MAX, A|E, "profile"},
{"dts_96_24", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_DTS_96_24 }, INT_MIN, INT_MAX, A|E, "profile"},
--
1.7.9.4
Martin Storsjö
2012-07-11 12:38:54 UTC
Permalink
For enabling VBR, the general consensus seems to be to use the
qscale flag. There doesn't seem to be any consistent way to
indicate the actual desired quality though. Both libfaac and
libmp3lame calculate avctx->global_quality / FF_QP2LAMBDA and set
that as the libraries' VBR quality parameters, with wildly different
results. On libmp3lame, the VBR quality parameter is between 0 (best)
and 10 (worst), while the scale goes in the opposite direction for
libfaac, where higher quality values gives you better quality.

Therefore, for now, I just pass the actual value of
avctx->global_quality through. You can set it to values between 1
and 5:
1 - about 32 kbps/channel
2 - about 40 kbps/channel
3 - about 48-56 kbps/channel
4 - about 64 kbps/channel
5 - about 80-96 kbps/channel
---
Changelog | 1 +
configure | 5 +
doc/general.texi | 11 +-
libavcodec/Makefile | 1 +
libavcodec/allcodecs.c | 1 +
libavcodec/libfdk-aacenc.c | 313 ++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 331 insertions(+), 1 deletion(-)
create mode 100644 libavcodec/libfdk-aacenc.c

diff --git a/Changelog b/Changelog
index 2fb5e3d..c56740c 100644
--- a/Changelog
+++ b/Changelog
@@ -33,6 +33,7 @@ version <next>:
- Microsoft ATC Screen decoder
- RTSP listen mode
- TechSmith Screen Codec 2 decoder
+- AAC encoding via libfdk-aac


version 0.8:
diff --git a/configure b/configure
index 2888c72..282974c 100755
--- a/configure
+++ b/configure
@@ -170,6 +170,7 @@ External library support:
--enable-libdc1394 enable IIDC-1394 grabbing using libdc1394
and libraw1394 [no]
--enable-libfaac enable FAAC support via libfaac [no]
+ --enable-libfdk-aac enable AAC support via libfdk-aac [no]
--enable-libfreetype enable libfreetype [no]
--enable-libgsm enable GSM support via libgsm [no]
--enable-libilbc enable iLBC de/encoding via libilbc [no]
@@ -943,6 +944,7 @@ CONFIG_LIST="
libcdio
libdc1394
libfaac
+ libfdk_aac
libfreetype
libgsm
libilbc
@@ -1447,6 +1449,7 @@ h264_parser_select="golomb h264dsp h264pred"

# external libraries
libfaac_encoder_deps="libfaac"
+libfdk_aac_encoder_deps="libfdk_aac"
libgsm_decoder_deps="libgsm"
libgsm_encoder_deps="libgsm"
libgsm_ms_decoder_deps="libgsm"
@@ -2966,6 +2969,7 @@ enabled avisynth && require2 vfw32 "windows.h vfw.h" AVIFileInit -lavifil32
enabled frei0r && { check_header frei0r.h || die "ERROR: frei0r.h header not found"; }
enabled gnutls && require_pkg_config gnutls gnutls/gnutls.h gnutls_global_init
enabled libfaac && require2 libfaac "stdint.h faac.h" faacEncGetVersion -lfaac
+enabled libfdk_aac && require libfdk_aac fdk-aac/aacenc_lib.h aacEncOpen -lfdk-aac
enabled libfreetype && require_pkg_config freetype2 "ft2build.h freetype/freetype.h" FT_Init_FreeType
enabled libgsm && require libgsm gsm/gsm.h gsm_create -lgsm
enabled libilbc && require libilbc ilbc.h WebRtcIlbcfix_InitDecode -lilbc
@@ -3257,6 +3261,7 @@ echo "gnutls enabled ${gnutls-no}"
echo "libcdio support ${libcdio-no}"
echo "libdc1394 support ${libdc1394-no}"
echo "libfaac enabled ${libfaac-no}"
+echo "libfdk-aac enabled ${libfdk_aac-no}"
echo "libgsm enabled ${libgsm-no}"
echo "libilbc enabled ${libilbc-no}"
echo "libmp3lame enabled ${libmp3lame-no}"
diff --git a/doc/general.texi b/doc/general.texi
index 7e9cfaf..c9af510 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -18,7 +18,8 @@ explicitly requested by passing the appropriate flags to

@section OpenCORE and VisualOn libraries

-Spun off Google Android sources, OpenCore and VisualOn libraries provide
+Spun off Google Android sources, OpenCore, VisualOn and Fraunhofer
+libraries provide
encoders for a number of audio codecs.

@float NOTE
@@ -55,6 +56,14 @@ Go to @url{http://sourceforge.net/projects/opencore-amr/} and follow the
instructions for installing the library.
Then pass @code{--enable-libvo-amrwbenc} to configure to enable it.

+@subsection Fraunhofer AAC library
+
+Libav can make use of the Fraunhofer AAC library for AAC encoding.
+
+Go to @url{http://sourceforge.net/projects/opencore-amr/} and follow the
+instructions for installing the library.
+Then pass @code{--enable-libfdk-aac} to configure to enable it.
+
@section LAME

Libav can make use of the LAME library for MP3 encoding.
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index ac97d34..8d38ca2 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -595,6 +595,7 @@ OBJS-$(CONFIG_WTV_DEMUXER) += mpeg4audio.o mpegaudiodata.o

# external codec libraries
OBJS-$(CONFIG_LIBFAAC_ENCODER) += libfaac.o audio_frame_queue.o
+OBJS-$(CONFIG_LIBFDK_AAC_ENCODER) += libfdk-aacenc.o audio_frame_queue.o
OBJS-$(CONFIG_LIBGSM_DECODER) += libgsm.o
OBJS-$(CONFIG_LIBGSM_ENCODER) += libgsm.o
OBJS-$(CONFIG_LIBGSM_MS_DECODER) += libgsm.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 068f191..bd48728 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -380,6 +380,7 @@ void avcodec_register_all(void)

/* external libraries */
REGISTER_ENCODER (LIBFAAC, libfaac);
+ REGISTER_ENCODER (LIBFDK_AAC, libfdk_aac);
REGISTER_ENCDEC (LIBGSM, libgsm);
REGISTER_ENCDEC (LIBGSM_MS, libgsm_ms);
REGISTER_ENCDEC (LIBILBC, libilbc);
diff --git a/libavcodec/libfdk-aacenc.c b/libavcodec/libfdk-aacenc.c
new file mode 100644
index 0000000..eb8b0f2
--- /dev/null
+++ b/libavcodec/libfdk-aacenc.c
@@ -0,0 +1,313 @@
+/*
+ * AAC encoder wrapper
+ * Copyright (c) 2012 Martin Storsjo
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <fdk-aac/aacenc_lib.h>
+
+#include "avcodec.h"
+#include "audio_frame_queue.h"
+#include "internal.h"
+#include "libavutil/opt.h"
+
+typedef struct AACContext { /* private encoder state, reached through avctx->priv_data */
+ const AVClass *class; /* class pointer for the private options; the codec's priv_class is aac_enc_class */
+ HANDLE_AACENCODER handle; /* library handle: opened by aacEncOpen() in init, released by aacEncClose() in close */
+ int afterburner; /* "afterburner" option (0/1), forwarded as AACENC_AFTERBURNER */
+ int eld_sbr; /* "eld_sbr" option (0/1); only consulted when the ELD object type is selected */
+ int signaling; /* "signaling" option; -1 means "pick from the global-header flag" and is resolved in init */
+
+ AudioFrameQueue afq; /* queue of input frames, used to derive pts/duration of output packets */
+} AACContext;
+
+static const AVOption aac_enc_options[] = {
+ { "afterburner", "Afterburner (improved quality)", offsetof(AACContext, afterburner), AV_OPT_TYPE_INT, { 1 }, 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
+ { "eld_sbr", "Enable SBR for ELD (for SBR in other configurations, use the -profile parameter)", offsetof(AACContext, eld_sbr), AV_OPT_TYPE_INT, { 0 }, 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
+ { "signaling", "SBR/PS signaling style", offsetof(AACContext, signaling), AV_OPT_TYPE_INT, { -1 }, -1, 2, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { "default", "Choose signaling implicitly (explicit hierarchical by default, implicit if global header is disabled)", 0, AV_OPT_TYPE_CONST, { -1 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { "implicit", "Implicit backwards compatible signaling", 0, AV_OPT_TYPE_CONST, { 0 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { "explicit_sbr", "Explicit SBR, implicit PS signaling", 0, AV_OPT_TYPE_CONST, { 1 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { "explicit_hierarchical", "Explicit hierarchical signaling", 0, AV_OPT_TYPE_CONST, { 2 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { NULL }
+};
+
+/* AVClass tying the private option table to the encoder context. */
+static const AVClass aac_enc_class = {
+    .class_name = "libfdk_aac",
+    .item_name  = av_default_item_name,
+    .option     = aac_enc_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+/**
+ * Translate a libfdk-aac encoder status code into a readable message.
+ *
+ * @param err status code returned by the library
+ * @return a static string describing err; "Unknown error" for any code
+ *         that is not listed in the table
+ */
+static const char *aac_get_error(AACENC_ERROR err)
+{
+    static const struct {
+        AACENC_ERROR code;
+        const char  *text;
+    } messages[] = {
+        { AACENC_OK,                    "No error" },
+        { AACENC_INVALID_HANDLE,        "Invalid handle" },
+        { AACENC_MEMORY_ERROR,          "Memory allocation error" },
+        { AACENC_UNSUPPORTED_PARAMETER, "Unsupported parameter" },
+        { AACENC_INVALID_CONFIG,        "Invalid config" },
+        { AACENC_INIT_ERROR,            "Initialization error" },
+        { AACENC_INIT_AAC_ERROR,        "AAC library initialization error" },
+        { AACENC_INIT_SBR_ERROR,        "SBR library initialization error" },
+        { AACENC_INIT_TP_ERROR,         "Transport library initialization error" },
+        { AACENC_INIT_META_ERROR,       "Metadata library initialization error" },
+        { AACENC_ENCODE_ERROR,          "Encoding error" },
+        { AACENC_ENCODE_EOF,            "End of file" },
+    };
+    unsigned idx;
+
+    for (idx = 0; idx < sizeof(messages) / sizeof(messages[0]); idx++) {
+        if (messages[idx].code == err)
+            return messages[idx].text;
+    }
+    return "Unknown error";
+}
+
+/**
+ * Release everything the wrapper owns: the library handle, the legacy
+ * coded_frame (old audio encode API only), the extradata and the frame
+ * queue.
+ *
+ * @return always 0
+ */
+static int aac_encode_close(AVCodecContext *avctx)
+{
+    AACContext *ctx = avctx->priv_data;
+
+    /* the handle may be unset */
+    if (ctx->handle != NULL)
+        aacEncClose(&ctx->handle);
+
+#if FF_API_OLD_ENCODE_AUDIO
+    av_freep(&avctx->coded_frame);
+#endif
+    av_freep(&avctx->extradata);
+    ff_af_queue_close(&ctx->afq);
+
+    return 0;
+}
+
+/**
+ * Open the libfdk-aac encoder and configure it from the codec context.
+ *
+ * Sets the audio object type (profile value plus one, AAC-LC when the
+ * profile is unknown), sample rate, channel mode, rate control, transport
+ * format, signaling mode and afterburner, then publishes the frame size,
+ * the encoder delay and, when CODEC_FLAG_GLOBAL_HEADER is set, the
+ * extradata.
+ *
+ * @param avctx codec context; avctx->priv_data is the AACContext
+ * @return 0 on success, a negative AVERROR code on failure. Every failure
+ *         path goes through aac_encode_close(), so nothing acquired here
+ *         is leaked.
+ */
+static av_cold int aac_encode_init(AVCodecContext *avctx)
+{
+    AACContext *s = avctx->priv_data;
+    int ret = AVERROR(EINVAL);
+    AACENC_InfoStruct info = { 0 };
+    CHANNEL_MODE mode;
+    AACENC_ERROR err;
+    int aot = FF_PROFILE_AAC_LOW + 1;
+
+    if ((err = aacEncOpen(&s->handle, 0, avctx->channels)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to open the encoder: %s\n",
+               aac_get_error(err));
+        goto error;
+    }
+
+    /* The library takes the audio object type, i.e. profile + 1. */
+    if (avctx->profile != FF_PROFILE_UNKNOWN)
+        aot = avctx->profile + 1;
+
+    if ((err = aacEncoder_SetParam(s->handle, AACENC_AOT, aot)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to set the AOT %d: %s\n",
+               aot, aac_get_error(err));
+        goto error;
+    }
+
+    if (aot == FF_PROFILE_AAC_ELD + 1 && s->eld_sbr) {
+        if ((err = aacEncoder_SetParam(s->handle, AACENC_SBR_MODE,
+                                       1)) != AACENC_OK) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to enable SBR for ELD: %s\n",
+                   aac_get_error(err));
+            goto error;
+        }
+    }
+
+    if ((err = aacEncoder_SetParam(s->handle, AACENC_SAMPLERATE,
+                                   avctx->sample_rate)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to set the sample rate %d: %s\n",
+               avctx->sample_rate, aac_get_error(err));
+        goto error;
+    }
+
+    switch (avctx->channels) {
+    case 1: mode = MODE_1;       break;
+    case 2: mode = MODE_2;       break;
+    case 3: mode = MODE_1_2;     break;
+    case 4: mode = MODE_1_2_1;   break;
+    case 5: mode = MODE_1_2_2;   break;
+    case 6: mode = MODE_1_2_2_1; break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels %d\n",
+               avctx->channels);
+        goto error;
+    }
+
+    if ((err = aacEncoder_SetParam(s->handle, AACENC_CHANNELMODE,
+                                   mode)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to set channel mode %d: %s\n",
+               mode, aac_get_error(err));
+        goto error;
+    }
+
+    /* Channel order 1 selects the WAV-style channel ordering. */
+    if ((err = aacEncoder_SetParam(s->handle, AACENC_CHANNELORDER,
+                                   1)) != AACENC_OK) {
+        /* report the value that was actually rejected, not the channel mode */
+        av_log(avctx, AV_LOG_ERROR, "Unable to set wav channel order %d: %s\n",
+               1, aac_get_error(err));
+        goto error;
+    }
+
+    if (avctx->flags & CODEC_FLAG_QSCALE) {
+        /* global_quality is used directly as the library's VBR mode (1-5) */
+        int vbr_mode = av_clip(avctx->global_quality, 1, 5);
+
+        if (vbr_mode != avctx->global_quality)
+            av_log(avctx, AV_LOG_WARNING,
+                   "VBR quality %d out of range, should be 1-5; using %d\n",
+                   avctx->global_quality, vbr_mode);
+
+        if ((err = aacEncoder_SetParam(s->handle, AACENC_BITRATEMODE,
+                                       vbr_mode)) != AACENC_OK) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to set the VBR bitrate mode %d: %s\n",
+                   vbr_mode, aac_get_error(err));
+            goto error;
+        }
+    } else {
+        if ((err = aacEncoder_SetParam(s->handle, AACENC_BITRATE,
+                                       avctx->bit_rate)) != AACENC_OK) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to set the bitrate %d: %s\n",
+                   avctx->bit_rate, aac_get_error(err));
+            goto error;
+        }
+    }
+
+    /*
+     * Transport format: 0 (raw access units) when the configuration goes
+     * into a global header, 2 (ADTS) when each packet must be
+     * self-describing.
+     */
+    if ((err = aacEncoder_SetParam(s->handle, AACENC_TRANSMUX,
+                                   avctx->flags & CODEC_FLAG_GLOBAL_HEADER ? 0 : 2)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to set the transmux format: %s\n",
+               aac_get_error(err));
+        goto error;
+    }
+
+    /* Resolve the "default" signaling choice from the global-header flag. */
+    if (s->signaling < 0)
+        s->signaling = avctx->flags & CODEC_FLAG_GLOBAL_HEADER ? 2 : 0;
+
+    if ((err = aacEncoder_SetParam(s->handle, AACENC_SIGNALING_MODE,
+                                   s->signaling)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to set signaling mode %d: %s\n",
+               s->signaling, aac_get_error(err));
+        goto error;
+    }
+
+    if ((err = aacEncoder_SetParam(s->handle, AACENC_AFTERBURNER,
+                                   s->afterburner)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to set afterburner to %d: %s\n",
+               s->afterburner, aac_get_error(err));
+        goto error;
+    }
+
+    /* Calling encode with all-NULL arguments applies the configuration. */
+    if ((err = aacEncEncode(s->handle, NULL, NULL, NULL, NULL)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize the encoder: %s\n",
+               aac_get_error(err));
+        goto error; /* was a bare return, which leaked the open handle */
+    }
+
+    if ((err = aacEncInfo(s->handle, &info)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to get encoder info: %s\n",
+               aac_get_error(err));
+        goto error;
+    }
+
+#if FF_API_OLD_ENCODE_AUDIO
+    avctx->coded_frame = avcodec_alloc_frame();
+    if (!avctx->coded_frame) {
+        /* was a bare return, which leaked the open handle */
+        ret = AVERROR(ENOMEM);
+        goto error;
+    }
+#endif
+    avctx->frame_size = info.frameLength;
+    avctx->delay      = info.encoderDelay;
+    ff_af_queue_init(avctx, &s->afq);
+
+    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
+        avctx->extradata_size = info.confSize;
+        avctx->extradata      = av_mallocz(avctx->extradata_size +
+                                           FF_INPUT_BUFFER_PADDING_SIZE);
+        if (!avctx->extradata) {
+            ret = AVERROR(ENOMEM);
+            goto error;
+        }
+
+        memcpy(avctx->extradata, info.confBuf, info.confSize);
+    }
+    return 0;
+error:
+    aac_encode_close(avctx);
+    return ret;
+}
+
+/**
+ * Encode one frame of interleaved 16-bit samples, or drain the encoder
+ * when frame is NULL.
+ *
+ * @param avctx          codec context; avctx->priv_data is the AACContext
+ * @param avpkt          output packet, allocated here through ff_alloc_packet()
+ * @param frame          input samples, or NULL to flush delayed output
+ * @param got_packet_ptr set to 1 when avpkt holds an encoded packet;
+ *                       left untouched otherwise
+ * @return 0 on success (including "no output yet" and end of stream while
+ *         flushing), a non-zero error code otherwise
+ */
+static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+                            const AVFrame *frame, int *got_packet_ptr)
+{
+    AACContext *s = avctx->priv_data;
+    AACENC_BufDesc in_buf   = { 0 }, out_buf = { 0 };
+    AACENC_InArgs  in_args  = { 0 };
+    AACENC_OutArgs out_args = { 0 };
+    int in_buffer_identifier = IN_AUDIO_DATA;
+    int in_buffer_size, in_buffer_element_size;
+    int out_buffer_identifier = OUT_BITSTREAM_DATA;
+    int out_buffer_size, out_buffer_element_size;
+    void *in_ptr, *out_ptr;
+    int ret;
+    AACENC_ERROR err;
+
+    /* handle end-of-stream small frame and flushing */
+    if (!frame) {
+        /* a negative sample count asks the library to flush */
+        in_args.numInSamples = -1;
+    } else {
+        in_ptr                 = frame->data[0];
+        /* 2 bytes per sample */
+        in_buffer_size         = 2 * avctx->channels * frame->nb_samples;
+        in_buffer_element_size = 2;
+
+        in_args.numInSamples     = avctx->channels * frame->nb_samples;
+        in_buf.numBufs           = 1;
+        in_buf.bufs              = &in_ptr;
+        in_buf.bufferIdentifiers = &in_buffer_identifier;
+        in_buf.bufSizes          = &in_buffer_size;
+        in_buf.bufElSizes        = &in_buffer_element_size;
+
+        /*
+         * Add current frame to the queue. The comparison must sit outside
+         * the assignment, otherwise ret receives the 0/1 comparison result
+         * and a failure is reported as 1 instead of the error code.
+         */
+        if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
+            return ret;
+    }
+
+    /*
+     * NOTE(review): 768 bytes per channel presumably matches the AAC
+     * maximum of 6144 bits per channel per frame; 8192 is a floor.
+     */
+    if ((ret = ff_alloc_packet(avpkt, FFMAX(8192, 768 * avctx->channels)))) {
+        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+        return ret;
+    }
+
+    out_ptr                   = avpkt->data;
+    out_buffer_size           = avpkt->size;
+    out_buffer_element_size   = 1;
+    out_buf.numBufs           = 1;
+    out_buf.bufs              = &out_ptr;
+    out_buf.bufferIdentifiers = &out_buffer_identifier;
+    out_buf.bufSizes          = &out_buffer_size;
+    out_buf.bufElSizes        = &out_buffer_element_size;
+
+    if ((err = aacEncEncode(s->handle, &in_buf, &out_buf, &in_args,
+                            &out_args)) != AACENC_OK) {
+        /* EOF while flushing just means there is nothing left to emit */
+        if (!frame && err == AACENC_ENCODE_EOF)
+            return 0;
+        av_log(avctx, AV_LOG_ERROR, "Unable to encode frame: %s\n",
+               aac_get_error(err));
+        return AVERROR(EINVAL);
+    }
+
+    /* no output for this call; no packet is signalled */
+    if (!out_args.numOutBytes)
+        return 0;
+
+    /* Get the next frame pts & duration */
+    ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
+                       &avpkt->duration);
+
+    avpkt->size     = out_args.numOutBytes;
+    *got_packet_ptr = 1;
+    return 0;
+}
+
+/* Profiles advertised by this wrapper; terminated by FF_PROFILE_UNKNOWN. */
+static const AVProfile profiles[] = {
+    { .profile = FF_PROFILE_AAC_LOW,   .name = "LC"       },
+    { .profile = FF_PROFILE_AAC_HE,    .name = "HE-AAC"   },
+    { .profile = FF_PROFILE_AAC_HE_V2, .name = "HE-AACv2" },
+    { .profile = FF_PROFILE_AAC_LD,    .name = "LD"       },
+    { .profile = FF_PROFILE_AAC_ELD,   .name = "ELD"      },
+    { .profile = FF_PROFILE_UNKNOWN },
+};
+
+/* Codec registration entry for the libfdk-aac based AAC encoder. */
+AVCodec ff_libfdk_aac_encoder = {
+    /* identification */
+    .name           = "libfdk_aac",
+    .long_name      = NULL_IF_CONFIG_SMALL("Fraunhofer FDK AAC"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = CODEC_ID_AAC,
+
+    /* private state and options */
+    .priv_data_size = sizeof(AACContext),
+    .priv_class     = &aac_enc_class,
+
+    /* callbacks */
+    .init           = aac_encode_init,
+    .encode2        = aac_encode_frame,
+    .close          = aac_encode_close,
+
+    /* capabilities and supported formats */
+    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
+    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
+                                                     AV_SAMPLE_FMT_NONE },
+    .profiles       = profiles,
+};
--
1.7.9.4
Luca Barbato
2012-07-11 13:18:27 UTC
Permalink
Post by Martin Storsjö
For enabling VBR, the general consensus seems to be to use the
qscale flag. There doesn't seem to be any consistent way to
indicate the actual desired quality though. Both libfaac and
libmp3lame calculate avctx->global_quality / FF_QP2LAMBDA and set
that as the libraries' VBR quality parameters, with wildly different
results. On libmp3lame, the VBR quality parameter is between 0 (best)
and 10 (worst), while the scale goes in the opposite direction for
libfaac, where higher quality values gives you better quality.
Therefore, for now, I just pass the actual value of
avctx->global_quality through. You can set it to values between 1
and 5:
1 - about 32 kbps/channel
2 - about 40 kbps/channel
3 - about 48-56 kbps/channel
4 - about 64 kbps/channel
5 - about 80-96 kbps/channel
---
Looks fine, just should we mark it non-free or not?

lu
--
Luca Barbato
Gentoo/linux
http://dev.gentoo.org/~lu_zero
Martin Storsjö
2012-07-11 13:19:46 UTC
Permalink
Post by Luca Barbato
Post by Martin Storsjö
For enabling VBR, the general consensus seems to be to use the
qscale flag. There doesn't seem to be any consistent way to
indicate the actual desired quality though. Both libfaac and
libmp3lame calculate avctx->global_quality / FF_QP2LAMBDA and set
that as the libraries' VBR quality parameters, with wildly different
results. On libmp3lame, the VBR quality parameter is between 0 (best)
and 10 (worst), while the scale goes in the opposite direction for
libfaac, where higher quality values gives you better quality.
Therefore, for now, I just pass the actual value of
avctx->global_quality through. You can set it to values between 1
and 5:
1 - about 32 kbps/channel
2 - about 40 kbps/channel
3 - about 48-56 kbps/channel
4 - about 64 kbps/channel
5 - about 80-96 kbps/channel
---
Looks fine, just should we mark it non-free or not?
That's the big question, IIRC j-b and Diego had differing views on how to
interpret it.

// Martin
Jean-Baptiste Kempf
2012-07-11 13:23:53 UTC
Permalink
Post by Martin Storsjö
That's the big question, IIRC j-b and Diego had differing views on
how to interpret it.
"You may not charge copyright license fees for anyone to use, copy or
distribute the FDK AAC Codec software or your modifications thereto."

This looks like freeware, not open source, to me.

It is allowed to resell GPL binaries, and this provision blocks it;
and it limits the usage of the software.
--
Jean-Baptiste Kempf
http://www.jbkempf.com/ - +33 672 704 734
Sent from my Electronic Device
Janne Grunau
2012-07-11 13:48:54 UTC
Permalink
Post by Jean-Baptiste Kempf
Post by Martin Storsjö
That's the big question, IIRC j-b and Diego had differing views on
how to interpret it.
"You may not charge copyright license fees for anyone to use, copy or
distribute the FDK AAC Codec software or your modifications thereto."
This looks like freeware, not open source, to me.
It is allowed to resell GPL binaries, and this provision blocks it;
and it limits the usage of the software.
But is that a "copyright license fee"? some GPL v2 quotes:

| Our General Public Licenses are designed to make sure that you have
| the freedom to distribute copies of free software (and charge for
| this service if you wish)

| You may charge a fee for the physical act of transferring a copy,
| and you may at your option offer warranty protection in exchange
| for a fee.

| BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE

I agree that it's a bit irky and I would ask at least one lawyer
before considering it (L)GPL compatible.

Janne
Luca Barbato
2012-07-11 14:30:14 UTC
Permalink
Post by Janne Grunau
Post by Jean-Baptiste Kempf
Post by Martin Storsjö
That's the big question, IIRC j-b and Diego had differing views on
how to interpret it.
"You may not charge copyright license fees for anyone to use, copy or
distribute the FDK AAC Codec software or your modifications thereto."
This looks like freeware, not open source, to me.
It is allowed to resell GPL binaries, and this provision blocks it;
and it limits the usage of the software.
| Our General Public Licenses are designed to make sure that you have
| the freedom to distribute copies of free software (and charge for
| this service if you wish)
| You may charge a fee for the physical act of transferring a copy,
| and you may at your option offer warranty protection in exchange
| for a fee.
| BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE
I agree that it's a bit irky and I would ask at least one lawyer
before considering it (L)GPL compatible.
I'd add something along those lines in the boilerplate for the wrapper
and consider it free.

lu
--
Luca Barbato
Gentoo/linux
http://dev.gentoo.org/~lu_zero
Diego Elio Pettenò
2012-07-11 13:54:43 UTC
Permalink
Post by Jean-Baptiste Kempf
This looks like freeware, not open source, to me.
It certainly seems to be okay with OSI, afaict, it might or might not be
Free Software...
Post by Jean-Baptiste Kempf
It is allowed to resell GPL binaries, and this provision blocks it;
and it limits the usage of the software.
As Janne points out that wouldn't be copyright fee, although there might
be other issues involved as this is an "additional restriction".

Would help to ask FSF for an evaluation I guess.
--
Diego Elio Pettenò — Flameeyes
***@flameeyes.eu — http://blog.flameeyes.eu/
Derek Buitenhuis
2012-07-11 15:19:06 UTC
Permalink
Post by Martin Storsjö
For enabling VBR, the general consensus seems to be to use the
qscale flag. There doesn't seem to be any consistent way to
indicate the actual desired quality though. Both libfaac and
libmp3lame calculate avctx->global_quality / FF_QP2LAMBDA and set
that as the libraries' VBR quality parameters, with wildly different
results. On libmp3lame, the VBR quality parameter is between 0 (best)
and 10 (worst), while the scale goes in the opposite direction for
libfaac, where higher quality values gives you better quality.
Therefore, for now, I just pass the actual value of
avctx->global_quality through. You can set it to values between 1
and 5:
1 - about 32 kbps/channel
2 - about 40 kbps/channel
3 - about 48-56 kbps/channel
4 - about 64 kbps/channel
5 - about 80-96 kbps/channel
---
-Spun off Google Android sources, OpenCore and VisualOn libraries provide
+Spun off Google Android sources, OpenCore, VisualOn and Fraunhofer
+libraries provide
encoders for a number of audio codecs.
These lines should probably be merged, no?
Post by Martin Storsjö
+static const AVOption aac_enc_options[] = {
+ { "afterburner", "Afterburner (improved quality)", offsetof(AACContext, afterburner), AV_OPT_TYPE_INT, { 1 }, 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
+ { "eld_sbr", "Enable SBR for ELD (for SBR in other configurations, use the -profile parameter)", offsetof(AACContext, eld_sbr), AV_OPT_TYPE_INT, { 0 }, 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
+ { "signaling", "SBR/PS signaling style", offsetof(AACContext, signaling), AV_OPT_TYPE_INT, { -1 }, -1, 2, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { "default", "Choose signaling implicitly (explicit hierarchical by default, implicit if global header is disabled)", 0, AV_OPT_TYPE_CONST, { -1 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { "implicit", "Implicit backwards compatible signaling", 0, AV_OPT_TYPE_CONST, { 0 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { "explicit_sbr", "Explicit SBR, implicit PS signaling", 0, AV_OPT_TYPE_CONST, { 1 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { "explicit_hierarchical", "Explicit hierarchical signaling", 0, AV_OPT_TYPE_CONST, { 2 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { NULL }
+};
+
+static const AVClass aac_enc_class = {
+ "libfdk_aac", av_default_item_name, aac_enc_options, LIBAVUTIL_VERSION_INT
+};
Both of these can be broken up nicely so they're not lolhuge.
This can be done in many areas in this patch, so I will
refrain from noting them all.
Post by Martin Storsjö
+#if FF_API_OLD_ENCODE_AUDIO
+ av_freep(&avctx->coded_frame);
+#endif
Unrelated to the review -- are we dropping this sort of
thing for the next release?
Post by Martin Storsjö
+static av_cold int aac_encode_init(AVCodecContext *avctx)
+{
+ AACContext *s = avctx->priv_data;
+ int ret = AVERROR(EINVAL);
+ AACENC_InfoStruct info = { 0 };
+ CHANNEL_MODE mode;
+ AACENC_ERROR err;
+ int aot = FF_PROFILE_AAC_LOW + 1;
+
+ if ((err = aacEncOpen(&s->handle, 0, avctx->channels)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to open the encoder: %s\n", aac_get_error(err));
+ goto error;
+ }
I know this is personal taste, but is there not a more elegant
way to handle errors? :/
Post by Martin Storsjö
+ if (avctx->profile != FF_PROFILE_UNKNOWN)
+ aot = avctx->profile + 1;
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_AOT, aot)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to set the AOT %d: %s\n", aot, aac_get_error(err));
+ goto error;
+ }
Needs a line break between these two if statements. Also, perhaps
it should print a warning if it is passed an invalid profile?
Might be better than silently falling back on aac_low.
Post by Martin Storsjö
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_CHANNELORDER, 1)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to set wav channel order %d: %s\n", mode, aac_get_error(err));
+ goto error;
+ }
Not sure what 'wav channel order' means? WAVEFORMATEXTENSIBLE?
Post by Martin Storsjö
+ if (avctx->flags & CODEC_FLAG_QSCALE) {
+ int mode = av_clip(avctx->global_quality, 1, 5);
We should print a warning instead of silently clipping.
Post by Martin Storsjö
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_TRANSMUX, avctx->flags & CODEC_FLAG_GLOBAL_HEADER ? 0 : 2)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to set the transmux format: %s\n", aac_get_error(err));
+ goto error;
+ }
Can you add a comment or something here? It's not immediately
clear what's happening.
Post by Martin Storsjö
+ if (s->signaling < 0)
+ s->signaling = avctx->flags & CODEC_FLAG_GLOBAL_HEADER ? 2 : 0;
We use this exact thing directly above. Should be set, then used there
instead of duplicating code.
Post by Martin Storsjö
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_AFTERBURNER, s->afterburner)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to set afterburner to %d: %s\n", s->afterburner, aac_get_error(err));
+ goto error;
+ }
Isn't the only possible option for afterburner 0/1? Shouldn't we force
it to a boolean? (!!var)
Post by Martin Storsjö
+#if FF_API_OLD_ENCODE_AUDIO
+ avctx->coded_frame = avcodec_alloc_frame();
+ if (!avctx->coded_frame)
+ return AVERROR(ENOMEM);
+#endif
Shouldn't all the mallocing be done before we go through the
trouble of initializing the encoder? Also, this is the only
time goto error isn't used. Why?
Post by Martin Storsjö
+ if (!avctx->extradata) {
+ ret = AVERROR(ENOMEM);
+ goto error;
+ }
Isn't it possible, while using the old audio API, that coded_frame
gets allocated, but then extradata allocation fails, but coded_frame
is never freed? Seems like a memleak.

Secondly, does fdk-aac not have a cleanup function for all the mem
it has allocated?
Post by Martin Storsjö
+ in_ptr = frame->data[0];
+ in_buffer_size = 2 * avctx->channels * frame->nb_samples;
+ in_buffer_element_size = 2;
+
+ in_args.numInSamples = avctx->channels * frame->nb_samples;
+ in_buf.numBufs = 1;
+ in_buf.bufs = &in_ptr;
+ in_buf.bufferIdentifiers = &in_buffer_identifier;
+ in_buf.bufSizes = &in_buffer_size;
+ in_buf.bufElSizes = &in_buffer_element_size;
In the words of Diego...

nit: Align.
Post by Martin Storsjö
+ if ((ret = ff_alloc_packet(avpkt, FFMAX(8192, 768 * avctx->channels)))) {
+ av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+ return ret;
+ }
Comment on where FFMAX(8192, 768 * avctx->channels) comes from, please.
Post by Martin Storsjö
+ out_ptr = avpkt->data;
+ out_buffer_size = avpkt->size;
+ out_buffer_element_size = 1;
+ out_buf.numBufs = 1;
+ out_buf.bufs = &out_ptr;
+ out_buf.bufferIdentifiers = &out_buffer_identifier;
+ out_buf.bufSizes = &out_buffer_size;
+ out_buf.bufElSizes = &out_buffer_element_size;
The alignment faery beckons ye!
Post by Martin Storsjö
+ if (!out_args.numOutBytes)
+ return 0;
Is this really proper?
Post by Martin Storsjö
+ /* Get the next frame pts/duration */
+ ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
+ &avpkt->duration);
s#pts/#pts & #
Post by Martin Storsjö
+ avpkt->size = out_args.numOutBytes;
+ *got_packet_ptr = 1;
Needs faery dust.

- Derek
Martin Storsjö
2012-07-11 18:17:24 UTC
Permalink
Post by Derek Buitenhuis
Post by Martin Storsjö
For enabling VBR, the general consensus seems to be to use the
qscale flag. There doesn't seem to be any consistent way to
indicate the actual desired quality though. Both libfaac and
libmp3lame calculate avctx->global_quality / FF_QP2LAMBDA and set
that as the libraries' VBR quality parameters, with wildly different
results. On libmp3lame, the VBR quality parameter is between 0 (best)
and 10 (worst), while the scale goes in the opposite direction for
libfaac, where higher quality values gives you better quality.
Therefore, for now, I just pass the actual value of
avctx->global_quality through. You can set it to values between 1
and 5:
1 - about 32 kbps/channel
2 - about 40 kbps/channel
3 - about 48-56 kbps/channel
4 - about 64 kbps/channel
5 - about 80-96 kbps/channel
---
-Spun off Google Android sources, OpenCore and VisualOn libraries provide
+Spun off Google Android sources, OpenCore, VisualOn and Fraunhofer
+libraries provide
encoders for a number of audio codecs.
These lines should probably be merged, no?
I guess they could - this keeps the diff clearer and smaller IMO, but if
you prefer I can rewrap the lines...
Post by Derek Buitenhuis
Post by Martin Storsjö
+static const AVOption aac_enc_options[] = {
+ { "afterburner", "Afterburner (improved quality)", offsetof(AACContext, afterburner), AV_OPT_TYPE_INT, { 1 }, 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
+ { "eld_sbr", "Enable SBR for ELD (for SBR in other configurations, use the -profile parameter)", offsetof(AACContext, eld_sbr), AV_OPT_TYPE_INT, { 0 }, 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
+ { "signaling", "SBR/PS signaling style", offsetof(AACContext, signaling), AV_OPT_TYPE_INT, { -1 }, -1, 2, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { "default", "Choose signaling implicitly (explicit hierarchical by default, implicit if global header is disabled)", 0, AV_OPT_TYPE_CONST, { -1 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { "implicit", "Implicit backwards compatible signaling", 0, AV_OPT_TYPE_CONST, { 0 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { "explicit_sbr", "Explicit SBR, implicit PS signaling", 0, AV_OPT_TYPE_CONST, { 1 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { "explicit_hierarchical", "Explicit hierarchical signaling", 0, AV_OPT_TYPE_CONST, { 2 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { NULL }
+};
+
+static const AVClass aac_enc_class = {
+ "libfdk_aac", av_default_item_name, aac_enc_options, LIBAVUTIL_VERSION_INT
+};
Both of these can be broken up nicely so they're not lolhuge.
This can be done in many areas in this patch, so I will
refrain from noting them all.
Umm, you mean wrapping the lines? I prefer not doing that for the
avoptions, that usually ends up even more unreadable. I can wrap some of
the function calls below though.
Post by Derek Buitenhuis
Post by Martin Storsjö
+#if FF_API_OLD_ENCODE_AUDIO
+ av_freep(&avctx->coded_frame);
+#endif
Unrelated to the review -- are we dropping this sort of
thing for the next release?
At the next ABI bump I think, yes.
Post by Derek Buitenhuis
Post by Martin Storsjö
+static av_cold int aac_encode_init(AVCodecContext *avctx)
+{
+ AACContext *s = avctx->priv_data;
+ int ret = AVERROR(EINVAL);
+ AACENC_InfoStruct info = { 0 };
+ CHANNEL_MODE mode;
+ AACENC_ERROR err;
+ int aot = FF_PROFILE_AAC_LOW + 1;
+
+ if ((err = aacEncOpen(&s->handle, 0, avctx->channels)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to open the encoder: %s\n", aac_get_error(err));
+ goto error;
+ }
I know this is personal taste, but is there not a more elegant
way to handle errors? :/
Post by Martin Storsjö
+ if (avctx->profile != FF_PROFILE_UNKNOWN)
+ aot = avctx->profile + 1;
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_AOT, aot)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to set the AOT %d: %s\n", aot, aac_get_error(err));
+ goto error;
+ }
Needs a line break between these two if statements. Also, perhaps
it should print a warning if it is passed an invalid profile?
Might be better than silently falling back on aac_low.
Where does it silently fall back on aac_low? If you have set
avctx->profile, we will use that profile, and if the library doesn't
support it, it will fail at this point, indicating that it doesn't support
this AOT.
Post by Derek Buitenhuis
Post by Martin Storsjö
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_CHANNELORDER, 1)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to set wav channel order %d: %s\n", mode, aac_get_error(err));
+ goto error;
+ }
Not sure what 'wav channel order' means? WAVEFORMATEXTENSIBLE?
I guess that's the proper name for it. The encoder has a choice between
mpeg channel ordering and wav file format channel ordering, where the
latter is what we use in libavcodec.
Post by Derek Buitenhuis
Post by Martin Storsjö
+ if (avctx->flags & CODEC_FLAG_QSCALE) {
+ int mode = av_clip(avctx->global_quality, 1, 5);
We should print a warning instead of silently clipping.
Ok
Post by Derek Buitenhuis
Post by Martin Storsjö
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_TRANSMUX, avctx->flags & CODEC_FLAG_GLOBAL_HEADER ? 0 : 2)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to set the transmux format: %s\n", aac_get_error(err));
+ goto error;
+ }
Can you add a comment or something here? It's not immediately
clear what's happening.
Will do
Post by Derek Buitenhuis
Post by Martin Storsjö
+ if (s->signaling < 0)
+ s->signaling = avctx->flags & CODEC_FLAG_GLOBAL_HEADER ? 2 : 0;
We use this exact thing directly above. Should be set, then used there
instead of duplicating code.
No, it's not exactly the same, it's the inverse, and the fact that both
use numbers 0 and 2 is a coincidence.

Above is
if (extradata requested) { mp4 format } else { adts format }.
This one is:
if (no signalling mode requested by the caller)
if (extradata requested) { explicit hierarchical signalling } else
{ implicit signalling }
Where mp4 format is 0, adts format is 2, implicit signalling is 0 and
hierarchical signalling is 2.
Post by Derek Buitenhuis
Post by Martin Storsjö
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_AFTERBURNER, s->afterburner)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to set afterburner to %d: %s\n", s->afterburner, aac_get_error(err));
+ goto error;
+ }
Isn't the only possible option for afterburner 0/1? Shouldn't we force
it to a boolean? (!!var)
It shouldn't be needed, the AVOption limits it to the range 0-1.
Post by Derek Buitenhuis
Post by Martin Storsjö
+#if FF_API_OLD_ENCODE_AUDIO
+ avctx->coded_frame = avcodec_alloc_frame();
+ if (!avctx->coded_frame)
+ return AVERROR(ENOMEM);
+#endif
Shouldn't all the mallocing be done before we go through the
trouble of initializing the decoder? Also, this is the only
time goto error isn't used. Why?
It should use goto error, I'll fix it.
Post by Derek Buitenhuis
Post by Martin Storsjö
+ if (!avctx->extradata) {
+ ret = AVERROR(ENOMEM);
+ goto error;
+ }
Isn't it possible, while using the old audio API, that coded_frame
gets allocated, but then extradata allocation fails, but coded_frame
is never freed? Seems like a memleak.
Secondly, does fdk-aac not have a cleanup function for all the mem
it has allocated?
Umm, this one does use goto error, which calls aac_encode_close, which
frees coded_frame, and calls aacEncClose for freeing what the lib
allocated.
Post by Derek Buitenhuis
Post by Martin Storsjö
+ in_ptr = frame->data[0];
+ in_buffer_size = 2 * avctx->channels * frame->nb_samples;
+ in_buffer_element_size = 2;
+
+ in_args.numInSamples = avctx->channels * frame->nb_samples;
+ in_buf.numBufs = 1;
+ in_buf.bufs = &in_ptr;
+ in_buf.bufferIdentifiers = &in_buffer_identifier;
+ in_buf.bufSizes = &in_buffer_size;
+ in_buf.bufElSizes = &in_buffer_element_size;
In the words of Diego...
nit: Align.
Will do
Post by Derek Buitenhuis
Post by Martin Storsjö
+ if ((ret = ff_alloc_packet(avpkt, FFMAX(8192, 768 * avctx->channels)))) {
+ av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+ return ret;
+ }
Comment on where FFMAX(8192, 768 * avctx->channels) comes from, please.
I copied this from vo-aacenc, where Justin added it at some point. I think
the encoder lib has a comment about the bounds of the encoded data, I'll
try to look it up.
Post by Derek Buitenhuis
Post by Martin Storsjö
+ out_ptr = avpkt->data;
+ out_buffer_size = avpkt->size;
+ out_buffer_element_size = 1;
+ out_buf.numBufs = 1;
+ out_buf.bufs = &out_ptr;
+ out_buf.bufferIdentifiers = &out_buffer_identifier;
+ out_buf.bufSizes = &out_buffer_size;
+ out_buf.bufElSizes = &out_buffer_element_size;
The alignment faery beckons ye!
Post by Martin Storsjö
+ if (!out_args.numOutBytes)
+ return 0;
Is this really proper?
I don't see what wouldn't be proper here. We consumed the input data, but
the encoder didn't output anything yet, so we signal that everything is ok
but no data was returned.
Post by Derek Buitenhuis
Post by Martin Storsjö
+ /* Get the next frame pts/duration */
+ ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
+ &avpkt->duration);
s#pts/#pts & #
Ok
Post by Derek Buitenhuis
Post by Martin Storsjö
+ avpkt->size = out_args.numOutBytes;
+ *got_packet_ptr = 1;
Needs faery dust.
Ok

// Martin
Martin Storsjö
2012-07-11 18:49:02 UTC
Permalink
The numerical values of the profiles are the MPEG4 Audio Object
Type values, minus one.
---
libavcodec/avcodec.h | 4 ++++
libavcodec/options_table.h | 4 ++++
2 files changed, 8 insertions(+)

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index a1103e9..0c962cc 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2714,6 +2714,10 @@ typedef struct AVCodecContext {
#define FF_PROFILE_AAC_LOW 1
#define FF_PROFILE_AAC_SSR 2
#define FF_PROFILE_AAC_LTP 3
+#define FF_PROFILE_AAC_HE 4
+#define FF_PROFILE_AAC_HE_V2 28
+#define FF_PROFILE_AAC_LD 22
+#define FF_PROFILE_AAC_ELD 38

#define FF_PROFILE_DTS 20
#define FF_PROFILE_DTS_ES 30
diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
index 7f5b643..4f903cc 100644
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h
@@ -316,6 +316,10 @@ static const AVOption options[]={
{"aac_low", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_AAC_LOW }, INT_MIN, INT_MAX, A|E, "profile"},
{"aac_ssr", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_AAC_SSR }, INT_MIN, INT_MAX, A|E, "profile"},
{"aac_ltp", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_AAC_LTP }, INT_MIN, INT_MAX, A|E, "profile"},
+{"aac_he", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_AAC_HE }, INT_MIN, INT_MAX, A|E, "profile"},
+{"aac_he_v2", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_AAC_HE_V2 }, INT_MIN, INT_MAX, A|E, "profile"},
+{"aac_ld", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_AAC_LD }, INT_MIN, INT_MAX, A|E, "profile"},
+{"aac_eld", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_AAC_ELD }, INT_MIN, INT_MAX, A|E, "profile"},
{"dts", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_DTS }, INT_MIN, INT_MAX, A|E, "profile"},
{"dts_es", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_DTS_ES }, INT_MIN, INT_MAX, A|E, "profile"},
{"dts_96_24", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_DTS_96_24 }, INT_MIN, INT_MAX, A|E, "profile"},
--
1.7.9.4
Martin Storsjö
2012-07-11 18:49:03 UTC
Permalink
For enabling VBR, the general consensus seems to be to use the
qscale flag. There doesn't seem to be any consistent way to
indicate the actual desired quality though. Both libfaac and
libmp3lame calculate avctx->global_quality / FF_QP2LAMBDA and set
that as the libraries' VBR quality parameters, with wildly different
results. On libmp3lame, the VBR quality parameter is between 0 (best)
and 10 (worst), while the scale goes in the opposite direction for
libfaac, where higher quality values give you better quality.

Therefore, for now, I just pass the actual value of
avctx->global_quality through. You can set it to values between 1
and 5:
1 - about 32 kbps/channel
2 - about 40 kbps/channel
3 - about 48-56 kbps/channel
4 - about 64 kbps/channel
5 - about 80-96 kbps/channel
---
Changelog | 1 +
configure | 5 +
doc/general.texi | 12 +-
libavcodec/Makefile | 1 +
libavcodec/allcodecs.c | 1 +
libavcodec/libfdk-aacenc.c | 354 ++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 372 insertions(+), 2 deletions(-)
create mode 100644 libavcodec/libfdk-aacenc.c

diff --git a/Changelog b/Changelog
index 2fb5e3d..c56740c 100644
--- a/Changelog
+++ b/Changelog
@@ -33,6 +33,7 @@ version <next>:
- Microsoft ATC Screen decoder
- RTSP listen mode
- TechSmith Screen Codec 2 decoder
+- AAC encoding via libfdk-aac


version 0.8:
diff --git a/configure b/configure
index 2888c72..282974c 100755
--- a/configure
+++ b/configure
@@ -170,6 +170,7 @@ External library support:
--enable-libdc1394 enable IIDC-1394 grabbing using libdc1394
and libraw1394 [no]
--enable-libfaac enable FAAC support via libfaac [no]
+ --enable-libfdk-aac enable AAC support via libfdk-aac [no]
--enable-libfreetype enable libfreetype [no]
--enable-libgsm enable GSM support via libgsm [no]
--enable-libilbc enable iLBC de/encoding via libilbc [no]
@@ -943,6 +944,7 @@ CONFIG_LIST="
libcdio
libdc1394
libfaac
+ libfdk_aac
libfreetype
libgsm
libilbc
@@ -1447,6 +1449,7 @@ h264_parser_select="golomb h264dsp h264pred"

# external libraries
libfaac_encoder_deps="libfaac"
+libfdk_aac_encoder_deps="libfdk_aac"
libgsm_decoder_deps="libgsm"
libgsm_encoder_deps="libgsm"
libgsm_ms_decoder_deps="libgsm"
@@ -2966,6 +2969,7 @@ enabled avisynth && require2 vfw32 "windows.h vfw.h" AVIFileInit -lavifil32
enabled frei0r && { check_header frei0r.h || die "ERROR: frei0r.h header not found"; }
enabled gnutls && require_pkg_config gnutls gnutls/gnutls.h gnutls_global_init
enabled libfaac && require2 libfaac "stdint.h faac.h" faacEncGetVersion -lfaac
+enabled libfdk_aac && require libfdk_aac fdk-aac/aacenc_lib.h aacEncOpen -lfdk-aac
enabled libfreetype && require_pkg_config freetype2 "ft2build.h freetype/freetype.h" FT_Init_FreeType
enabled libgsm && require libgsm gsm/gsm.h gsm_create -lgsm
enabled libilbc && require libilbc ilbc.h WebRtcIlbcfix_InitDecode -lilbc
@@ -3257,6 +3261,7 @@ echo "gnutls enabled ${gnutls-no}"
echo "libcdio support ${libcdio-no}"
echo "libdc1394 support ${libdc1394-no}"
echo "libfaac enabled ${libfaac-no}"
+echo "libfdk-aac enabled ${libfdk_aac-no}"
echo "libgsm enabled ${libgsm-no}"
echo "libilbc enabled ${libilbc-no}"
echo "libmp3lame enabled ${libmp3lame-no}"
diff --git a/doc/general.texi b/doc/general.texi
index 7e9cfaf..fcac114 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -18,8 +18,8 @@ explicitly requested by passing the appropriate flags to

@section OpenCORE and VisualOn libraries

-Spun off Google Android sources, OpenCore and VisualOn libraries provide
-encoders for a number of audio codecs.
+Spun off Google Android sources, OpenCore, VisualOn and Fraunhofer
+libraries provide encoders for a number of audio codecs.

@float NOTE
OpenCORE and VisualOn libraries are under the Apache License 2.0
@@ -55,6 +55,14 @@ Go to @url{http://sourceforge.net/projects/opencore-amr/} and follow the
instructions for installing the library.
Then pass @code{--enable-libvo-amrwbenc} to configure to enable it.

+@subsection Fraunhofer AAC library
+
+Libav can make use of the Fraunhofer AAC library for AAC encoding.
+
+Go to @url{http://sourceforge.net/projects/opencore-amr/} and follow the
+instructions for installing the library.
+Then pass @code{--enable-libfdk-aac} to configure to enable it.
+
@section LAME

Libav can make use of the LAME library for MP3 encoding.
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index ac97d34..8d38ca2 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -595,6 +595,7 @@ OBJS-$(CONFIG_WTV_DEMUXER) += mpeg4audio.o mpegaudiodata.o

# external codec libraries
OBJS-$(CONFIG_LIBFAAC_ENCODER) += libfaac.o audio_frame_queue.o
+OBJS-$(CONFIG_LIBFDK_AAC_ENCODER) += libfdk-aacenc.o audio_frame_queue.o
OBJS-$(CONFIG_LIBGSM_DECODER) += libgsm.o
OBJS-$(CONFIG_LIBGSM_ENCODER) += libgsm.o
OBJS-$(CONFIG_LIBGSM_MS_DECODER) += libgsm.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 068f191..bd48728 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -380,6 +380,7 @@ void avcodec_register_all(void)

/* external libraries */
REGISTER_ENCODER (LIBFAAC, libfaac);
+ REGISTER_ENCODER (LIBFDK_AAC, libfdk_aac);
REGISTER_ENCDEC (LIBGSM, libgsm);
REGISTER_ENCDEC (LIBGSM_MS, libgsm_ms);
REGISTER_ENCDEC (LIBILBC, libilbc);
diff --git a/libavcodec/libfdk-aacenc.c b/libavcodec/libfdk-aacenc.c
new file mode 100644
index 0000000..e25cb6d
--- /dev/null
+++ b/libavcodec/libfdk-aacenc.c
@@ -0,0 +1,354 @@
+/*
+ * AAC encoder wrapper
+ * Copyright (c) 2012 Martin Storsjo
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <fdk-aac/aacenc_lib.h>
+
+#include "avcodec.h"
+#include "audio_frame_queue.h"
+#include "internal.h"
+#include "libavutil/opt.h"
+
+/* Private context of the libfdk_aac encoder wrapper (avctx->priv_data). */
+typedef struct AACContext {
+ const AVClass *class; /* set up through aac_enc_class; presumably must remain the first member for AVOptions -- TODO confirm */
+ HANDLE_AACENCODER handle; /* fdk-aac encoder instance, opened by aacEncOpen() and released by aacEncClose() */
+ int afterburner; /* "afterburner" option (0 or 1), passed to AACENC_AFTERBURNER */
+ int eld_sbr; /* "eld_sbr" option (0 or 1): enable SBR when the ELD object type is used */
+ int signaling; /* "signaling" option: -1 = pick a default at init, 0..2 = passed to AACENC_SIGNALING_MODE */
+
+ AudioFrameQueue afq; /* queue of input frames, used to derive packet pts and duration */
+} AACContext;
+
+/*
+ * Private AVOptions of the libfdk_aac encoder. The "default", "implicit",
+ * "explicit_sbr" and "explicit_hierarchical" entries are named constants
+ * belonging to the "signaling" option.
+ */
+static const AVOption aac_enc_options[] = {
+    { "afterburner", "Afterburner (improved quality)",
+      offsetof(AACContext, afterburner), AV_OPT_TYPE_INT, { 1 }, 0, 1,
+      AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
+    { "eld_sbr",
+      "Enable SBR for ELD (for SBR in other configurations, use the -profile parameter)",
+      offsetof(AACContext, eld_sbr), AV_OPT_TYPE_INT, { 0 }, 0, 1,
+      AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
+    { "signaling", "SBR/PS signaling style",
+      offsetof(AACContext, signaling), AV_OPT_TYPE_INT, { -1 }, -1, 2,
+      AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+    { "default",
+      "Choose signaling implicitly (explicit hierarchical by default, implicit if global header is disabled)",
+      0, AV_OPT_TYPE_CONST, { -1 }, 0, 0,
+      AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+    { "implicit", "Implicit backwards compatible signaling",
+      0, AV_OPT_TYPE_CONST, { 0 }, 0, 0,
+      AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+    { "explicit_sbr", "Explicit SBR, implicit PS signaling",
+      0, AV_OPT_TYPE_CONST, { 1 }, 0, 0,
+      AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+    { "explicit_hierarchical", "Explicit hierarchical signaling",
+      0, AV_OPT_TYPE_CONST, { 2 }, 0, 0,
+      AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+    { NULL }
+};
+
+/* AVClass tying the private options above to the encoder context. */
+static const AVClass aac_enc_class = {
+    .class_name = "libfdk_aac",
+    .item_name  = av_default_item_name,
+    .option     = aac_enc_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+/**
+ * Translate an fdk-aac encoder status code into a human-readable string.
+ *
+ * @param err status code returned by an fdk-aac encoder call
+ * @return a constant string describing err, or "Unknown error" for any
+ *         code that is not listed in the table below
+ */
+static const char *aac_get_error(AACENC_ERROR err)
+{
+    static const struct {
+        AACENC_ERROR code;
+        const char  *text;
+    } messages[] = {
+        { AACENC_OK,                    "No error" },
+        { AACENC_INVALID_HANDLE,        "Invalid handle" },
+        { AACENC_MEMORY_ERROR,          "Memory allocation error" },
+        { AACENC_UNSUPPORTED_PARAMETER, "Unsupported parameter" },
+        { AACENC_INVALID_CONFIG,        "Invalid config" },
+        { AACENC_INIT_ERROR,            "Initialization error" },
+        { AACENC_INIT_AAC_ERROR,        "AAC library initialization error" },
+        { AACENC_INIT_SBR_ERROR,        "SBR library initialization error" },
+        { AACENC_INIT_TP_ERROR,         "Transport library initialization error" },
+        { AACENC_INIT_META_ERROR,       "Metadata library initialization error" },
+        { AACENC_ENCODE_ERROR,          "Encoding error" },
+        { AACENC_ENCODE_EOF,            "End of file" },
+    };
+    unsigned idx;
+
+    for (idx = 0; idx < sizeof(messages) / sizeof(messages[0]); idx++) {
+        if (messages[idx].code == err)
+            return messages[idx].text;
+    }
+
+    return "Unknown error";
+}
+
+/**
+ * Release everything the wrapper owns: the fdk-aac encoder handle, the
+ * extradata buffer, the audio frame queue and, when the old audio encoding
+ * API is compiled in, the coded frame.
+ *
+ * @param avctx codec context whose priv_data is an AACContext
+ * @return always 0
+ */
+static int aac_encode_close(AVCodecContext *avctx)
+{
+    AACContext *ctx = avctx->priv_data;
+
+    /* The handle is only set once aacEncOpen() has been called. */
+    if (ctx->handle) {
+        aacEncClose(&ctx->handle);
+    }
+
+#if FF_API_OLD_ENCODE_AUDIO
+    av_freep(&avctx->coded_frame);
+#endif
+    av_freep(&avctx->extradata);
+    ff_af_queue_close(&ctx->afq);
+
+    return 0;
+}
+
+/**
+ * Open and configure the fdk-aac encoder from the codec context.
+ *
+ * Configures, in order: audio object type (avctx->profile + 1, AAC-LC when
+ * no profile is set), SBR for ELD, sample rate, channel mode, channel order,
+ * VBR mode or bitrate, transport format, signaling mode and afterburner.
+ * It then initializes the encoder and fills in avctx->frame_size,
+ * avctx->delay and, when a global header is requested, avctx->extradata.
+ *
+ * @param avctx codec context whose priv_data is an AACContext
+ * @return 0 on success, a negative AVERROR code on failure. Every failure
+ *         path goes through aac_encode_close() so that nothing allocated
+ *         here is leaked.
+ */
+static av_cold int aac_encode_init(AVCodecContext *avctx)
+{
+    AACContext *s = avctx->priv_data;
+    int ret = AVERROR(EINVAL);
+    AACENC_InfoStruct info = { 0 };
+    CHANNEL_MODE mode;
+    AACENC_ERROR err;
+    /* Profile values are the MPEG-4 Audio Object Type minus one. */
+    int aot = FF_PROFILE_AAC_LOW + 1;
+
+    if ((err = aacEncOpen(&s->handle, 0, avctx->channels)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to open the encoder: %s\n",
+               aac_get_error(err));
+        goto error;
+    }
+
+    if (avctx->profile != FF_PROFILE_UNKNOWN)
+        aot = avctx->profile + 1;
+
+    if ((err = aacEncoder_SetParam(s->handle, AACENC_AOT, aot)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to set the AOT %d: %s\n",
+               aot, aac_get_error(err));
+        goto error;
+    }
+
+    if (aot == FF_PROFILE_AAC_ELD + 1 && s->eld_sbr) {
+        if ((err = aacEncoder_SetParam(s->handle, AACENC_SBR_MODE,
+                                       1)) != AACENC_OK) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to enable SBR for ELD: %s\n",
+                   aac_get_error(err));
+            goto error;
+        }
+    }
+
+    if ((err = aacEncoder_SetParam(s->handle, AACENC_SAMPLERATE,
+                                   avctx->sample_rate)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to set the sample rate %d: %s\n",
+               avctx->sample_rate, aac_get_error(err));
+        goto error;
+    }
+
+    switch (avctx->channels) {
+    case 1: mode = MODE_1;       break;
+    case 2: mode = MODE_2;       break;
+    case 3: mode = MODE_1_2;     break;
+    case 4: mode = MODE_1_2_1;   break;
+    case 5: mode = MODE_1_2_2;   break;
+    case 6: mode = MODE_1_2_2_1; break;
+    default:
+        av_log(avctx, AV_LOG_ERROR,
+               "Unsupported number of channels %d\n", avctx->channels);
+        goto error;
+    }
+
+    if ((err = aacEncoder_SetParam(s->handle, AACENC_CHANNELMODE,
+                                   mode)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Unable to set channel mode %d: %s\n", mode, aac_get_error(err));
+        goto error;
+    }
+
+    /* Channel order 1 selects wav-style channel ordering instead of the
+     * MPEG ordering. Report the order value on failure, not the channel
+     * mode. */
+    if ((err = aacEncoder_SetParam(s->handle, AACENC_CHANNELORDER,
+                                   1)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Unable to set wav channel order %d: %s\n",
+               1, aac_get_error(err));
+        goto error;
+    }
+
+    if (avctx->flags & CODEC_FLAG_QSCALE) {
+        /* VBR: global_quality is passed through as the library's VBR mode,
+         * valid range 1-5. Named vbr_mode so it does not shadow the
+         * channel mode above. */
+        int vbr_mode = avctx->global_quality;
+        if (vbr_mode < 1 || vbr_mode > 5) {
+            av_log(avctx, AV_LOG_WARNING,
+                   "VBR quality %d out of range, should be 1-5\n", vbr_mode);
+            vbr_mode = av_clip(vbr_mode, 1, 5);
+        }
+        if ((err = aacEncoder_SetParam(s->handle, AACENC_BITRATEMODE,
+                                       vbr_mode)) != AACENC_OK) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to set the VBR bitrate mode %d: %s\n",
+                   vbr_mode, aac_get_error(err));
+            goto error;
+        }
+    } else {
+        if ((err = aacEncoder_SetParam(s->handle, AACENC_BITRATE,
+                                       avctx->bit_rate)) != AACENC_OK) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to set the bitrate %d: %s\n",
+                   avctx->bit_rate, aac_get_error(err));
+            goto error;
+        }
+    }
+
+    /* Choose bitstream format - if global header is requested, use
+     * raw access units (0), otherwise use ADTS (2). */
+    if ((err = aacEncoder_SetParam(s->handle, AACENC_TRANSMUX,
+                                   avctx->flags & CODEC_FLAG_GLOBAL_HEADER ? 0 : 2)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to set the transmux format: %s\n",
+               aac_get_error(err));
+        goto error;
+    }
+
+    /* If no signaling mode is chosen, use explicit hierarchical signaling (2)
+     * if using mp4 mode (raw access units, with global header) and
+     * implicit signaling (0) if using ADTS. */
+    if (s->signaling < 0)
+        s->signaling = avctx->flags & CODEC_FLAG_GLOBAL_HEADER ? 2 : 0;
+
+    if ((err = aacEncoder_SetParam(s->handle, AACENC_SIGNALING_MODE,
+                                   s->signaling)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to set signaling mode %d: %s\n",
+               s->signaling, aac_get_error(err));
+        goto error;
+    }
+
+    if ((err = aacEncoder_SetParam(s->handle, AACENC_AFTERBURNER,
+                                   s->afterburner)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to set afterburner to %d: %s\n",
+               s->afterburner, aac_get_error(err));
+        goto error;
+    }
+
+    /* Calling the encode function with all-NULL arguments initializes the
+     * encoder with the parameters set above. On failure take the common
+     * cleanup path so the open handle is closed (ret is still EINVAL). */
+    if ((err = aacEncEncode(s->handle, NULL, NULL, NULL, NULL)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize the encoder: %s\n",
+               aac_get_error(err));
+        goto error;
+    }
+
+    if ((err = aacEncInfo(s->handle, &info)) != AACENC_OK) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to get encoder info: %s\n",
+               aac_get_error(err));
+        goto error;
+    }
+
+#if FF_API_OLD_ENCODE_AUDIO
+    avctx->coded_frame = avcodec_alloc_frame();
+    if (!avctx->coded_frame) {
+        ret = AVERROR(ENOMEM);
+        goto error;
+    }
+#endif
+    avctx->frame_size = info.frameLength;
+    avctx->delay      = info.encoderDelay;
+    ff_af_queue_init(avctx, &s->afq);
+
+    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
+        /* Export the encoder configuration as extradata, zero padded. */
+        avctx->extradata_size = info.confSize;
+        avctx->extradata      = av_mallocz(avctx->extradata_size +
+                                           FF_INPUT_BUFFER_PADDING_SIZE);
+        if (!avctx->extradata) {
+            ret = AVERROR(ENOMEM);
+            goto error;
+        }
+
+        memcpy(avctx->extradata, info.confBuf, info.confSize);
+    }
+    return 0;
+error:
+    aac_encode_close(avctx);
+    return ret;
+}
+
+/**
+ * Encode one frame of interleaved 16-bit samples, or flush the encoder.
+ *
+ * @param avctx          codec context whose priv_data is an AACContext
+ * @param avpkt          output packet; allocated here via ff_alloc_packet()
+ * @param frame          input samples, or NULL to flush delayed data
+ * @param got_packet_ptr set to 1 when avpkt holds encoded data; left
+ *                       untouched when the encoder produced no output
+ * @return 0 on success (including "no output yet" and end of stream while
+ *         flushing), a negative AVERROR code on failure
+ */
+static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+                            const AVFrame *frame, int *got_packet_ptr)
+{
+    AACContext *s = avctx->priv_data;
+    AACENC_BufDesc in_buf   = { 0 }, out_buf = { 0 };
+    AACENC_InArgs  in_args  = { 0 };
+    AACENC_OutArgs out_args = { 0 };
+    int in_buffer_identifier = IN_AUDIO_DATA;
+    int in_buffer_size, in_buffer_element_size;
+    int out_buffer_identifier = OUT_BITSTREAM_DATA;
+    int out_buffer_size, out_buffer_element_size;
+    void *in_ptr, *out_ptr;
+    int ret;
+    AACENC_ERROR err;
+
+    /* handle end-of-stream small frame and flushing */
+    if (!frame) {
+        /* No input: in_buf stays zeroed and only the sample count is set. */
+        in_args.numInSamples = -1;
+    } else {
+        /* Input is interleaved 16-bit samples: 2 bytes per sample. */
+        in_ptr                 = frame->data[0];
+        in_buffer_size         = 2 * avctx->channels * frame->nb_samples;
+        in_buffer_element_size = 2;
+
+        in_args.numInSamples     = avctx->channels * frame->nb_samples;
+        in_buf.numBufs           = 1;
+        in_buf.bufs              = &in_ptr;
+        in_buf.bufferIdentifiers = &in_buffer_identifier;
+        in_buf.bufSizes          = &in_buffer_size;
+        in_buf.bufElSizes        = &in_buffer_element_size;
+
+        /* add current frame to the queue; the comparison must be applied
+         * to the stored return value so the real error code is returned */
+        if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
+            return ret;
+    }
+
+    /* The maximum packet size is 6144 bits aka 768 bytes per channel. */
+    if ((ret = ff_alloc_packet(avpkt, FFMAX(8192, 768 * avctx->channels)))) {
+        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+        return ret;
+    }
+
+    out_ptr                   = avpkt->data;
+    out_buffer_size           = avpkt->size;
+    out_buffer_element_size   = 1;
+    out_buf.numBufs           = 1;
+    out_buf.bufs              = &out_ptr;
+    out_buf.bufferIdentifiers = &out_buffer_identifier;
+    out_buf.bufSizes          = &out_buffer_size;
+    out_buf.bufElSizes        = &out_buffer_element_size;
+
+    if ((err = aacEncEncode(s->handle, &in_buf, &out_buf, &in_args,
+                            &out_args)) != AACENC_OK) {
+        /* End of file while flushing is the normal end of the stream. */
+        if (!frame && err == AACENC_ENCODE_EOF)
+            return 0;
+        av_log(avctx, AV_LOG_ERROR, "Unable to encode frame: %s\n",
+               aac_get_error(err));
+        return AVERROR(EINVAL);
+    }
+
+    /* The input was consumed but the encoder has not output anything. */
+    if (!out_args.numOutBytes)
+        return 0;
+
+    /* Get the next frame pts & duration */
+    ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
+                       &avpkt->duration);
+
+    avpkt->size      = out_args.numOutBytes;
+    *got_packet_ptr  = 1;
+    return 0;
+}
+
+/* Profiles advertised by this encoder; FF_PROFILE_UNKNOWN ends the list. */
+static const AVProfile profiles[] = {
+    { .profile = FF_PROFILE_AAC_LOW,   .name = "LC"       },
+    { .profile = FF_PROFILE_AAC_HE,    .name = "HE-AAC"   },
+    { .profile = FF_PROFILE_AAC_HE_V2, .name = "HE-AACv2" },
+    { .profile = FF_PROFILE_AAC_LD,    .name = "LD"       },
+    { .profile = FF_PROFILE_AAC_ELD,   .name = "ELD"      },
+    { .profile = FF_PROFILE_UNKNOWN },
+};
+
+/*
+ * Codec registration entry for the libfdk_aac encoder: an AAC audio encoder
+ * taking signed 16-bit samples, with delayed output and a possibly shorter
+ * last frame.
+ */
+AVCodec ff_libfdk_aac_encoder = {
+    /* identification */
+    .name           = "libfdk_aac",
+    .long_name      = NULL_IF_CONFIG_SMALL("Fraunhofer FDK AAC"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = CODEC_ID_AAC,
+
+    /* capabilities and supported formats */
+    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
+    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
+                                                     AV_SAMPLE_FMT_NONE },
+    .profiles       = profiles,
+
+    /* private context and callbacks */
+    .priv_data_size = sizeof(AACContext),
+    .priv_class     = &aac_enc_class,
+    .init           = aac_encode_init,
+    .encode2        = aac_encode_frame,
+    .close          = aac_encode_close,
+};
--
1.7.9.4
Derek Buitenhuis
2012-07-11 19:27:03 UTC
Permalink
Post by Martin Storsjö
Changelog | 1 +
configure | 5 +
doc/general.texi | 12 +-
libavcodec/Makefile | 1 +
libavcodec/allcodecs.c | 1 +
libavcodec/libfdk-aacenc.c | 354 ++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 372 insertions(+), 2 deletions(-)
create mode 100644 libavcodec/libfdk-aacenc.c
Looks OK to me, along with the few comments I made in my other
email.

- Derek
Martin Storsjö
2012-07-12 08:21:25 UTC
Permalink
Post by Derek Buitenhuis
Post by Martin Storsjö
Changelog | 1 +
configure | 5 +
doc/general.texi | 12 +-
libavcodec/Makefile | 1 +
libavcodec/allcodecs.c | 1 +
libavcodec/libfdk-aacenc.c | 354 ++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 372 insertions(+), 2 deletions(-)
create mode 100644 libavcodec/libfdk-aacenc.c
Looks OK to me, along with the few comments I made in my other
email.
Pushed the updated version with Alex and Justin's comments taken into
account.

// Martin

Derek Buitenhuis
2012-07-11 19:12:25 UTC
Permalink
Post by Martin Storsjö
The numerical values of the profiles are the MPEG4 Audio Object
Type values, minus one.
---
libavcodec/avcodec.h | 4 ++++
libavcodec/options_table.h | 4 ++++
2 files changed, 8 insertions(+)
LGTM.

- Derek
Derek Buitenhuis
2012-07-11 19:11:21 UTC
Permalink
Post by Martin Storsjö
Post by Derek Buitenhuis
Both of these can be broken up nicely so they're not lolhuge.
This can be done in many areas in this patch, so I will
refrain from noting them all.
Umm, you mean wrapping the lines? I prefer not doing that for the
avoptions, that usually ends up even more unreadable. I can wrap some of
the function calls below though.
OK.
Post by Martin Storsjö
Post by Derek Buitenhuis
Needs a line break between these two if statements. Also, perhaps
it should print a warning if it is passed an invalid profile?
Might be better than silently falling back on aac_low.
Where does it silently fall back on aac_low? If you have set
avctx->profile, we will use that profile, and if the library doesn't
support it, it will fail at this point, indicating that it doesn't support
this AOT.
I misread this part. Please ignore.
Post by Martin Storsjö
Post by Derek Buitenhuis
Post by Martin Storsjö
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_CHANNELORDER, 1)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to set wav channel order %d: %s\n", mode, aac_get_error(err));
+ goto error;
+ }
Not sure what 'wav channel order' means? WAVEFORMATEXTENSIBLE?
I guess that's the proper name for it. The encoder has a choice between
mpeg channel ordering and wav file format channel ordering, where the
latter is what we use in libavcodec.
Well, perhaps there are pre-defined channel orderings for wav files.
For some reason I was thinking of channel masks.
Post by Martin Storsjö
Post by Derek Buitenhuis
Post by Martin Storsjö
+ if (s->signaling < 0)
+ s->signaling = avctx->flags & CODEC_FLAG_GLOBAL_HEADER ? 2 : 0;
We use this exact thing directly above. Should be set, then used there
instead of duplicating code.
No, it's not exactly the same, it's the inverse, and the fact that both
use numbers 0 and 2 is a coincidence.
Above is
if (extradata requested) { mp4 format } else { adts format }.
This one is:
if (no signalling mode requested by the caller)
if (extradata requested) { explicit hierarchical signalling } else
{ implicit signalling }
Where mp4 format is 0, adts format is 2, implicit signalling is 0 and
hierarchical signalling is 2.
Seems I missed the inverse bit. My question is why do they both use
CODEC_FLAG_GLOBAL_HEADER? Seems pretty hacky and confusing to me.
Post by Martin Storsjö
Post by Derek Buitenhuis
Comment on where FFMAX(8192, 768 * avctx->channels) comes from, please.
I copied this from vo-aacenc, where Justin added it at some point. I think
the encoder lib has a comment about the bounds of the encoded data, I'll
try to look it up.
Thanks.
Post by Martin Storsjö
Post by Derek Buitenhuis
Is this really proper?
I don't see what wouldn't be proper here. We consumed the input data, but
the encoder didn't output anything yet, so we signal that everything is ok
but no data was returned.
I'm more wondering if this is a valid thing for libfdk-aac to do? I'm
clearly no AAC expert.

And, apologies for some of the obvious things I didn't notice.

- Derek
Martin Storsjö
2012-07-11 19:30:47 UTC
Permalink
Post by Derek Buitenhuis
Post by Martin Storsjö
Post by Derek Buitenhuis
Post by Martin Storsjö
+ if (s->signaling < 0)
+ s->signaling = avctx->flags & CODEC_FLAG_GLOBAL_HEADER ? 2 : 0;
We use this exact thing directly above. Should be set, then used there
instead of duplicating code.
No, it's not exactly the same, it's the inverse, and the fact that both
use numbers 0 and 2 is a coincidence.
Above is
if (extradata requested) { mp4 format } else { adts format }.
if (no signalling mode requested by the caller)
if (extradata requested) { explicit hierarchical signalling } else
{ implicit signalling }
Where mp4 format is 0, adts format is 2, implicit signalling is 0 and
hierarchical signalling is 2.
Seems I missed the inverse bit. My question is why do they both use
CODEC_FLAG_GLOBAL_HEADER? Seems pretty hacky and confusing to me.
It's the norm for how AAC encoders in libavcodec work at the moment,
afaik. You have these two different bitstream formats, extradata + raw
access units (mp4 style) or no extradata and the corresponding data
prepended in the header (ADTS) - kinda like annexb vs mp4 for H264. That's
all about the first section above.

Then for the signaling mode, Alex explained it to me that we should prefer
using explicit signaling if possible, since that's, well, explicit with
what features the bitstream uses. This mode isn't possible when using ADTS
though, so then we have to use implicit signaling (as default).
Post by Derek Buitenhuis
Post by Martin Storsjö
Post by Derek Buitenhuis
Comment on where FFMAX(8192, 768 * avctx->channels) comes from, please.
I copied this from vo-aacenc, where Justin added it at some point. I think
the encoder lib has a comment about the bounds of the encoded data, I'll
try to look it up.
Thanks.
Post by Martin Storsjö
Post by Derek Buitenhuis
Is this really proper?
I don't see what wouldn't be proper here. We consumed the input data, but
the encoder didn't output anything yet, so we signal that everything is ok
but no data was returned.
I'm more wondering if this is a valid thing for libfdk-aac to do? I'm
clearly no AAC expert.
Hmm, it actually didn't happen at the start as I thought, but it does
happen at the end when flushing the encoder.

// Martin
Derek Buitenhuis
2012-07-11 19:34:23 UTC
Permalink
Post by Martin Storsjö
Post by Derek Buitenhuis
Seems I missed the inverse bit. My question is why do they both use
CODEC_FLAG_GLOBAL_HEADER? Seems pretty hacky and confusing to me.
It's the norm for how AAC encoders in libavcodec work at the moment,
afaik. You have these two different bitstream formats, extradata + raw
access units (mp4 style) or no extradata and the corresponding data
prepended in the header (ADTS) - kinda like annexb vs mp4 for H264. That's
all about the first section above.
Then for the signaling mode, Alex explained it to me that we should prefer
using explicit signaling if possible, since that's, well, explicit with
what features the bitstream uses. This mode isn't possible when using ADTS
though, so then we have to use implicit signaling (as default).
I saw you added a comment, so it gets an OK from me.
Post by Martin Storsjö
Post by Derek Buitenhuis
Post by Martin Storsjö
Post by Derek Buitenhuis
Is this really proper?
I don't see what wouldn't be proper here. We consumed the input data, but
the encoder didn't output anything yet, so we signal that everything is ok
but no data was returned.
I'm more wondering if this is a valid thing for libfdk-aac to do? I'm
clearly no AAC expert.
Hmm, it actually didn't happen at the start as I thought, but it does
happen at the end when flushing the encoder.
I'll leave this for you to decide on / look into then.

- Derek
Martin Storsjö
2012-07-11 20:47:06 UTC
Permalink
For enabling VBR, the general consensus seems to be to use the
qscale flag. There doesn't seem to be any consistent way to
indicate the actual desired quality though. Both libfaac and
libmp3lame calculate avctx->global_quality / FF_QP2LAMBDA and set
that as the libraries' VBR quality parameters, with wildly different
results. On libmp3lame, the VBR quality parameter is between 0 (best)
and 10 (worst), while the scale goes in the opposite direction for
libfaac, where higher quality values give you better quality.

Therefore, for now, I just pass the actual value of
avctx->global_quality through. You can set it to values between 1
and 5:
1 - about 32 kbps/channel
2 - about 40 kbps/channel
3 - about 48-56 kbps/channel
4 - about 64 kbps/channel
5 - about 80-96 kbps/channel
---

Added a sensible default for the bitrate, depending on the
sample rate, SBR/PS configuration and channel types, as
suggested by Justin and Alex.

Changelog | 1 +
configure | 5 +
doc/general.texi | 12 +-
libavcodec/Makefile | 1 +
libavcodec/allcodecs.c | 1 +
libavcodec/libfdk-aacenc.c | 383 ++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 401 insertions(+), 2 deletions(-)
create mode 100644 libavcodec/libfdk-aacenc.c

diff --git a/Changelog b/Changelog
index 2fb5e3d..c56740c 100644
--- a/Changelog
+++ b/Changelog
@@ -33,6 +33,7 @@ version <next>:
- Microsoft ATC Screen decoder
- RTSP listen mode
- TechSmith Screen Codec 2 decoder
+- AAC encoding via libfdk-aac


version 0.8:
diff --git a/configure b/configure
index 4d83d4b..397be73 100755
--- a/configure
+++ b/configure
@@ -170,6 +170,7 @@ External library support:
--enable-libdc1394 enable IIDC-1394 grabbing using libdc1394
and libraw1394 [no]
--enable-libfaac enable FAAC support via libfaac [no]
+ --enable-libfdk-aac enable AAC support via libfdk-aac [no]
--enable-libfreetype enable libfreetype [no]
--enable-libgsm enable GSM support via libgsm [no]
--enable-libilbc enable iLBC de/encoding via libilbc [no]
@@ -943,6 +944,7 @@ CONFIG_LIST="
libcdio
libdc1394
libfaac
+ libfdk_aac
libfreetype
libgsm
libilbc
@@ -1448,6 +1450,7 @@ h264_parser_select="golomb h264dsp h264pred"

# external libraries
libfaac_encoder_deps="libfaac"
+libfdk_aac_encoder_deps="libfdk_aac"
libgsm_decoder_deps="libgsm"
libgsm_encoder_deps="libgsm"
libgsm_ms_decoder_deps="libgsm"
@@ -2968,6 +2971,7 @@ enabled avisynth && require2 vfw32 "windows.h vfw.h" AVIFileInit -lavifil32
enabled frei0r && { check_header frei0r.h || die "ERROR: frei0r.h header not found"; }
enabled gnutls && require_pkg_config gnutls gnutls/gnutls.h gnutls_global_init
enabled libfaac && require2 libfaac "stdint.h faac.h" faacEncGetVersion -lfaac
+enabled libfdk_aac && require libfdk_aac fdk-aac/aacenc_lib.h aacEncOpen -lfdk-aac
enabled libfreetype && require_pkg_config freetype2 "ft2build.h freetype/freetype.h" FT_Init_FreeType
enabled libgsm && require libgsm gsm/gsm.h gsm_create -lgsm
enabled libilbc && require libilbc ilbc.h WebRtcIlbcfix_InitDecode -lilbc
@@ -3259,6 +3263,7 @@ echo "gnutls enabled ${gnutls-no}"
echo "libcdio support ${libcdio-no}"
echo "libdc1394 support ${libdc1394-no}"
echo "libfaac enabled ${libfaac-no}"
+echo "libfdk-aac enabled ${libfdk_aac-no}"
echo "libgsm enabled ${libgsm-no}"
echo "libilbc enabled ${libilbc-no}"
echo "libmp3lame enabled ${libmp3lame-no}"
diff --git a/doc/general.texi b/doc/general.texi
index 7e9cfaf..fcac114 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -18,8 +18,8 @@ explicitly requested by passing the appropriate flags to

@section OpenCORE and VisualOn libraries

-Spun off Google Android sources, OpenCore and VisualOn libraries provide
-encoders for a number of audio codecs.
+Spun off Google Android sources, OpenCore, VisualOn and Fraunhofer
+libraries provide encoders for a number of audio codecs.

@float NOTE
OpenCORE and VisualOn libraries are under the Apache License 2.0
@@ -55,6 +55,14 @@ Go to @url{http://sourceforge.net/projects/opencore-amr/} and follow the
instructions for installing the library.
Then pass @code{--enable-libvo-amrwbenc} to configure to enable it.

+@subsection Fraunhofer AAC library
+
+Libav can make use of the Fraunhofer AAC library for AAC encoding.
+
+Go to @url{http://sourceforge.net/projects/opencore-amr/} and follow the
+instructions for installing the library.
+Then pass @code{--enable-libfdk-aac} to configure to enable it.
+
@section LAME

Libav can make use of the LAME library for MP3 encoding.
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index ac97d34..8d38ca2 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -595,6 +595,7 @@ OBJS-$(CONFIG_WTV_DEMUXER) += mpeg4audio.o mpegaudiodata.o

# external codec libraries
OBJS-$(CONFIG_LIBFAAC_ENCODER) += libfaac.o audio_frame_queue.o
+OBJS-$(CONFIG_LIBFDK_AAC_ENCODER) += libfdk-aacenc.o audio_frame_queue.o
OBJS-$(CONFIG_LIBGSM_DECODER) += libgsm.o
OBJS-$(CONFIG_LIBGSM_ENCODER) += libgsm.o
OBJS-$(CONFIG_LIBGSM_MS_DECODER) += libgsm.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 068f191..bd48728 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -380,6 +380,7 @@ void avcodec_register_all(void)

/* external libraries */
REGISTER_ENCODER (LIBFAAC, libfaac);
+ REGISTER_ENCODER (LIBFDK_AAC, libfdk_aac);
REGISTER_ENCDEC (LIBGSM, libgsm);
REGISTER_ENCDEC (LIBGSM_MS, libgsm_ms);
REGISTER_ENCDEC (LIBILBC, libilbc);
diff --git a/libavcodec/libfdk-aacenc.c b/libavcodec/libfdk-aacenc.c
new file mode 100644
index 0000000..eec1e04
--- /dev/null
+++ b/libavcodec/libfdk-aacenc.c
@@ -0,0 +1,383 @@
+/*
+ * AAC encoder wrapper
+ * Copyright (c) 2012 Martin Storsjo
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <fdk-aac/aacenc_lib.h>
+
+#include "avcodec.h"
+#include "audio_frame_queue.h"
+#include "internal.h"
+#include "libavutil/opt.h"
+
+typedef struct AACContext {
+ const AVClass *class;
+ HANDLE_AACENCODER handle;
+ int afterburner;
+ int eld_sbr;
+ int signaling;
+
+ AudioFrameQueue afq;
+} AACContext;
+
+static const AVOption aac_enc_options[] = {
+ { "afterburner", "Afterburner (improved quality)", offsetof(AACContext, afterburner), AV_OPT_TYPE_INT, { 1 }, 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
+ { "eld_sbr", "Enable SBR for ELD (for SBR in other configurations, use the -profile parameter)", offsetof(AACContext, eld_sbr), AV_OPT_TYPE_INT, { 0 }, 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
+ { "signaling", "SBR/PS signaling style", offsetof(AACContext, signaling), AV_OPT_TYPE_INT, { -1 }, -1, 2, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { "default", "Choose signaling implicitly (explicit hierarchical by default, implicit if global header is disabled)", 0, AV_OPT_TYPE_CONST, { -1 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { "implicit", "Implicit backwards compatible signaling", 0, AV_OPT_TYPE_CONST, { 0 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { "explicit_sbr", "Explicit SBR, implicit PS signaling", 0, AV_OPT_TYPE_CONST, { 1 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { "explicit_hierarchical", "Explicit hierarchical signaling", 0, AV_OPT_TYPE_CONST, { 2 }, 0, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, "signaling" },
+ { NULL }
+};
+
+static const AVClass aac_enc_class = {
+ "libfdk_aac", av_default_item_name, aac_enc_options, LIBAVUTIL_VERSION_INT
+};
+
+static const char *aac_get_error(AACENC_ERROR err)
+{
+ switch (err) {
+ case AACENC_OK:
+ return "No error";
+ case AACENC_INVALID_HANDLE:
+ return "Invalid handle";
+ case AACENC_MEMORY_ERROR:
+ return "Memory allocation error";
+ case AACENC_UNSUPPORTED_PARAMETER:
+ return "Unsupported parameter";
+ case AACENC_INVALID_CONFIG:
+ return "Invalid config";
+ case AACENC_INIT_ERROR:
+ return "Initialization error";
+ case AACENC_INIT_AAC_ERROR:
+ return "AAC library initialization error";
+ case AACENC_INIT_SBR_ERROR:
+ return "SBR library initialization error";
+ case AACENC_INIT_TP_ERROR:
+ return "Transport library initialization error";
+ case AACENC_INIT_META_ERROR:
+ return "Metadata library initialization error";
+ case AACENC_ENCODE_ERROR:
+ return "Encoding error";
+ case AACENC_ENCODE_EOF:
+ return "End of file";
+ default:
+ return "Unknown error";
+ }
+}
+
+static int aac_encode_close(AVCodecContext *avctx)
+{
+ AACContext *s = avctx->priv_data;
+
+ if (s->handle)
+ aacEncClose(&s->handle);
+#if FF_API_OLD_ENCODE_AUDIO
+ av_freep(&avctx->coded_frame);
+#endif
+ av_freep(&avctx->extradata);
+ ff_af_queue_close(&s->afq);
+
+ return 0;
+}
+
+static av_cold int aac_encode_init(AVCodecContext *avctx)
+{
+ AACContext *s = avctx->priv_data;
+ int ret = AVERROR(EINVAL);
+ AACENC_InfoStruct info = { 0 };
+ CHANNEL_MODE mode;
+ AACENC_ERROR err;
+ int aot = FF_PROFILE_AAC_LOW + 1;
+ int sce = 0, cpe = 0;
+
+ if ((err = aacEncOpen(&s->handle, 0, avctx->channels)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to open the encoder: %s\n",
+ aac_get_error(err));
+ goto error;
+ }
+
+ if (avctx->profile != FF_PROFILE_UNKNOWN)
+ aot = avctx->profile + 1;
+
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_AOT, aot)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to set the AOT %d: %s\n",
+ aot, aac_get_error(err));
+ goto error;
+ }
+
+ if (aot == FF_PROFILE_AAC_ELD + 1 && s->eld_sbr) {
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_SBR_MODE,
+ 1)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to enable SBR for ELD: %s\n",
+ aac_get_error(err));
+ goto error;
+ }
+ }
+
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_SAMPLERATE,
+ avctx->sample_rate)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to set the sample rate %d: %s\n",
+ avctx->sample_rate, aac_get_error(err));
+ goto error;
+ }
+
+ switch (avctx->channels) {
+ case 1: mode = MODE_1; sce = 1; cpe = 0; break;
+ case 2: mode = MODE_2; sce = 0; cpe = 1; break;
+ case 3: mode = MODE_1_2; sce = 1; cpe = 1; break;
+ case 4: mode = MODE_1_2_1; sce = 2; cpe = 1; break;
+ case 5: mode = MODE_1_2_2; sce = 1; cpe = 2; break;
+ case 6: mode = MODE_1_2_2_1; sce = 2; cpe = 2; break;
+ default:
+ av_log(avctx, AV_LOG_ERROR,
+ "Unsupported number of channels %d\n", avctx->channels);
+ goto error;
+ }
+
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_CHANNELMODE,
+ mode)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR,
+ "Unable to set channel mode %d: %s\n", mode, aac_get_error(err));
+ goto error;
+ }
+
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_CHANNELORDER,
+ 1)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR,
+ "Unable to set wav channel order %d: %s\n",
+ mode, aac_get_error(err));
+ goto error;
+ }
+
+ if (avctx->flags & CODEC_FLAG_QSCALE) {
+ int mode = avctx->global_quality;
+ if (mode < 1 || mode > 5) {
+ av_log(avctx, AV_LOG_WARNING,
+ "VBR quality %d out of range, should be 1-5\n", mode);
+ mode = av_clip(mode, 1, 5);
+ }
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_BITRATEMODE,
+ mode)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to set the VBR bitrate mode %d: %s\n",
+ mode, aac_get_error(err));
+ goto error;
+ }
+ } else {
+ if (avctx->bit_rate <= 0) {
+ if (avctx->profile == FF_PROFILE_AAC_HE_V2) {
+ sce = 1;
+ cpe = 0;
+ }
+ avctx->bit_rate = (96*sce + 128*cpe) * avctx->sample_rate / 44;
+ if (avctx->profile == FF_PROFILE_AAC_HE ||
+ avctx->profile == FF_PROFILE_AAC_HE_V2 ||
+ s->eld_sbr)
+ avctx->bit_rate /= 2;
+ }
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_BITRATE,
+ avctx->bit_rate)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to set the bitrate %d: %s\n",
+ avctx->bit_rate, aac_get_error(err));
+ goto error;
+ }
+ }
+
+ /* Choose bitstream format - if global header is requested, use
+ * raw access units, otherwise use ADTS. */
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_TRANSMUX,
+ avctx->flags & CODEC_FLAG_GLOBAL_HEADER ? 0 : 2)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to set the transmux format: %s\n",
+ aac_get_error(err));
+ goto error;
+ }
+
+ /* If no signaling mode is chosen, use explicit hierarchical signaling
+ * if using mp4 mode (raw access units, with global header) and
+ * implicit signaling if using ADTS. */
+ if (s->signaling < 0)
+ s->signaling = avctx->flags & CODEC_FLAG_GLOBAL_HEADER ? 2 : 0;
+
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_SIGNALING_MODE,
+ s->signaling)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to set signaling mode %d: %s\n",
+ s->signaling, aac_get_error(err));
+ goto error;
+ }
+
+ if ((err = aacEncoder_SetParam(s->handle, AACENC_AFTERBURNER,
+ s->afterburner)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to set afterburner to %d: %s\n",
+ s->afterburner, aac_get_error(err));
+ goto error;
+ }
+
+ if ((err = aacEncEncode(s->handle, NULL, NULL, NULL, NULL)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to initialize the encoder: %s\n",
+ aac_get_error(err));
+ return AVERROR(EINVAL);
+ }
+
+ if ((err = aacEncInfo(s->handle, &info)) != AACENC_OK) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to get encoder info: %s\n",
+ aac_get_error(err));
+ goto error;
+ }
+
+#if FF_API_OLD_ENCODE_AUDIO
+ avctx->coded_frame = avcodec_alloc_frame();
+ if (!avctx->coded_frame) {
+ ret = AVERROR(ENOMEM);
+ goto error;
+ }
+#endif
+ avctx->frame_size = info.frameLength;
+ avctx->delay = info.encoderDelay;
+ ff_af_queue_init(avctx, &s->afq);
+
+ if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
+ avctx->extradata_size = info.confSize;
+ avctx->extradata = av_mallocz(avctx->extradata_size +
+ FF_INPUT_BUFFER_PADDING_SIZE);
+ if (!avctx->extradata) {
+ ret = AVERROR(ENOMEM);
+ goto error;
+ }
+
+ memcpy(avctx->extradata, info.confBuf, info.confSize);
+ }
+ return 0;
+error:
+ aac_encode_close(avctx);
+ return ret;
+}
+
+static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+ const AVFrame *frame, int *got_packet_ptr)
+{
+ AACContext *s = avctx->priv_data;
+ AACENC_BufDesc in_buf = { 0 }, out_buf = { 0 };
+ AACENC_InArgs in_args = { 0 };
+ AACENC_OutArgs out_args = { 0 };
+ int in_buffer_identifier = IN_AUDIO_DATA;
+ int in_buffer_size, in_buffer_element_size;
+ int out_buffer_identifier = OUT_BITSTREAM_DATA;
+ int out_buffer_size, out_buffer_element_size;
+ void *in_ptr, *out_ptr;
+ int ret;
+ AACENC_ERROR err;
+
+ /* handle end-of-stream small frame and flushing */
+ if (!frame) {
+ in_args.numInSamples = -1;
+ } else {
+ in_ptr = frame->data[0];
+ in_buffer_size = 2 * avctx->channels * frame->nb_samples;
+ in_buffer_element_size = 2;
+
+ in_args.numInSamples = avctx->channels * frame->nb_samples;
+ in_buf.numBufs = 1;
+ in_buf.bufs = &in_ptr;
+ in_buf.bufferIdentifiers = &in_buffer_identifier;
+ in_buf.bufSizes = &in_buffer_size;
+ in_buf.bufElSizes = &in_buffer_element_size;
+
+ /* add current frame to the queue */
+ if ((ret = ff_af_queue_add(&s->afq, frame) < 0))
+ return ret;
+ }
+
+ /* The maximum packet size is 6144 bits aka 768 bytes per channel. */
+ if ((ret = ff_alloc_packet(avpkt, FFMAX(8192, 768 * avctx->channels)))) {
+ av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+ return ret;
+ }
+
+ out_ptr = avpkt->data;
+ out_buffer_size = avpkt->size;
+ out_buffer_element_size = 1;
+ out_buf.numBufs = 1;
+ out_buf.bufs = &out_ptr;
+ out_buf.bufferIdentifiers = &out_buffer_identifier;
+ out_buf.bufSizes = &out_buffer_size;
+ out_buf.bufElSizes = &out_buffer_element_size;
+
+ if ((err = aacEncEncode(s->handle, &in_buf, &out_buf, &in_args,
+ &out_args)) != AACENC_OK) {
+ if (!frame && err == AACENC_ENCODE_EOF)
+ return 0;
+ av_log(avctx, AV_LOG_ERROR, "Unable to encode frame: %s\n",
+ aac_get_error(err));
+ return AVERROR(EINVAL);
+ }
+
+ if (!out_args.numOutBytes)
+ return 0;
+
+ /* Get the next frame pts & duration */
+ ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
+ &avpkt->duration);
+
+ avpkt->size = out_args.numOutBytes;
+ *got_packet_ptr = 1;
+ return 0;
+}
+
+static const AVProfile profiles[] = {
+ { FF_PROFILE_AAC_LOW, "LC" },
+ { FF_PROFILE_AAC_HE, "HE-AAC" },
+ { FF_PROFILE_AAC_HE_V2, "HE-AACv2" },
+ { FF_PROFILE_AAC_LD, "LD" },
+ { FF_PROFILE_AAC_ELD, "ELD" },
+ { FF_PROFILE_UNKNOWN },
+};
+
+static const AVCodecDefault aac_encode_defaults[] = {
+ { "b", "0" },
+ { NULL }
+};
+
+static const uint64_t aac_channel_layout[] = {
+ AV_CH_LAYOUT_MONO,
+ AV_CH_LAYOUT_STEREO,
+ AV_CH_LAYOUT_SURROUND,
+ AV_CH_LAYOUT_4POINT0,
+ AV_CH_LAYOUT_5POINT0_BACK,
+ AV_CH_LAYOUT_5POINT1_BACK,
+ 0,
+};
+
+AVCodec ff_libfdk_aac_encoder = {
+ .name = "libfdk_aac",
+ .type = AVMEDIA_TYPE_AUDIO,
+ .id = CODEC_ID_AAC,
+ .priv_data_size = sizeof(AACContext),
+ .init = aac_encode_init,
+ .encode2 = aac_encode_frame,
+ .close = aac_encode_close,
+ .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
+ .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
+ AV_SAMPLE_FMT_NONE },
+ .long_name = NULL_IF_CONFIG_SMALL("Fraunhofer FDK AAC"),
+ .priv_class = &aac_enc_class,
+ .defaults = aac_encode_defaults,
+ .profiles = profiles,
+ .channel_layouts = aac_channel_layout,
+};
--
1.7.9.4
Derek Buitenhuis
2012-07-11 15:44:29 UTC
Permalink
Post by Martin Storsjö
TODO: Are the mpeg2 ones official or specific to the fraunhofer
encoder?
[...]
Post by Martin Storsjö
#define FF_PROFILE_AAC_LOW 1
#define FF_PROFILE_AAC_SSR 2
#define FF_PROFILE_AAC_LTP 3
+#define FF_PROFILE_AAC_HE 4
+#define FF_PROFILE_AAC_HE_V2 28
+#define FF_PROFILE_AAC_LD 22
+#define FF_PROFILE_AAC_ELD 38
Where these numbers come from is something that needs to
be answered before push.

- Derek
Kieran Kunhya
2012-07-11 16:45:44 UTC
Permalink
On Wed, Jul 11, 2012 at 5:44 PM, Derek Buitenhuis
Post by Derek Buitenhuis
Post by Martin Storsjö
TODO: Are the mpeg2 ones official or specific to the fraunhofer
encoder?
[...]
Post by Martin Storsjö
#define FF_PROFILE_AAC_LOW 1
#define FF_PROFILE_AAC_SSR 2
#define FF_PROFILE_AAC_LTP 3
+#define FF_PROFILE_AAC_HE 4
+#define FF_PROFILE_AAC_HE_V2 28
+#define FF_PROFILE_AAC_LD 22
+#define FF_PROFILE_AAC_ELD 38
Where these numbers come from is something that needs to
be answered before push.
- Derek
_______________________________________________
libav-devel mailing list
https://lists.libav.org/mailman/listinfo/libav-devel
AOT tables in ISO/IEC 14496-3
Derek Buitenhuis
2012-07-11 17:11:38 UTC
Permalink
Post by Kieran Kunhya
AOT tables in ISO/IEC 14496-3
OK.

- Derek
Alex Converse
2012-07-11 17:16:27 UTC
Permalink
Post by Kieran Kunhya
On Wed, Jul 11, 2012 at 5:44 PM, Derek Buitenhuis
Post by Derek Buitenhuis
Post by Martin Storsjö
TODO: Are the mpeg2 ones official or specific to the fraunhofer
encoder?
The MPEG-2 ones are "fake." The word profile means very different
things in MPEG-2 and MPEG-4. These are all profiles in the MPEG-2
sense of the word. In MPEG-4 these are audio object types (because it
wouldn't be MPEG-4 if it wasn't all objects and descriptors). MPEG-4
profiles combine lists of AOTs that can be used together. More modern
(and relevant) MPEG-4 profiles (The "AAC Profile" family) map to a
single object type (and 0 or more enhancement object types). The
MPEG-2 "AOTs" used by this encoder are just MPEG-4 AOTs shifted by
127.

They can either be implemented with FF_PROFILEs or with a private
option shifting us into MPEG-2 mode.
Post by Kieran Kunhya
Post by Derek Buitenhuis
Post by Martin Storsjö
#define FF_PROFILE_AAC_LOW 1
#define FF_PROFILE_AAC_SSR 2
#define FF_PROFILE_AAC_LTP 3
+#define FF_PROFILE_AAC_HE 4
+#define FF_PROFILE_AAC_HE_V2 28
+#define FF_PROFILE_AAC_LD 22
+#define FF_PROFILE_AAC_ELD 38
Where these numbers come from is something that needs to
be answered before push.
AOT tables in ISO/IEC 14496-3
AOT minus 1, since MAIN is zero in MPEG-2 but in MPEG-4 NULL is zero
and MAIN is one.
Martin Storsjö
2012-07-11 18:19:09 UTC
Permalink
Post by Alex Converse
Post by Kieran Kunhya
On Wed, Jul 11, 2012 at 5:44 PM, Derek Buitenhuis
Post by Martin Storsjö
TODO: Are the mpeg2 ones official or specific to the fraunhofer
encoder?
The MPEG-2 ones are "fake." The word profile means very different
things in MPEG-2 and MPEG-4. These are all profiles in the MPEG-2
sense of the word. In MPEG-4 these are audio object types (because it
wouldn't be MPEG-4 if it wasn't all objects and descriptors). MPEG-4
profiles combine lists of AOTs that can be used together. More modern
(and relevant) MPEG-4 profiles (The "AAC Profile" family) map to a
single object type (and 0 or more enhancement object types). The
MPEG-2 "AOTs" used by this encoder are just MPEG-4 AOTs shifted by
127.
They can either be implemented with FF_PROFILEs or with a private
option shifting us into MPEG-2 mode.
Right. Yes, I actually left these out (but apparently forgot to update the
commit message), since I didn't get them to work properly when I tried.

// Martin
Continue reading on narkive:
Loading...