Discussion:
[libav-devel] [PATCH v4] qsvvpp: Fix to perform full init only when needed
Maxym Dmytrychenko
2018-07-17 16:07:20 UTC
Permalink
Not used VPP sessions, like for hwupload/hwdownload handling,
can increase CPU utilization and this patch fixes it.
thank you,Joe, for the contribution.

Signed-off-by: Maxym Dmytrychenko <***@gmail.com>
---
libavutil/hwcontext_qsv.c | 38 +++++++++++++++++++++++++++++---------
1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/libavutil/hwcontext_qsv.c b/libavutil/hwcontext_qsv.c
index b3eb4a3ea..390c3aac4 100644
--- a/libavutil/hwcontext_qsv.c
+++ b/libavutil/hwcontext_qsv.c
@@ -18,6 +18,7 @@

#include <stdint.h>
#include <string.h>
+#include <stdatomic.h>

#include <mfx/mfxvideo.h>

@@ -56,7 +57,9 @@ typedef struct QSVDeviceContext {

typedef struct QSVFramesContext {
mfxSession session_download;
+ atomic_int session_download_init;
mfxSession session_upload;
+ atomic_int session_upload_init;

AVBufferRef *child_frames_ref;
mfxFrameSurface1 *surfaces_internal;
@@ -146,13 +149,15 @@ static void qsv_frames_uninit(AVHWFramesContext *ctx)
MFXVideoVPP_Close(s->session_download);
MFXClose(s->session_download);
}
- s->session_download = NULL;
+ s->session_download = NULL;
+ s->session_download_init = 0;

if (s->session_upload) {
MFXVideoVPP_Close(s->session_upload);
MFXClose(s->session_upload);
}
- s->session_upload = NULL;
+ s->session_upload = NULL;
+ s->session_upload_init = 0;

av_freep(&s->mem_ids);
av_freep(&s->surface_ptrs);
@@ -535,13 +540,10 @@ static int qsv_frames_init(AVHWFramesContext *ctx)
s->mem_ids[i] = frames_hwctx->surfaces[i].Data.MemId;
}

- ret = qsv_init_internal_session(ctx, &s->session_download, 0);
- if (ret < 0)
- return ret;
-
- ret = qsv_init_internal_session(ctx, &s->session_upload, 1);
- if (ret < 0)
- return ret;
+ s->session_download = NULL;
+ s->session_upload = NULL;
+ s->session_download_init = 0;
+ s->session_upload_init = 0;

return 0;
}
@@ -741,6 +743,15 @@ static int qsv_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
mfxSyncPoint sync = NULL;
mfxStatus err;

+ while (!s->session_download_init && !s->session_download) {
+ if (atomic_fetch_add(&s->session_download_init, 1) == 0) {
+ qsv_init_internal_session(ctx, &s->session_download, 0);
+ }
+ else {
+ av_usleep(1);
+ }
+ }
+
if (!s->session_download) {
if (s->child_frames_ref)
return qsv_transfer_data_child(ctx, dst, src);
@@ -788,6 +799,15 @@ static int qsv_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
mfxSyncPoint sync = NULL;
mfxStatus err;

+ while (!s->session_upload_init && !s->session_upload) {
+ if (atomic_fetch_add(&s->session_upload_init, 1) == 0) {
+ qsv_init_internal_session(ctx, &s->session_upload, 1);
+ }
+ else {
+ av_usleep(1);
+ }
+ }
+
if (!s->session_upload) {
if (s->child_frames_ref)
return qsv_transfer_data_child(ctx, dst, src);
--
2.15.2 (Apple Git-101.1)
Mark Thompson
2018-07-17 22:48:00 UTC
Permalink
Post by Maxym Dmytrychenko
Not used VPP sessions, like for hwupload/hwdownload handling,
can increase CPU utilization and this patch fixes it.
thank you,Joe, for the contribution.
---
libavutil/hwcontext_qsv.c | 38 +++++++++++++++++++++++++++++---------
1 file changed, 29 insertions(+), 9 deletions(-)
diff --git a/libavutil/hwcontext_qsv.c b/libavutil/hwcontext_qsv.c
index b3eb4a3ea..390c3aac4 100644
--- a/libavutil/hwcontext_qsv.c
+++ b/libavutil/hwcontext_qsv.c
@@ -18,6 +18,7 @@
#include <stdint.h>
#include <string.h>
+#include <stdatomic.h>
#include <mfx/mfxvideo.h>
@@ -56,7 +57,9 @@ typedef struct QSVDeviceContext {
typedef struct QSVFramesContext {
mfxSession session_download;
+ atomic_int session_download_init;
mfxSession session_upload;
+ atomic_int session_upload_init;
AVBufferRef *child_frames_ref;
mfxFrameSurface1 *surfaces_internal;
@@ -146,13 +149,15 @@ static void qsv_frames_uninit(AVHWFramesContext *ctx)
MFXVideoVPP_Close(s->session_download);
MFXClose(s->session_download);
}
- s->session_download = NULL;
+ s->session_download = NULL;
+ s->session_download_init = 0;
if (s->session_upload) {
MFXVideoVPP_Close(s->session_upload);
MFXClose(s->session_upload);
}
- s->session_upload = NULL;
+ s->session_upload = NULL;
+ s->session_upload_init = 0;
av_freep(&s->mem_ids);
av_freep(&s->surface_ptrs);
@@ -535,13 +540,10 @@ static int qsv_frames_init(AVHWFramesContext *ctx)
s->mem_ids[i] = frames_hwctx->surfaces[i].Data.MemId;
}
- ret = qsv_init_internal_session(ctx, &s->session_download, 0);
- if (ret < 0)
- return ret;
-
- ret = qsv_init_internal_session(ctx, &s->session_upload, 1);
- if (ret < 0)
- return ret;
+ s->session_download = NULL;
+ s->session_upload = NULL;
+ s->session_download_init = 0;
+ s->session_upload_init = 0;
return 0;
}
@@ -741,6 +743,15 @@ static int qsv_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
mfxSyncPoint sync = NULL;
mfxStatus err;
+ while (!s->session_download_init && !s->session_download) {
+ if (atomic_fetch_add(&s->session_download_init, 1) == 0) {
+ qsv_init_internal_session(ctx, &s->session_download, 0);
+ }
+ else {
+ av_usleep(1);
This races - consider what happens if the other thread is preempted for more than 1µs, or if the initialisation itself takes more than that long.

You need to actually do some synchronisation here (e.g. with a 'once' variable) - with only atomic flags there is no way to guarantee that the other thread has finished unless you spin, which isn't acceptable.
Post by Maxym Dmytrychenko
+ }
+ }
+
if (!s->session_download) {
if (s->child_frames_ref)
return qsv_transfer_data_child(ctx, dst, src);
@@ -788,6 +799,15 @@ static int qsv_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
mfxSyncPoint sync = NULL;
mfxStatus err;
+ while (!s->session_upload_init && !s->session_upload) {
+ if (atomic_fetch_add(&s->session_upload_init, 1) == 0) {
+ qsv_init_internal_session(ctx, &s->session_upload, 1);
+ }
+ else {
+ av_usleep(1);
+ }
+ }
+
if (!s->session_upload) {
if (s->child_frames_ref)
return qsv_transfer_data_child(ctx, dst, src);
Thanks,

- Mark
Maxym Dmytrychenko
2018-07-17 23:25:59 UTC
Permalink
Post by Maxym Dmytrychenko
Post by Maxym Dmytrychenko
Not used VPP sessions, like for hwupload/hwdownload handling,
can increase CPU utilization and this patch fixes it.
thank you,Joe, for the contribution.
---
libavutil/hwcontext_qsv.c | 38 +++++++++++++++++++++++++++++---------
1 file changed, 29 insertions(+), 9 deletions(-)
diff --git a/libavutil/hwcontext_qsv.c b/libavutil/hwcontext_qsv.c
index b3eb4a3ea..390c3aac4 100644
--- a/libavutil/hwcontext_qsv.c
+++ b/libavutil/hwcontext_qsv.c
@@ -18,6 +18,7 @@
#include <stdint.h>
#include <string.h>
+#include <stdatomic.h>
#include <mfx/mfxvideo.h>
@@ -56,7 +57,9 @@ typedef struct QSVDeviceContext {
typedef struct QSVFramesContext {
mfxSession session_download;
+ atomic_int session_download_init;
mfxSession session_upload;
+ atomic_int session_upload_init;
AVBufferRef *child_frames_ref;
mfxFrameSurface1 *surfaces_internal;
@@ -146,13 +149,15 @@ static void qsv_frames_uninit(AVHWFramesContext
*ctx)
Post by Maxym Dmytrychenko
MFXVideoVPP_Close(s->session_download);
MFXClose(s->session_download);
}
- s->session_download = NULL;
+ s->session_download = NULL;
+ s->session_download_init = 0;
if (s->session_upload) {
MFXVideoVPP_Close(s->session_upload);
MFXClose(s->session_upload);
}
- s->session_upload = NULL;
+ s->session_upload = NULL;
+ s->session_upload_init = 0;
av_freep(&s->mem_ids);
av_freep(&s->surface_ptrs);
@@ -535,13 +540,10 @@ static int qsv_frames_init(AVHWFramesContext *ctx)
s->mem_ids[i] = frames_hwctx->surfaces[i].Data.MemId;
}
- ret = qsv_init_internal_session(ctx, &s->session_download, 0);
- if (ret < 0)
- return ret;
-
- ret = qsv_init_internal_session(ctx, &s->session_upload, 1);
- if (ret < 0)
- return ret;
+ s->session_download = NULL;
+ s->session_upload = NULL;
+ s->session_download_init = 0;
+ s->session_upload_init = 0;
return 0;
}
@@ -741,6 +743,15 @@ static int qsv_transfer_data_from(AVHWFramesContext
*ctx, AVFrame *dst,
Post by Maxym Dmytrychenko
mfxSyncPoint sync = NULL;
mfxStatus err;
+ while (!s->session_download_init && !s->session_download) {
+ if (atomic_fetch_add(&s->session_download_init, 1) == 0) {
+ qsv_init_internal_session(ctx, &s->session_download, 0);
+ }
+ else {
+ av_usleep(1);
This races - consider what happens if the other thread is preempted for
more than 1µs, or if the initialisation itself takes more than that long.
You need to actually do some synchronisation here (e.g. with a 'once'
variable) - with only atomic flags there is no way to guarantee that the
other thread has finished unless you spin, which isn't acceptable.
pthread_once was considered but we need to pass s->session_download
adding mutex - can be overkill for each call of qsv_transfer_data_*

what do you mean by 'once' variable?
dont really see it currently implemented somewhere...
Post by Maxym Dmytrychenko
Post by Maxym Dmytrychenko
+ }
+ }
+
if (!s->session_download) {
if (s->child_frames_ref)
return qsv_transfer_data_child(ctx, dst, src);
@@ -788,6 +799,15 @@ static int qsv_transfer_data_to(AVHWFramesContext
*ctx, AVFrame *dst,
Post by Maxym Dmytrychenko
mfxSyncPoint sync = NULL;
mfxStatus err;
+ while (!s->session_upload_init && !s->session_upload) {
+ if (atomic_fetch_add(&s->session_upload_init, 1) == 0) {
+ qsv_init_internal_session(ctx, &s->session_upload, 1);
+ }
+ else {
+ av_usleep(1);
+ }
+ }
+
if (!s->session_upload) {
if (s->child_frames_ref)
return qsv_transfer_data_child(ctx, dst, src);
Thanks,
- Mark
_______________________________________________
libav-devel mailing list
https://lists.libav.org/mailman/listinfo/libav-devel
regards
Max
Mark Thompson
2018-07-17 23:34:48 UTC
Permalink
Post by Maxym Dmytrychenko
Post by Maxym Dmytrychenko
Post by Maxym Dmytrychenko
Not used VPP sessions, like for hwupload/hwdownload handling,
can increase CPU utilization and this patch fixes it.
thank you,Joe, for the contribution.
---
libavutil/hwcontext_qsv.c | 38 +++++++++++++++++++++++++++++---------
1 file changed, 29 insertions(+), 9 deletions(-)
diff --git a/libavutil/hwcontext_qsv.c b/libavutil/hwcontext_qsv.c
index b3eb4a3ea..390c3aac4 100644
--- a/libavutil/hwcontext_qsv.c
+++ b/libavutil/hwcontext_qsv.c
@@ -18,6 +18,7 @@
#include <stdint.h>
#include <string.h>
+#include <stdatomic.h>
#include <mfx/mfxvideo.h>
@@ -56,7 +57,9 @@ typedef struct QSVDeviceContext {
typedef struct QSVFramesContext {
mfxSession session_download;
+ atomic_int session_download_init;
mfxSession session_upload;
+ atomic_int session_upload_init;
AVBufferRef *child_frames_ref;
mfxFrameSurface1 *surfaces_internal;
@@ -146,13 +149,15 @@ static void qsv_frames_uninit(AVHWFramesContext
*ctx)
Post by Maxym Dmytrychenko
MFXVideoVPP_Close(s->session_download);
MFXClose(s->session_download);
}
- s->session_download = NULL;
+ s->session_download = NULL;
+ s->session_download_init = 0;
if (s->session_upload) {
MFXVideoVPP_Close(s->session_upload);
MFXClose(s->session_upload);
}
- s->session_upload = NULL;
+ s->session_upload = NULL;
+ s->session_upload_init = 0;
av_freep(&s->mem_ids);
av_freep(&s->surface_ptrs);
@@ -535,13 +540,10 @@ static int qsv_frames_init(AVHWFramesContext *ctx)
s->mem_ids[i] = frames_hwctx->surfaces[i].Data.MemId;
}
- ret = qsv_init_internal_session(ctx, &s->session_download, 0);
- if (ret < 0)
- return ret;
-
- ret = qsv_init_internal_session(ctx, &s->session_upload, 1);
- if (ret < 0)
- return ret;
+ s->session_download = NULL;
+ s->session_upload = NULL;
+ s->session_download_init = 0;
+ s->session_upload_init = 0;
return 0;
}
@@ -741,6 +743,15 @@ static int qsv_transfer_data_from(AVHWFramesContext
*ctx, AVFrame *dst,
Post by Maxym Dmytrychenko
mfxSyncPoint sync = NULL;
mfxStatus err;
+ while (!s->session_download_init && !s->session_download) {
+ if (atomic_fetch_add(&s->session_download_init, 1) == 0) {
+ qsv_init_internal_session(ctx, &s->session_download, 0);
+ }
+ else {
+ av_usleep(1);
This races - consider what happens if the other thread is preempted for
more than 1µs, or if the initialisation itself takes more than that long.
(Apologies, I misread that the first time - with the spin loop it should only be a benign race on session_download, but it's still undefined behaviour by C11 and things like tsan will complain about it: read in the non-initialising thread against write in the initialising thread, after the flag has been set to 1.)
Post by Maxym Dmytrychenko
Post by Maxym Dmytrychenko
You need to actually do some synchronisation here (e.g. with a 'once'
variable) - with only atomic flags there is no way to guarantee that the
other thread has finished unless you spin, which isn't acceptable.
pthread_once was considered but we need to pass s->session_download
adding mutex - can be overkill for each call of qsv_transfer_data_*
what do you mean by 'once' variable?
dont really see it currently implemented somewhere...
See AVOnce. It's used in a few places in this role, for example in hwcontext_d3d11va.c for the library loading.
Post by Maxym Dmytrychenko
Post by Maxym Dmytrychenko
Post by Maxym Dmytrychenko
+ }
+ }
+
if (!s->session_download) {
if (s->child_frames_ref)
return qsv_transfer_data_child(ctx, dst, src);
@@ -788,6 +799,15 @@ static int qsv_transfer_data_to(AVHWFramesContext
*ctx, AVFrame *dst,
Post by Maxym Dmytrychenko
mfxSyncPoint sync = NULL;
mfxStatus err;
+ while (!s->session_upload_init && !s->session_upload) {
+ if (atomic_fetch_add(&s->session_upload_init, 1) == 0) {
+ qsv_init_internal_session(ctx, &s->session_upload, 1);
+ }
+ else {
+ av_usleep(1);
+ }
+ }
+
if (!s->session_upload) {
if (s->child_frames_ref)
return qsv_transfer_data_child(ctx, dst, src);
Maxym Dmytrychenko
2018-07-17 23:53:13 UTC
Permalink
Post by Maxym Dmytrychenko
Post by Maxym Dmytrychenko
Post by Maxym Dmytrychenko
Post by Maxym Dmytrychenko
Not used VPP sessions, like for hwupload/hwdownload handling,
can increase CPU utilization and this patch fixes it.
thank you,Joe, for the contribution.
---
libavutil/hwcontext_qsv.c | 38 +++++++++++++++++++++++++++++---------
1 file changed, 29 insertions(+), 9 deletions(-)
diff --git a/libavutil/hwcontext_qsv.c b/libavutil/hwcontext_qsv.c
index b3eb4a3ea..390c3aac4 100644
--- a/libavutil/hwcontext_qsv.c
+++ b/libavutil/hwcontext_qsv.c
@@ -18,6 +18,7 @@
#include <stdint.h>
#include <string.h>
+#include <stdatomic.h>
#include <mfx/mfxvideo.h>
@@ -56,7 +57,9 @@ typedef struct QSVDeviceContext {
typedef struct QSVFramesContext {
mfxSession session_download;
+ atomic_int session_download_init;
mfxSession session_upload;
+ atomic_int session_upload_init;
AVBufferRef *child_frames_ref;
mfxFrameSurface1 *surfaces_internal;
@@ -146,13 +149,15 @@ static void qsv_frames_uninit(AVHWFramesContext
*ctx)
Post by Maxym Dmytrychenko
MFXVideoVPP_Close(s->session_download);
MFXClose(s->session_download);
}
- s->session_download = NULL;
+ s->session_download = NULL;
+ s->session_download_init = 0;
if (s->session_upload) {
MFXVideoVPP_Close(s->session_upload);
MFXClose(s->session_upload);
}
- s->session_upload = NULL;
+ s->session_upload = NULL;
+ s->session_upload_init = 0;
av_freep(&s->mem_ids);
av_freep(&s->surface_ptrs);
@@ -535,13 +540,10 @@ static int qsv_frames_init(AVHWFramesContext
*ctx)
Post by Maxym Dmytrychenko
Post by Maxym Dmytrychenko
Post by Maxym Dmytrychenko
s->mem_ids[i] = frames_hwctx->surfaces[i].Data.MemId;
}
- ret = qsv_init_internal_session(ctx, &s->session_download, 0);
- if (ret < 0)
- return ret;
-
- ret = qsv_init_internal_session(ctx, &s->session_upload, 1);
- if (ret < 0)
- return ret;
+ s->session_download = NULL;
+ s->session_upload = NULL;
+ s->session_download_init = 0;
+ s->session_upload_init = 0;
return 0;
}
@@ -741,6 +743,15 @@ static int
qsv_transfer_data_from(AVHWFramesContext
Post by Maxym Dmytrychenko
Post by Maxym Dmytrychenko
*ctx, AVFrame *dst,
Post by Maxym Dmytrychenko
mfxSyncPoint sync = NULL;
mfxStatus err;
+ while (!s->session_download_init && !s->session_download) {
+ if (atomic_fetch_add(&s->session_download_init, 1) == 0) {
+ qsv_init_internal_session(ctx, &s->session_download, 0);
+ }
+ else {
+ av_usleep(1);
This races - consider what happens if the other thread is preempted for
more than 1µs, or if the initialisation itself takes more than that
long.
(Apologies, I misread that the first time - with the spin loop it should
only be a benign race on session_download, but it's still undefined
behaviour by C11 and things like tsan will complain about it: read in the
non-initialising thread against write in the initialising thread, after the
flag has been set to 1.)
np,
let's fix it, sure.
Post by Maxym Dmytrychenko
Post by Maxym Dmytrychenko
Post by Maxym Dmytrychenko
You need to actually do some synchronisation here (e.g. with a 'once'
variable) - with only atomic flags there is no way to guarantee that the
other thread has finished unless you spin, which isn't acceptable.
pthread_once was considered but we need to pass s->session_download
adding mutex - can be overkill for each call of qsv_transfer_data_*
what do you mean by 'once' variable?
dont really see it currently implemented somewhere...
See AVOnce. It's used in a few places in this role, for example in
hwcontext_d3d11va.c for the library loading.
if I see it correct: (ret = ff_thread_once(&functions_loaded,
load_functions)
it comes down to :
static inline int ff_thread_once(char *control, void (*routine)(void))
note - no params for routine(), where I need to pass current ( ctx,
&s->session_download )
that is the puzzle to solve now.

Also thinking if
#if HAVE_PTHREADS
would be needed.

pthread_mutex / pthread_cond seems to be the only option left,
anything else?
Post by Maxym Dmytrychenko
Post by Maxym Dmytrychenko
Post by Maxym Dmytrychenko
Post by Maxym Dmytrychenko
+ }
+ }
+
if (!s->session_download) {
if (s->child_frames_ref)
return qsv_transfer_data_child(ctx, dst, src);
@@ -788,6 +799,15 @@ static int qsv_transfer_data_to(AVHWFramesContext
*ctx, AVFrame *dst,
Post by Maxym Dmytrychenko
mfxSyncPoint sync = NULL;
mfxStatus err;
+ while (!s->session_upload_init && !s->session_upload) {
+ if (atomic_fetch_add(&s->session_upload_init, 1) == 0) {
+ qsv_init_internal_session(ctx, &s->session_upload, 1);
+ }
+ else {
+ av_usleep(1);
+ }
+ }
+
if (!s->session_upload) {
if (s->child_frames_ref)
return qsv_transfer_data_child(ctx, dst, src);
_______________________________________________
libav-devel mailing list
https://lists.libav.org/mailman/listinfo/libav-devel
Loading...