forked from HandBrake/HandBrake
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
contrib: Add x264 patch to fix AVX-512 alignment.
- Loading branch information
1 parent
f0cad86
commit a8a3248
Showing
1 changed file
with
74 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
From 9384a7389b251b59a079ccc3d1af9edd42e3d5e6 Mon Sep 17 00:00:00 2001 | ||
From: Henrik Gramner <[email protected]> | ||
Date: Mon, 15 Jan 2018 21:42:59 +0100 | ||
Subject: [PATCH] Correctly align buffers for AVX and AVX-512 | ||
|
||
Fixes segfaults on Windows where the stack is only 16-byte aligned. | ||
--- | ||
common/common.h | 8 ++++---- | ||
encoder/analyse.c | 4 ++-- | ||
encoder/ratecontrol.c | 2 +- | ||
3 files changed, 7 insertions(+), 7 deletions(-) | ||
|
||
diff --git a/common/common.h b/common/common.h | ||
index 27a56fbd..84d39ce3 100644 | ||
--- a/common/common.h | ||
+++ b/common/common.h | ||
@@ -569,16 +569,16 @@ struct x264_t | ||
ALIGNED_64( pixel fdec_buf[54*FDEC_STRIDE] ); | ||
|
||
/* i4x4 and i8x8 backup data, for skipping the encode stage when possible */ | ||
- ALIGNED_16( pixel i4x4_fdec_buf[16*16] ); | ||
- ALIGNED_16( pixel i8x8_fdec_buf[16*16] ); | ||
+ ALIGNED_32( pixel i4x4_fdec_buf[16*16] ); | ||
+ ALIGNED_32( pixel i8x8_fdec_buf[16*16] ); | ||
ALIGNED_64( dctcoef i8x8_dct_buf[3][64] ); | ||
ALIGNED_64( dctcoef i4x4_dct_buf[15][16] ); | ||
uint32_t i4x4_nnz_buf[4]; | ||
uint32_t i8x8_nnz_buf[4]; | ||
|
||
/* Psy trellis DCT data */ | ||
- ALIGNED_16( dctcoef fenc_dct8[4][64] ); | ||
- ALIGNED_16( dctcoef fenc_dct4[16][16] ); | ||
+ ALIGNED_64( dctcoef fenc_dct8[4][64] ); | ||
+ ALIGNED_64( dctcoef fenc_dct4[16][16] ); | ||
|
||
/* Psy RD SATD/SA8D scores cache */ | ||
ALIGNED_64( uint32_t fenc_satd_cache[32] ); | ||
diff --git a/encoder/analyse.c b/encoder/analyse.c | ||
index ecd6dae1..3577e5bc 100644 | ||
--- a/encoder/analyse.c | ||
+++ b/encoder/analyse.c | ||
@@ -558,7 +558,7 @@ static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int force_intra, | ||
/* For trellis=2, we need to do this for both sizes of DCT, for trellis=1 we only need to use it on the chosen mode. */ | ||
static void inline psy_trellis_init( x264_t *h, int do_both_dct ) | ||
{ | ||
- ALIGNED_16( static pixel zero[16*FDEC_STRIDE] ) = {0}; | ||
+ ALIGNED_64( static pixel zero[16*FDEC_STRIDE] ) = {0}; | ||
|
||
if( do_both_dct || h->mb.b_transform_8x8 ) | ||
h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], zero ); | ||
@@ -2011,7 +2011,7 @@ static void mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a ) | ||
} | ||
else | ||
{ | ||
- ALIGNED_ARRAY_32( pixel, pixuv, [2],[16*FENC_STRIDE] ); | ||
+ ALIGNED_ARRAY_64( pixel, pixuv, [2],[16*FENC_STRIDE] ); | ||
int chromapix = h->luma2chroma_pixel[PIXEL_16x16]; | ||
int v_shift = CHROMA_V_SHIFT; | ||
|
||
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c | ||
index b09c2434..99803718 100644 | ||
--- a/encoder/ratecontrol.c | ||
+++ b/encoder/ratecontrol.c | ||
@@ -243,7 +243,7 @@ static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x2 | ||
stride <<= b_field; | ||
if( b_chroma ) | ||
{ | ||
- ALIGNED_ARRAY_32( pixel, pix,[FENC_STRIDE*16] ); | ||
+ ALIGNED_ARRAY_64( pixel, pix,[FENC_STRIDE*16] ); | ||
int chromapix = h->luma2chroma_pixel[PIXEL_16x16]; | ||
int shift = 7 - CHROMA_V_SHIFT; | ||
|
||
-- | ||
2.11.0 |