Skip to content

Commit

Permalink
Merge remote-tracking branch 'qatar/master'
Browse files Browse the repository at this point in the history
* qatar/master:
  SBR DSP: fix SSE code to not use SSE2 instructions.
  cpu: initialize mask to -1, so that by default, optimizations are used.
  error_resilience: initialize s->block_index[].
  svq3: protect against negative quantizers.
  Don't use ff_cropTbl[] for IDCT.
  swscale: make filterPos 32bit.
  FATE: add CPUFLAGS variable, mapping to -cpuflags avconv option.
  avconv: add -cpuflags option for setting supported cpuflags.
  cpu: add av_set_cpu_flags_mask().
  libx264: Allow overriding the sliced threads option
  avconv: fix counting encoded video size.

Conflicts:
	doc/APIchanges
	doc/fate.texi
	doc/ffmpeg.texi
	ffmpeg.c
	libavcodec/h264idct_template.c
	libavcodec/svq3.c
	libavutil/avutil.h
	libavutil/cpu.c
	libavutil/cpu.h
	libswscale/swscale.c
	tests/Makefile
	tests/fate-run.sh
	tests/regression-funcs.sh

Merged-by: Michael Niedermayer <[email protected]>
  • Loading branch information
michaelni committed Mar 7, 2012
2 parents 57986c5 + b516190 commit 6df42f9
Show file tree
Hide file tree
Showing 30 changed files with 297 additions and 230 deletions.
60 changes: 49 additions & 11 deletions cmdutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -340,11 +340,8 @@ void parse_options(void *optctx, int argc, char **argv, const OptionDef *options
}
}

/*
* Return index of option opt in argv or 0 if not found.
*/
static int locate_option(int argc, char **argv, const OptionDef *options,
const char *optname)
int locate_option(int argc, char **argv, const OptionDef *options,
const char *optname)
{
const OptionDef *po;
int i;
Expand Down Expand Up @@ -537,13 +534,54 @@ int opt_max_alloc(const char *opt, const char *arg)

/**
 * Handle the -cpuflags option: force the set of CPU flags.
 *
 * arg is evaluated as a flags expression over the names listed in
 * cpuflags_opts. The evaluation is seeded with the value returned by
 * av_get_cpu_flags(), and the result is handed to av_force_cpu_flags().
 *
 * Every CPUFLAG_* composite below also carries the flags of the instruction
 * sets it builds on, e.g. "sse2" sets SSE2, SSE, MMX2 and MMX together.
 *
 * @param opt  option name (not used by this function)
 * @param arg  flags expression to evaluate
 * @return 0 on success, otherwise the negative value returned by
 *         av_opt_eval_flags() when arg cannot be evaluated
 */
int opt_cpuflags(const char *opt, const char *arg)
{
#define CPUFLAG_MMX2     (AV_CPU_FLAG_MMX      | AV_CPU_FLAG_MMX2)
#define CPUFLAG_3DNOW    (AV_CPU_FLAG_3DNOW    | AV_CPU_FLAG_MMX)
#define CPUFLAG_3DNOWEXT (AV_CPU_FLAG_3DNOWEXT | CPUFLAG_3DNOW)
#define CPUFLAG_SSE      (AV_CPU_FLAG_SSE      | CPUFLAG_MMX2)
#define CPUFLAG_SSE2     (AV_CPU_FLAG_SSE2     | CPUFLAG_SSE)
#define CPUFLAG_SSE2SLOW (AV_CPU_FLAG_SSE2SLOW | CPUFLAG_SSE2)
#define CPUFLAG_SSE3     (AV_CPU_FLAG_SSE3     | CPUFLAG_SSE2)
#define CPUFLAG_SSE3SLOW (AV_CPU_FLAG_SSE3SLOW | CPUFLAG_SSE3)
#define CPUFLAG_SSSE3    (AV_CPU_FLAG_SSSE3    | CPUFLAG_SSE3)
#define CPUFLAG_SSE4     (AV_CPU_FLAG_SSE4     | CPUFLAG_SSSE3)
#define CPUFLAG_SSE42    (AV_CPU_FLAG_SSE42    | CPUFLAG_SSE4)
#define CPUFLAG_AVX      (AV_CPU_FLAG_AVX      | CPUFLAG_SSE42)
#define CPUFLAG_XOP      (AV_CPU_FLAG_XOP      | CPUFLAG_AVX)
#define CPUFLAG_FMA4     (AV_CPU_FLAG_FMA4     | CPUFLAG_AVX)
    /* The first entry is the flags option itself; the rest are its named constants. */
    static const AVOption cpuflags_opts[] = {
        { "flags"   , NULL, 0, AV_OPT_TYPE_FLAGS, { 0 }, INT64_MIN, INT64_MAX, .unit = "flags" },
        { "altivec" , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ALTIVEC  },    .unit = "flags" },
        { "mmx"     , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_MMX      },    .unit = "flags" },
        { "mmx2"    , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_MMX2         },    .unit = "flags" },
        { "sse"     , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE          },    .unit = "flags" },
        { "sse2"    , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2         },    .unit = "flags" },
        { "sse2slow", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2SLOW     },    .unit = "flags" },
        { "sse3"    , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE3         },    .unit = "flags" },
        { "sse3slow", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE3SLOW     },    .unit = "flags" },
        { "ssse3"   , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSSE3        },    .unit = "flags" },
        { "atom"    , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ATOM     },    .unit = "flags" },
        { "sse4.1"  , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE4         },    .unit = "flags" },
        { "sse4.2"  , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE42        },    .unit = "flags" },
        { "avx"     , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_AVX          },    .unit = "flags" },
        { "xop"     , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_XOP          },    .unit = "flags" },
        { "fma4"    , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_FMA4         },    .unit = "flags" },
        { "3dnow"   , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_3DNOW        },    .unit = "flags" },
        { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_3DNOWEXT     },    .unit = "flags" },
        { NULL },
    };
    static const AVClass class = {
        .class_name = "cpuflags",
        .item_name  = av_default_item_name,
        .option     = cpuflags_opts,
        .version    = LIBAVUTIL_VERSION_INT,
    };
    int flags = av_get_cpu_flags();
    int ret;
    /* av_opt_eval_flags() is given a pointer to this AVClass pointer as its object. */
    const AVClass *pclass = &class;

    /*
     * The former strtol()-based numeric parsing is dropped on purpose: its
     * trailing-character check would reject every symbolic flag name that
     * the expression evaluator accepts.
     */
    if ((ret = av_opt_eval_flags(&pclass, &cpuflags_opts[0], arg, &flags)) < 0)
        return ret;

    av_force_cpu_flags(flags);
    return 0;
}
Expand Down
6 changes: 6 additions & 0 deletions cmdutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,12 @@ int parse_option(void *optctx, const char *opt, const char *arg,
*/
void parse_loglevel(int argc, char **argv, const OptionDef *options);

/**
* Return the index of option optname in argv, or 0 if it is not found.
*
* @param argc    number of entries in argv
* @param argv    argument vector to search
* @param options option definitions (presumably used to tell options from
*                their arguments; confirm against the definition)
* @param optname option name to look for
*/
int locate_option(int argc, char **argv, const OptionDef *options,
const char *optname);

/**
* Check if the given stream matches a stream specifier.
*
Expand Down
2 changes: 1 addition & 1 deletion cmdutils_common_opts.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@
{ "debug", HAS_ARG, {(void*)opt_codec_debug}, "set debug flags", "flags" },
{ "report", 0, {(void*)opt_report}, "generate a report" },
{ "max_alloc", HAS_ARG, {(void*)opt_max_alloc}, "set maximum size of a single allocated block", "bytes" },
{ "cpuflags", HAS_ARG, {(void*)opt_cpuflags}, "force specific cpu flags", "flags" },
{ "cpuflags", HAS_ARG | OPT_EXPERT, {(void*)opt_cpuflags}, "force specific cpu flags", "flags" },
4 changes: 4 additions & 0 deletions doc/avtools-common-opts.texi
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@ It also implies @code{-loglevel verbose}.
Note: setting the environment variable @code{FFREPORT} to any value has the
same effect.

@item -cpuflags flags (@emph{global})
Allows setting and clearing cpu flags. This option is intended
for testing. Do not use it unless you know what you're doing.

@end table

@section AVOptions
Expand Down
6 changes: 4 additions & 2 deletions doc/fate.texi
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,11 @@ the synchronisation of the samples directory.
@item THREADS
Specify how many threads to use while running regression tests, it is
quite useful to detect thread-related regressions.
@item CPUFLAGS
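Specify the CPU flags to use while running regression tests; the value
is passed to the tools through their @code{-cpuflags} option.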
@end table

Example:
@example
make V=1 SAMPLES=/var/fate/samples THREADS=2 fate
@end example
make V=1 SAMPLES=/var/fate/samples THREADS=2 CPUFLAGS=mmx fate
@end example
9 changes: 9 additions & 0 deletions ffmpeg.c
Original file line number Diff line number Diff line change
Expand Up @@ -4963,6 +4963,13 @@ static int opt_deinterlace(const char *opt, const char *arg)
return 0;
}

/**
 * Look for a "cpuflags" option on the command line and, if it has a value,
 * apply it immediately through opt_cpuflags().
 *
 * Nothing happens when the option is absent (locate_option() returns 0) or
 * when no argument follows it. The return value of opt_cpuflags() is not
 * checked here.
 */
static void parse_cpuflags(int argc, char **argv, const OptionDef *options)
{
    const int pos = locate_option(argc, argv, options, "cpuflags");

    /* The option's value is the argv entry right after the option itself. */
    if (!pos || !argv[pos + 1])
        return;

    opt_cpuflags("cpuflags", argv[pos + 1]);
}

#define OFFSET(x) offsetof(OptionsContext, x)
static const OptionDef options[] = {
/* main options */
Expand Down Expand Up @@ -5136,6 +5143,8 @@ int main(int argc, char **argv)

term_init();

parse_cpuflags(argc, argv, options);

/* parse options */
parse_options(&o, argc, argv, options, opt_output_file);

Expand Down
70 changes: 30 additions & 40 deletions libavcodec/dsputil.c
Original file line number Diff line number Diff line change
Expand Up @@ -367,18 +367,17 @@ void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
int line_size)
{
int i;
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

/* read the pixels */
for(i=0;i<8;i++) {
pixels[0] = cm[block[0]];
pixels[1] = cm[block[1]];
pixels[2] = cm[block[2]];
pixels[3] = cm[block[3]];
pixels[4] = cm[block[4]];
pixels[5] = cm[block[5]];
pixels[6] = cm[block[6]];
pixels[7] = cm[block[7]];
pixels[0] = av_clip_uint8(block[0]);
pixels[1] = av_clip_uint8(block[1]);
pixels[2] = av_clip_uint8(block[2]);
pixels[3] = av_clip_uint8(block[3]);
pixels[4] = av_clip_uint8(block[4]);
pixels[5] = av_clip_uint8(block[5]);
pixels[6] = av_clip_uint8(block[6]);
pixels[7] = av_clip_uint8(block[7]);

pixels += line_size;
block += 8;
Expand All @@ -389,14 +388,13 @@ static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels
int line_size)
{
int i;
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

/* read the pixels */
for(i=0;i<4;i++) {
pixels[0] = cm[block[0]];
pixels[1] = cm[block[1]];
pixels[2] = cm[block[2]];
pixels[3] = cm[block[3]];
pixels[0] = av_clip_uint8(block[0]);
pixels[1] = av_clip_uint8(block[1]);
pixels[2] = av_clip_uint8(block[2]);
pixels[3] = av_clip_uint8(block[3]);

pixels += line_size;
block += 8;
Expand All @@ -407,12 +405,11 @@ static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels
int line_size)
{
int i;
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

/* read the pixels */
for(i=0;i<2;i++) {
pixels[0] = cm[block[0]];
pixels[1] = cm[block[1]];
pixels[0] = av_clip_uint8(block[0]);
pixels[1] = av_clip_uint8(block[1]);

pixels += line_size;
block += 8;
Expand Down Expand Up @@ -444,18 +441,17 @@ void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
int line_size)
{
int i;
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

/* read the pixels */
for(i=0;i<8;i++) {
pixels[0] = cm[pixels[0] + block[0]];
pixels[1] = cm[pixels[1] + block[1]];
pixels[2] = cm[pixels[2] + block[2]];
pixels[3] = cm[pixels[3] + block[3]];
pixels[4] = cm[pixels[4] + block[4]];
pixels[5] = cm[pixels[5] + block[5]];
pixels[6] = cm[pixels[6] + block[6]];
pixels[7] = cm[pixels[7] + block[7]];
pixels[0] = av_clip_uint8(pixels[0] + block[0]);
pixels[1] = av_clip_uint8(pixels[1] + block[1]);
pixels[2] = av_clip_uint8(pixels[2] + block[2]);
pixels[3] = av_clip_uint8(pixels[3] + block[3]);
pixels[4] = av_clip_uint8(pixels[4] + block[4]);
pixels[5] = av_clip_uint8(pixels[5] + block[5]);
pixels[6] = av_clip_uint8(pixels[6] + block[6]);
pixels[7] = av_clip_uint8(pixels[7] + block[7]);
pixels += line_size;
block += 8;
}
Expand All @@ -465,14 +461,13 @@ static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels
int line_size)
{
int i;
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

/* read the pixels */
for(i=0;i<4;i++) {
pixels[0] = cm[pixels[0] + block[0]];
pixels[1] = cm[pixels[1] + block[1]];
pixels[2] = cm[pixels[2] + block[2]];
pixels[3] = cm[pixels[3] + block[3]];
pixels[0] = av_clip_uint8(pixels[0] + block[0]);
pixels[1] = av_clip_uint8(pixels[1] + block[1]);
pixels[2] = av_clip_uint8(pixels[2] + block[2]);
pixels[3] = av_clip_uint8(pixels[3] + block[3]);
pixels += line_size;
block += 8;
}
Expand All @@ -482,12 +477,11 @@ static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels
int line_size)
{
int i;
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

/* read the pixels */
for(i=0;i<2;i++) {
pixels[0] = cm[pixels[0] + block[0]];
pixels[1] = cm[pixels[1] + block[1]];
pixels[0] = av_clip_uint8(pixels[0] + block[0]);
pixels[1] = av_clip_uint8(pixels[1] + block[1]);
pixels += line_size;
block += 8;
}
Expand Down Expand Up @@ -2779,15 +2773,11 @@ static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)

/**
 * 1x1 IDCT, "put" variant: write the rounded coefficient block[0]
 * ((block[0] + 4) >> 3), clamped by av_clip_uint8(), to dest[0].
 *
 * The value is clamped arithmetically rather than through ff_cropTbl, so a
 * coefficient outside the table's range cannot cause an out-of-bounds read.
 * line_size is not used.
 */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    dest[0] = av_clip_uint8((block[0] + 4) >> 3);
}
/**
 * 1x1 IDCT, "add" variant: add the rounded coefficient block[0]
 * ((block[0] + 4) >> 3) to dest[0] once and store the sum clamped by
 * av_clip_uint8().
 *
 * The sum is clamped arithmetically rather than through ff_cropTbl, so a
 * coefficient outside the table's range cannot cause an out-of-bounds read.
 * line_size is not used.
 */
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4) >> 3));
}

/* Does nothing: all three parameters are ignored and no value is returned. */
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused)
{
}
Expand Down
20 changes: 20 additions & 0 deletions libavcodec/error_resilience.c
Original file line number Diff line number Diff line change
Expand Up @@ -440,9 +440,14 @@ static void guess_mv(MpegEncContext *s)
if ((!(s->avctx->error_concealment&FF_EC_GUESS_MVS)) ||
num_avail <= mb_width / 2) {
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
s->mb_x = 0;
s->mb_y = mb_y;
ff_init_block_index(s);
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
const int mb_xy = mb_x + mb_y * s->mb_stride;

ff_update_block_index(s);

if (IS_INTRA(s->current_picture.f.mb_type[mb_xy]))
continue;
if (!(s->error_status_table[mb_xy] & ER_MV_ERROR))
Expand Down Expand Up @@ -477,6 +482,9 @@ static void guess_mv(MpegEncContext *s)

changed = 0;
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
s->mb_x = 0;
s->mb_y = mb_y;
ff_init_block_index(s);
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
const int mb_xy = mb_x + mb_y * s->mb_stride;
int mv_predictor[8][2] = { { 0 } };
Expand All @@ -488,6 +496,8 @@ static void guess_mv(MpegEncContext *s)
const int mot_index = (mb_x + mb_y * mot_stride) * mot_step;
int prev_x, prev_y, prev_ref;

ff_update_block_index(s);

if ((mb_x ^ mb_y ^ pass) & 1)
continue;

Expand Down Expand Up @@ -1098,11 +1108,16 @@ void ff_er_frame_end(MpegEncContext *s)

/* handle inter blocks with damaged AC */
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
s->mb_x = 0;
s->mb_y = mb_y;
ff_init_block_index(s);
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
const int mb_xy = mb_x + mb_y * s->mb_stride;
const int mb_type = s->current_picture.f.mb_type[mb_xy];
int dir = !s->last_picture.f.data[0];

ff_update_block_index(s);

error = s->error_status_table[mb_xy];

if (IS_INTRA(mb_type))
Expand Down Expand Up @@ -1140,11 +1155,16 @@ void ff_er_frame_end(MpegEncContext *s)
/* guess MVs */
if (s->pict_type == AV_PICTURE_TYPE_B) {
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
s->mb_x = 0;
s->mb_y = mb_y;
ff_init_block_index(s);
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
int xy = mb_x * 2 + mb_y * 2 * s->b8_stride;
const int mb_xy = mb_x + mb_y * s->mb_stride;
const int mb_type = s->current_picture.f.mb_type[mb_xy];

ff_update_block_index(s);

error = s->error_status_table[mb_xy];

if (IS_INTRA(mb_type))
Expand Down
Loading

0 comments on commit 6df42f9

Please sign in to comment.