diff options
Diffstat (limited to 'simd/x86_64/jsimd.c')
-rw-r--r-- | simd/x86_64/jsimd.c | 74 |
1 files changed, 56 insertions, 18 deletions
diff --git a/simd/x86_64/jsimd.c b/simd/x86_64/jsimd.c index eb766799..3f5ee77e 100644 --- a/simd/x86_64/jsimd.c +++ b/simd/x86_64/jsimd.c @@ -2,8 +2,8 @@ * jsimd_x86_64.c * * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB - * Copyright (C) 2009-2011, 2014, 2016, 2018, D. R. Commander. - * Copyright (C) 2015-2016, 2018, Matthieu Darbois. + * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2023, D. R. Commander. + * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois. * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. @@ -21,7 +21,6 @@ #include "../../jdct.h" #include "../../jsimddct.h" #include "../jsimd.h" -#include "jconfigint.h" /* * In the PIC cases, we have no guarantee that constants will keep @@ -32,19 +31,17 @@ #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ #define IS_ALIGNED_AVX(ptr) (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */ -static unsigned int simd_support = (unsigned int)(~0); -static unsigned int simd_huffman = 1; +static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0); +static THREAD_LOCAL unsigned int simd_huffman = 1; /* * Check what SIMD accelerations are supported. - * - * FIXME: This code is racy under a multi-threaded environment. */ LOCAL(void) init_simd(void) { #ifndef NO_GETENV - char *env = NULL; + char env[2] = { 0 }; #endif if (simd_support != ~0U) @@ -54,17 +51,13 @@ init_simd(void) #ifndef NO_GETENV /* Force different settings through environment variables */ - env = getenv("JSIMD_FORCESSE2"); - if ((env != NULL) && (strcmp(env, "1") == 0)) + if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1")) simd_support &= JSIMD_SSE2; - env = getenv("JSIMD_FORCEAVX2"); - if ((env != NULL) && (strcmp(env, "1") == 0)) + if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1")) simd_support &= JSIMD_AVX2; - env = getenv("JSIMD_FORCENONE"); - if ((env != NULL) && (strcmp(env, "1") == 0)) + if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1")) simd_support = 0; - env = getenv("JSIMD_NOHUFFENC"); - if ((env != NULL) && (strcmp(env, "1") == 0)) + if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1")) simd_huffman = 0; #endif } @@ -152,6 +145,9 @@ jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + if (simd_support == ~0U) + init_simd(); + switch (cinfo->in_color_space) { case JCS_EXT_RGB: avx2fct = jsimd_extrgb_ycc_convert_avx2; @@ -201,6 +197,9 @@ jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + if (simd_support == ~0U) + init_simd(); + switch (cinfo->in_color_space) { case JCS_EXT_RGB: avx2fct = jsimd_extrgb_gray_convert_avx2; @@ -250,6 +249,9 @@ jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + if (simd_support == ~0U) + init_simd(); + switch (cinfo->out_color_space) { case JCS_EXT_RGB: avx2fct = jsimd_ycc_extrgb_convert_avx2; @@ -340,6 +342,9 @@ GLOBAL(void) jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY output_data) { + if (simd_support == ~0U) + init_simd(); + if (simd_support & JSIMD_AVX2) jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor, compptr->v_samp_factor, @@ -356,6 +361,9 @@ GLOBAL(void) jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY output_data) { + if (simd_support == ~0U) + init_simd(); + if (simd_support & JSIMD_AVX2) jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor, compptr->v_samp_factor, @@ -410,6 +418,9 @@ GLOBAL(void) jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { + if (simd_support == ~0U) + init_simd(); + if (simd_support & JSIMD_AVX2) jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width, input_data, output_data_ptr); @@ -422,6 +433,9 @@ GLOBAL(void) jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { + if (simd_support == ~0U) + init_simd(); + if (simd_support & JSIMD_AVX2) jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width, input_data, output_data_ptr); @@ -476,6 +490,9 @@ GLOBAL(void) jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { + if (simd_support == ~0U) + init_simd(); + if (simd_support & JSIMD_AVX2) jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor, compptr->downsampled_width, input_data, @@ -490,6 +507,9 @@ GLOBAL(void) jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { + if (simd_support == ~0U) + init_simd(); + if (simd_support & JSIMD_AVX2) jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor, compptr->downsampled_width, input_data, @@ -549,6 +569,9 @@ jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + if (simd_support == ~0U) + init_simd(); + switch (cinfo->out_color_space) { case JCS_EXT_RGB: avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2; @@ -597,6 +620,9 @@ jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + if (simd_support == ~0U) + init_simd(); + switch (cinfo->out_color_space) { case JCS_EXT_RGB: avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2; @@ -686,6 +712,9 @@ GLOBAL(void) jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace) { + if (simd_support == ~0U) + init_simd(); + if (simd_support & JSIMD_AVX2) jsimd_convsamp_avx2(sample_data, start_col, workspace); else @@ -755,6 +784,9 @@ jsimd_can_fdct_float(void) GLOBAL(void) jsimd_fdct_islow(DCTELEM *data) { + if (simd_support == ~0U) + init_simd(); + if (simd_support & JSIMD_AVX2) jsimd_fdct_islow_avx2(data); else @@ -816,6 +848,9 @@ jsimd_can_quantize_float(void) GLOBAL(void) jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) { + if (simd_support == ~0U) + init_simd(); + if (simd_support & JSIMD_AVX2) jsimd_quantize_avx2(coef_block, divisors, workspace); else @@ -970,6 +1005,9 @@ jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { + if (simd_support == ~0U) + init_simd(); + if (simd_support & JSIMD_AVX2) jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf, output_col); @@ -1040,7 +1078,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void) GLOBAL(void) jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, - int Al, JCOEF *values, size_t *zerobits) + int Al, UJCOEF *values, size_t *zerobits) { jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start, Sl, Al, values, zerobits); @@ -1064,7 +1102,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void) GLOBAL(int) jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, - int Al, JCOEF *absvalues, size_t *bits) + int Al, UJCOEF *absvalues, size_t *bits) { return jsimd_encode_mcu_AC_refine_prepare_sse2(block, jpeg_natural_order_start, |