Fixes warning C4310 from Visual Studio (intel#26)
* Fixed warnings in Visual Studio.

* Fixed warnings in Visual Studio regarding conversion from double to float, or float to double.

* Added comments with the constants in hexadecimal to help with code readability.
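For illustration, here is a minimal standalone sketch (not code from NEON_2_SSE.h; the function names are made up) of the two warning patterns the notes above describe, assuming MSVC: the *_before variants reproduce the warnings, the *_after variants use the replacements applied in this commit.

#include <emmintrin.h>  /* SSE2 intrinsics */

static __m128i set_bias_before(void)
{
    /* 0x80 is an int constant; casting it to int8_t truncates the value,
       which MSVC reports as warning C4310. */
    return _mm_set1_epi8((int8_t)0x80);
}

static __m128i set_bias_after(void)
{
    /* Same bit pattern written as a signed value, hex kept as a comment. */
    return _mm_set1_epi8(-128); /* (int8_t)0x80 */
}

static float scale_before(float r)
{
    /* 256.0 and 0.5 are double literals, so the arithmetic runs in double
       and the implicit conversion back to float draws a warning (likely C4244). */
    int s = (int)(256.0 * r + 0.5);
    r = (float)s / 256.0; /* double result assigned to float r */
    return r;
}

static float scale_after(float r)
{
    /* Float suffixes keep the whole expression in single precision. */
    int s = (int)(256.0f * r + 0.5f);
    r = (float)s / 256.0f;
    return r;
}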
Bizonu authored and Zvictoria committed Feb 27, 2019
1 parent 6516a61 commit 91c5962
Showing 1 changed file with 32 additions and 32 deletions.
64 changes: 32 additions & 32 deletions NEON_2_SSE.h
@@ -3147,7 +3147,7 @@ _NEON2SSE_INLINE int8x16_t vrhaddq_s8(int8x16_t a, int8x16_t b) // VRHADD.S8 q0
{
//no signed average in x86 SIMD, go to unsigned
__m128i c128, au, bu, sum;
c128 = _mm_set1_epi8((int8_t)0x80); //-128
c128 = _mm_set1_epi8(-128); //(int8_t)0x80
au = _mm_sub_epi8(a, c128); //add 128
bu = _mm_sub_epi8(b, c128); //add 128
sum = _mm_avg_epu8(au, bu);
@@ -3159,7 +3159,7 @@ _NEON2SSE_INLINE int16x8_t vrhaddq_s16(int16x8_t a, int16x8_t b) // VRHADD.S16
{
//no signed average in x86 SIMD, go to unsigned
__m128i cx8000, au, bu, sum;
cx8000 = _mm_set1_epi16((int16_t)0x8000); // - 32768
cx8000 = _mm_set1_epi16(-32768); //(int16_t)0x8000
au = _mm_sub_epi16(a, cx8000); //add 32768
bu = _mm_sub_epi16(b, cx8000); //add 32768
sum = _mm_avg_epu16(au, bu);
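For readers of the two vrhaddq hunks above, a scalar sketch (an illustration, not the header's code) of why the signed rounding-halving add can be built from the unsigned x86 average: biasing both inputs by 128 moves them into unsigned range, the unsigned average is taken, and the bias is removed afterwards.

#include <stdint.h>
#include <stdio.h>

static int8_t rhadd_s8_reference(int8_t a, int8_t b)
{
    /* NEON VRHADD.S8 semantics: (a + b + 1) >> 1 without intermediate overflow. */
    return (int8_t)(((int16_t)a + (int16_t)b + 1) >> 1);
}

static int8_t rhadd_s8_via_unsigned(int8_t a, int8_t b)
{
    uint8_t au = (uint8_t)(a + 128); /* adding 128 == subtracting -128 with 8-bit wraparound */
    uint8_t bu = (uint8_t)(b + 128);
    uint8_t avg = (uint8_t)(((uint16_t)au + bu + 1) >> 1); /* what _mm_avg_epu8 does per lane */
    return (int8_t)(avg - 128); /* undo the bias */
}

int main(void)
{
    for (int a = -128; a < 128; ++a)
        for (int b = -128; b < 128; ++b)
            if (rhadd_s8_reference((int8_t)a, (int8_t)b) !=
                rhadd_s8_via_unsigned((int8_t)a, (int8_t)b))
                printf("mismatch at %d,%d\n", a, b);
    return 0;
}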
@@ -4774,9 +4774,9 @@ _NEON2SSE_INLINE uint32x2_t vhsub_u32(uint32x2_t a, uint32x2_t b)
_NEON2SSESTORAGE int8x16_t vhsubq_s8(int8x16_t a, int8x16_t b); // VHSUB.S8 q0,q0,q0
_NEON2SSE_INLINE int8x16_t vhsubq_s8(int8x16_t a, int8x16_t b) // VHSUB.S8 q0,q0,q0
{
// //need to deal with the possibility of internal overflow
//need to deal with the possibility of internal overflow
__m128i c128, au,bu;
c128 = _mm_set1_epi8((int8_t)128);
c128 = _mm_set1_epi8(-128); //(int8_t)0x80
au = _mm_add_epi8( a, c128);
bu = _mm_add_epi8( b, c128);
return vhsubq_u8(au,bu);
@@ -4787,7 +4787,7 @@ _NEON2SSE_INLINE int16x8_t vhsubq_s16(int16x8_t a, int16x8_t b) // VHSUB.S16 q0,
{
//need to deal with the possibility of internal overflow
__m128i c8000, au,bu;
c8000 = _mm_set1_epi16((int16_t)0x8000);
c8000 = _mm_set1_epi16(-32768); //(int16_t)0x8000
au = _mm_add_epi16( a, c8000);
bu = _mm_add_epi16( b, c8000);
return vhsubq_u16(au,bu);
@@ -6312,7 +6312,7 @@ _NEON2SSE_INLINE uint16x4_t vpadd_u16(uint16x4_t a, uint16x4_t b) // VPADD.I16 d
uint16x4_t res64;
__m128i c32767, cfffe, as, bs, res;
c32767 = _mm_set1_epi16 (32767);
cfffe = _mm_set1_epi16 ((int16_t)0xfffe);
cfffe = _mm_set1_epi16 (-2); //(int16_t)0xfffe
as = _mm_sub_epi16 (_pM128i(a), c32767);
bs = _mm_sub_epi16 (_pM128i(b), c32767);
res = _mm_hadd_epi16 (as, bs);
@@ -6798,11 +6798,11 @@ _NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x2_t vrecpe_u32(uint32x2_t
res.m64_u32[i] = 0xffffffff;
}else{
resf = (float) (a.m64_u32[i] * (0.5f / (uint32_t)(1 << 31)));
q = (int)(resf * 512.0); /* a in units of 1/512 rounded down */
r = (float)(1.0 / (((float)q + 0.5) / 512.0)); /* reciprocal r */
s = (int)(256.0 * r + 0.5); /* r in units of 1/256 rounded to nearest */
r = (float)s / 256.0;
res.m64_u32[i] = r * (uint32_t)(1 << 31);
q = (int)(resf * 512.0f); /* a in units of 1/512 rounded down */
r = (float)(1.0f / (((float)q + 0.5f) / 512.0f)); /* reciprocal r */
s = (int)(256.0f * r + 0.5f); /* r in units of 1/256 rounded to nearest */
r = (float)s / 256.0f;
res.m64_u32[i] = (uint32_t)(r * (uint32_t)(1 << 31));
}
}
return res;
@@ -6827,10 +6827,10 @@ _NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x4_t vrecpeq_u32(uint32x4_t
zero = _mm_setzero_si128();
for (i =0; i<4; i++){
resf = (atmp[i] * (0.5f / (uint32_t) (1 << 31))); // 2.3283064365386963E-10 ~(0.5f / (uint32_t) (1 << 31))
q = (int)(resf * 512.0); /* a in units of 1/512 rounded down */
r = 1.0 / (((float)q + 0.5) / 512.0); /* reciprocal r */
s = (int)(256.0 * r + 0.5); /* r in units of 1/256 rounded to nearest */
r = (float)s / 256.0;
q = (int)(resf * 512.0f); /* a in units of 1/512 rounded down */
r = 1.0f / (((float)q + 0.5f) / 512.0f); /* reciprocal r */
s = (int)(256.0f * r + 0.5f); /* r in units of 1/256 rounded to nearest */
r = (float)s / 256.0f;
res[i] = (uint32_t) (r * (((uint32_t)1) << 31) );
}
res128 = _mm_load_si128((__m128i*)res);
@@ -6868,14 +6868,14 @@ _NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x2_t vrsqrte_u32(uint32x2_t
res.m64_u32[i] = 0xffffffff;
}else{
resf = (float) (a.m64_u32[i] * (0.5f / (uint32_t)(1 << 31)));
coeff = (resf < 0.5)? 512.0 : 256.0 ; /* range 0.25 <= resf < 0.5 or range 0.5 <= resf < 1.0*/
coeff = (resf < 0.5f)? 512.0f : 256.0f ; /* range 0.25 <= resf < 0.5 or range 0.5 <= resf < 1.0*/
q0 = (int)(resf * coeff); /* a in units of 1/512 rounded down */
r = ((float)q0 + 0.5) / coeff;
r = ((float)q0 + 0.5f) / coeff;
tmp = _mm_rsqrt_ss(_mm_load_ss( &r));/* reciprocal root r */
_mm_store_ss(&r, tmp);
s = (int)(256.0 * r + 0.5); /* r in units of 1/256 rounded to nearest */
r = (float)(s / 256.0);
res.m64_u32[i] = r * (((uint32_t)1) << 31);
s = (int)(256.0f * r + 0.5f); /* r in units of 1/256 rounded to nearest */
r = (float)(s / 256.0f);
res.m64_u32[i] = (uint32_t)(r * (((uint32_t)1) << 31));
}
}
return res;
@@ -6899,13 +6899,13 @@ _NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(uint32x4_t vrsqrteq_u32(uint32x4_
zero = _mm_setzero_si128();
for (i =0; i<4; i++){
resf = (float) (atmp[i] * (0.5f / (uint32_t)(1 << 31)));
coeff = (float)((resf < 0.5)? 512.0 : 256.0); /* range 0.25 <= resf < 0.5 or range 0.5 <= resf < 1.0*/
coeff = (float)((resf < 0.5f)? 512.0f : 256.0f); /* range 0.25 <= resf < 0.5 or range 0.5 <= resf < 1.0*/
q0 = (int)(resf * coeff); /* a in units of 1/512 rounded down */
r = ((float)q0 + 0.5) / coeff;
r = ((float)q0 + 0.5f) / coeff;
tmp = _mm_rsqrt_ss(_mm_load_ss( &r));/* reciprocal root r */
_mm_store_ss(&r, tmp);
s = (int)(256.0 * r + 0.5); /* r in units of 1/256 rounded to nearest */
r = (float)s / 256.0;
s = (int)(256.0f * r + 0.5f); /* r in units of 1/256 rounded to nearest */
r = (float)s / 256.0f;
res[i] = (uint32_t) (r * (((uint32_t)1) << 31) );
}
res128 = _mm_load_si128((__m128i*)res);
@@ -6955,8 +6955,8 @@ _NEON2SSESTORAGE float32x4_t vrsqrtsq_f32(float32x4_t a, float32x4_t b); // VRSQ
_NEON2SSE_INLINE float32x4_t vrsqrtsq_f32(float32x4_t a, float32x4_t b) // VRSQRTS.F32 q0, q0, q0
{
__m128 f3, f05, mul;
f3 = _mm_set1_ps(3.);
f05 = _mm_set1_ps(0.5);
f3 = _mm_set1_ps(3.f);
f05 = _mm_set1_ps(0.5f);
mul = _mm_mul_ps(a,b);
f3 = _mm_sub_ps(f3,mul);
return _mm_mul_ps (f3, f05);
@@ -8393,7 +8393,7 @@ _NEON2SSE_INLINE uint16x8_t vqshlq_n_u16(uint16x8_t a, __constrange(0,15) int b)
// manual saturation solution looks more optimal than 32 bits conversion one
__m128i cb, c8000, a_signed, saturation_mask, shift_res;
cb = _mm_set1_epi16((1 << (16 - b)) - 1 - 0x8000 );
c8000 = _mm_set1_epi16 ((int16_t)0x8000);
c8000 = _mm_set1_epi16 (-32768); // (int16_t)0x8000
//no unsigned shorts comparison in SSE, only signed available, so need the trick
a_signed = _mm_sub_epi16(a, c8000); //go to signed
saturation_mask = _mm_cmpgt_epi16 (a_signed, cb);
@@ -14692,7 +14692,7 @@ _NEON2SSESTORAGE int8x16_t vqabsq_s8(int8x16_t a); // VQABS.S8 q0,q0
_NEON2SSE_INLINE int8x16_t vqabsq_s8(int8x16_t a) // VQABS.S8 q0,q0
{
__m128i c_128, abs, abs_cmp;
c_128 = _mm_set1_epi8 ((int8_t)0x80); //-128
c_128 = _mm_set1_epi8 (-128); //(int8_t)0x80
abs = _mm_abs_epi8 (a);
abs_cmp = _mm_cmpeq_epi8 (abs, c_128);
return _mm_xor_si128 (abs, abs_cmp);
@@ -14702,7 +14702,7 @@ _NEON2SSESTORAGE int16x8_t vqabsq_s16(int16x8_t a); // VQABS.S16 q0,q0
_NEON2SSE_INLINE int16x8_t vqabsq_s16(int16x8_t a) // VQABS.S16 q0,q0
{
__m128i c_32768, abs, abs_cmp;
c_32768 = _mm_set1_epi16 ((int16_t)0x8000); //-32768
c_32768 = _mm_set1_epi16 (-32768); //(int16_t)0x8000
abs = _mm_abs_epi16 (a);
abs_cmp = _mm_cmpeq_epi16 (abs, c_32768);
return _mm_xor_si128 (abs, abs_cmp);
@@ -15015,7 +15015,7 @@ _NEON2SSE_INLINE int8x16_t vclsq_s8(int8x16_t a)
{
__m128i cff, c80, c1, a_mask, a_neg, a_pos, a_comb;
cff = _mm_cmpeq_epi8 (a,a); //0xff
c80 = _mm_set1_epi8((int8_t)0x80);
c80 = _mm_set1_epi8(-128); //(int8_t)0x80
c1 = _mm_set1_epi8(1);
a_mask = _mm_and_si128(a, c80);
a_mask = _mm_cmpeq_epi8(a_mask, c80); //0xff if negative input and 0 if positive
@@ -16711,8 +16711,8 @@ _NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(float64x2_t vrndnq_f64(float64x2_
{
_NEON2SSE_ALIGN_16 float64_t res[2];
_mm_store_pd(res, a);
res[0] = nearbyintf(res[0]);
res[1] = nearbyintf(res[1]);
res[0] = nearbyint(res[0]);
res[1] = nearbyint(res[1]);
return _mm_load_pd(res);
}
#endif
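The last hunk swaps nearbyintf for nearbyint because res[] holds float64_t values, so the float-only call forced a double-to-float round trip that MSVC flags as a lossy conversion. A tiny sketch (illustrative, not from the header) of the difference:

#include <math.h>
#include <stdio.h>

int main(void)
{
    double v = 16777217.0;               /* 2^24 + 1, not exactly representable as float */
    double via_f = nearbyintf((float)v); /* precision already lost: prints 16777216 */
    double via_d = nearbyint(v);         /* stays in double: prints 16777217 */
    printf("%.1f %.1f\n", via_f, via_d);
    return 0;
}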
