Avoid alignas(0) by wrapping in a macro (#452)

C++11 states that alignas(0) should be ignored, but this is broken on at least some versions of GCC (e.g. 11.3) and generates a warning. This PR changes the use of alignas() to wrap the entire alignas() in a macro, instead of just the alignment value, allowing us to omit the annotation when the alignment is zero.
ARM-software · Jan 11, 2024 · e5e4ca4 · e5e4ca4
1 parent b0ca583
commit e5e4ca4
Show file tree

Hide file tree

Showing 12 changed files with 76 additions and 55 deletions.
diff --git a/Docs/ChangeLog-4x.md b/Docs/ChangeLog-4x.md
@@ -18,6 +18,8 @@ The 4.7.0 release is a maintenance release.
  mode rounding rules for the alpha channel.
  * **Bug fix:** Linear LDR decompression now uses correct `decode_unorm8`
  decode mode rounding rules when writing to an 8-bit output image.
+ * **Bug fix:** Avoid using `alignas()` in the reference C implementation, as the
+ default `alignas(16)` is narrower than the native alignment on some CPUs.
  * **Feature:** Library configuration supports a new flag,
  `ASTCENC_FLG_USE_DECODE_UNORM8`. This flag indicates that the image will be
  used with the `decode_unorm8` decode mode. When set during compression

diff --git a/Source/UnitTest/test_simd.cpp b/Source/UnitTest/test_simd.cpp
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2020-2022 Arm Limited
+// Copyright 2020-2024 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -930,7 +930,7 @@ TEST(vfloat4, gatherf)
 /** @brief Test vfloat4 storea. */
 TEST(vfloat4, storea)
 {
- alignas(16) float out[4];
+ ASTCENC_ALIGNAS float out[4];
  vfloat4 a(f32_data);
  storea(a, out);
  EXPECT_EQ(out[0], 0.0f);
@@ -942,7 +942,7 @@ TEST(vfloat4, storea)
 /** @brief Test vfloat4 store. */
 TEST(vfloat4, store)
 {
- alignas(16) float out[5];
+ ASTCENC_ALIGNAS float out[5];
  vfloat4 a(f32_data);
  store(a, &(out[1]));
  EXPECT_EQ(out[1], 0.0f);
@@ -1725,7 +1725,7 @@ TEST(vint4, two_to_the_n)
 /** @brief Test vint4 storea. */
 TEST(vint4, storea)
 {
- alignas(16) int out[4];
+ ASTCENC_ALIGNAS int out[4];
  vint4 a(s32_data);
  storea(a, out);
  EXPECT_EQ(out[0], 0);
@@ -1737,7 +1737,7 @@ TEST(vint4, storea)
 /** @brief Test vint4 store. */
 TEST(vint4, store)
 {
- alignas(16) int out[5];
+ ASTCENC_ALIGNAS int out[5];
  vint4 a(s32_data);
  store(a, &(out[1]));
  EXPECT_EQ(out[1], 0);
@@ -1749,7 +1749,7 @@ TEST(vint4, store)
 /** @brief Test vint4 store_nbytes. */
 TEST(vint4, store_nbytes)
 {
- alignas(16) int out;
+ ASTCENC_ALIGNAS int out;
  vint4 a(42, 314, 75, 90);
  store_nbytes(a, reinterpret_cast<uint8_t*>(&out));
  EXPECT_EQ(out, 42);

diff --git a/Source/astcenc_compress_symbolic.cpp b/Source/astcenc_compress_symbolic.cpp
@@ -247,7 +247,7 @@ static bool realign_weights_decimated(
  }
 
  // Create an unquantized weight grid for this decimation level
- alignas(ASTCENC_VECALIGN) float uq_weightsf[BLOCK_MAX_WEIGHTS];
+ ASTCENC_ALIGNAS float uq_weightsf[BLOCK_MAX_WEIGHTS];
  for (unsigned int we_idx = 0; we_idx < weight_count; we_idx += ASTCENC_SIMD_WIDTH)
  {
  vint unquant_value(dec_weights_uquant + we_idx);
@@ -467,7 +467,7 @@ static float compress_symbolic_block_for_partition_1plane(
 
  qwt_bitcounts[i] = static_cast<int8_t>(bitcount);
 
- alignas(ASTCENC_VECALIGN) float dec_weights_uquantf[BLOCK_MAX_WEIGHTS];
+ ASTCENC_ALIGNAS float dec_weights_uquantf[BLOCK_MAX_WEIGHTS];
 
  // Generate the optimized set of weights for the weight mode
  compute_quantized_weights_for_decimation(
@@ -830,7 +830,7 @@ static float compress_symbolic_block_for_partition_2planes(
  unsigned int decimation_mode = bm.decimation_mode;
  const auto& di = bsd.get_decimation_info(decimation_mode);
 
- alignas(ASTCENC_VECALIGN) float dec_weights_uquantf[BLOCK_MAX_WEIGHTS];
+ ASTCENC_ALIGNAS float dec_weights_uquantf[BLOCK_MAX_WEIGHTS];
 
  // Generate the optimized set of weights for the mode
  compute_quantized_weights_for_decimation(

diff --git a/Source/astcenc_decompress_symbolic.cpp b/Source/astcenc_decompress_symbolic.cpp
@@ -533,7 +533,7 @@ float compute_symbolic_block_difference_1plane_1partition(
  const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);
 
  // Unquantize and undecimate the weights
- alignas(ASTCENC_VECALIGN) int plane1_weights[BLOCK_MAX_TEXELS];
+ ASTCENC_ALIGNAS int plane1_weights[BLOCK_MAX_TEXELS];
  unpack_weights(bsd, scb, di, false, plane1_weights, nullptr);
 
  // Decode the color endpoints for this partition

diff --git a/Source/astcenc_entry.cpp b/Source/astcenc_entry.cpp
@@ -699,6 +699,12 @@ astcenc_error astcenc_context_alloc(
  }
 
  ctx->bsd = aligned_malloc<block_size_descriptor>(sizeof(block_size_descriptor), ASTCENC_VECALIGN);
+ if (!ctx->bsd)
+ {
+ delete ctxo;
+ return ASTCENC_ERR_OUT_OF_MEM;
+ }
+
  bool can_omit_modes = static_cast<bool>(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY);
  init_block_size_descriptor(config.block_x, config.block_y, config.block_z,
  can_omit_modes,

diff --git a/Source/astcenc_ideal_endpoints_and_weights.cpp b/Source/astcenc_ideal_endpoints_and_weights.cpp
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2011-2023 Arm Limited
+// Copyright 2011-2024 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -873,7 +873,7 @@ void compute_ideal_weights_for_decimation(
  }
 
  // Otherwise compute an estimate and perform single refinement iteration
- alignas(ASTCENC_VECALIGN) float infilled_weights[BLOCK_MAX_TEXELS];
+ ASTCENC_ALIGNAS float infilled_weights[BLOCK_MAX_TEXELS];
 
  // Compute an initial average for each decimated weight
  bool constant_wes = ei.is_constant_weight_error_scale;
@@ -1171,15 +1171,15 @@ void recompute_ideal_colors_1plane(
  promise(total_texel_count > 0);
  promise(partition_count > 0);
 
- alignas(ASTCENC_VECALIGN) float dec_weight[BLOCK_MAX_WEIGHTS];
+ ASTCENC_ALIGNAS float dec_weight[BLOCK_MAX_WEIGHTS];
  for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH)
  {
  vint unquant_value(dec_weights_uquant + i);
  vfloat unquant_valuef = int_to_float(unquant_value) * vfloat(1.0f / 64.0f);
  storea(unquant_valuef, dec_weight + i);
  }
 
- alignas(ASTCENC_VECALIGN) float undec_weight[BLOCK_MAX_TEXELS];
+ ASTCENC_ALIGNAS float undec_weight[BLOCK_MAX_TEXELS];
  float* undec_weight_ref;
  if (di.max_texel_weight_count == 1)
  {
@@ -1394,8 +1394,8 @@ void recompute_ideal_colors_2planes(
  promise(total_texel_count > 0);
  promise(weight_count > 0);
 
- alignas(ASTCENC_VECALIGN) float dec_weight_plane1[BLOCK_MAX_WEIGHTS_2PLANE];
- alignas(ASTCENC_VECALIGN) float dec_weight_plane2[BLOCK_MAX_WEIGHTS_2PLANE];
+ ASTCENC_ALIGNAS float dec_weight_plane1[BLOCK_MAX_WEIGHTS_2PLANE];
+ ASTCENC_ALIGNAS float dec_weight_plane2[BLOCK_MAX_WEIGHTS_2PLANE];
 
  assert(weight_count <= BLOCK_MAX_WEIGHTS_2PLANE);
 
@@ -1410,8 +1410,8 @@ void recompute_ideal_colors_2planes(
  storea(unquant_value2f, dec_weight_plane2 + i);
  }
 
- alignas(ASTCENC_VECALIGN) float undec_weight_plane1[BLOCK_MAX_TEXELS];
- alignas(ASTCENC_VECALIGN) float undec_weight_plane2[BLOCK_MAX_TEXELS];
+ ASTCENC_ALIGNAS float undec_weight_plane1[BLOCK_MAX_TEXELS];
+ ASTCENC_ALIGNAS float undec_weight_plane2[BLOCK_MAX_TEXELS];
 
  float* undec_weight_plane1_ref;
  float* undec_weight_plane2_ref;

diff --git a/Source/astcenc_image.cpp b/Source/astcenc_image.cpp
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2011-2022 Arm Limited
+// Copyright 2011-2024 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -109,7 +109,7 @@ static vfloat4 swz_texel(
  vfloat4 data,
  const astcenc_swizzle& swz
 ) {
- alignas(16) float datas[6];
+ ASTCENC_ALIGNAS float datas[6];
 
  storea(data, datas);
  datas[ASTCENC_SWZ_0] = 0.0f;

diff --git a/Source/astcenc_internal.h b/Source/astcenc_internal.h
@@ -385,7 +385,7 @@ struct decimation_info
  * @brief The bilinear contribution of the N weights that are interpolated for each texel.
  * Value is between 0 and 1, stored transposed to improve vectorization.
  */
- alignas(ASTCENC_VECALIGN) float texel_weight_contribs_float_tr[4][BLOCK_MAX_TEXELS];
+ ASTCENC_ALIGNAS float texel_weight_contribs_float_tr[4][BLOCK_MAX_TEXELS];
 
  /** @brief The number of texels that each stored weight contributes to. */
  uint8_t weight_texel_count[BLOCK_MAX_WEIGHTS];
@@ -400,7 +400,7 @@ struct decimation_info
  * @brief The bilinear contribution to the N texels that use each weight.
  * Value is between 0 and 1, stored transposed to improve vectorization.
  */
- alignas(ASTCENC_VECALIGN) float weights_texel_contribs_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
+ ASTCENC_ALIGNAS float weights_texel_contribs_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
 
  /**
  * @brief The bilinear contribution to the Nth texel that uses each weight.
@@ -580,7 +580,7 @@ struct block_size_descriptor
  decimation_mode decimation_modes[WEIGHTS_MAX_DECIMATION_MODES];
 
  /** @brief The active decimation tables, stored in low indices. */
- alignas(ASTCENC_VECALIGN) decimation_info decimation_tables[WEIGHTS_MAX_DECIMATION_MODES];
+ ASTCENC_ALIGNAS decimation_info decimation_tables[WEIGHTS_MAX_DECIMATION_MODES];
 
  /** @brief The packed block mode array index, or @c BLOCK_BAD_BLOCK_MODE if not active. */
  uint16_t block_mode_packed_index[WEIGHTS_MAX_BLOCK_MODES];
@@ -740,16 +740,16 @@ struct block_size_descriptor
 struct image_block
 {
  /** @brief The input (compress) or output (decompress) data for the red color component. */
- alignas(ASTCENC_VECALIGN) float data_r[BLOCK_MAX_TEXELS];
+ ASTCENC_ALIGNAS float data_r[BLOCK_MAX_TEXELS];
 
  /** @brief The input (compress) or output (decompress) data for the green color component. */
- alignas(ASTCENC_VECALIGN) float data_g[BLOCK_MAX_TEXELS];
+ ASTCENC_ALIGNAS float data_g[BLOCK_MAX_TEXELS];
 
  /** @brief The input (compress) or output (decompress) data for the blue color component. */
- alignas(ASTCENC_VECALIGN) float data_b[BLOCK_MAX_TEXELS];
+ ASTCENC_ALIGNAS float data_b[BLOCK_MAX_TEXELS];
 
  /** @brief The input (compress) or output (decompress) data for the alpha color component. */
- alignas(ASTCENC_VECALIGN) float data_a[BLOCK_MAX_TEXELS];
+ ASTCENC_ALIGNAS float data_a[BLOCK_MAX_TEXELS];
 
  /** @brief The number of texels in the block. */
  uint8_t texel_count;
@@ -901,10 +901,10 @@ struct endpoints_and_weights
  endpoints ep;
 
  /** @brief The ideal weight for each texel; may be undecimated or decimated. */
- alignas(ASTCENC_VECALIGN) float weights[BLOCK_MAX_TEXELS];
+ ASTCENC_ALIGNAS float weights[BLOCK_MAX_TEXELS];
 
  /** @brief The ideal weight error scaling for each texel; may be undecimated or decimated. */
- alignas(ASTCENC_VECALIGN) float weight_error_scale[BLOCK_MAX_TEXELS];
+ ASTCENC_ALIGNAS float weight_error_scale[BLOCK_MAX_TEXELS];
 };
 
 /**
@@ -934,7 +934,7 @@ struct encoding_choice_errors
 /**
  * @brief Preallocated working buffers, allocated per thread during context creation.
  */
-struct alignas(ASTCENC_VECALIGN) compression_working_buffers
+struct ASTCENC_ALIGNAS compression_working_buffers
 {
  /** @brief Ideal endpoints and weights for plane 1. */
  endpoints_and_weights ei1;
@@ -950,7 +950,7 @@ struct alignas(ASTCENC_VECALIGN) compression_working_buffers
  *
  * For two planes, second plane starts at @c WEIGHTS_PLANE2_OFFSET offsets.
  */
- alignas(ASTCENC_VECALIGN) float dec_weights_ideal[WEIGHTS_MAX_DECIMATION_MODES * BLOCK_MAX_WEIGHTS];
+ ASTCENC_ALIGNAS float dec_weights_ideal[WEIGHTS_MAX_DECIMATION_MODES * BLOCK_MAX_WEIGHTS];
 
  /**
  * @brief Decimated quantized weight values in the unquantized 0-64 range.
@@ -960,7 +960,7 @@ struct alignas(ASTCENC_VECALIGN) compression_working_buffers
  uint8_t dec_weights_uquant[WEIGHTS_MAX_BLOCK_MODES * BLOCK_MAX_WEIGHTS];
 
  /** @brief Error of the best encoding combination for each block mode. */
- alignas(ASTCENC_VECALIGN) float errors_of_best_combination[WEIGHTS_MAX_BLOCK_MODES];
+ ASTCENC_ALIGNAS float errors_of_best_combination[WEIGHTS_MAX_BLOCK_MODES];
 
  /** @brief The best color quant for each block mode. */
  uint8_t best_quant_levels[WEIGHTS_MAX_BLOCK_MODES];
@@ -2173,10 +2173,11 @@ Platform-specific functions.
 /**
  * @brief Allocate an aligned memory buffer.
  *
- * Allocated memory must be freed by aligned_free;
+ * Allocated memory must be freed by aligned_free.
  *
  * @param size The desired buffer size.
- * @param align The desired buffer alignment; must be 2^N.
+ * @param align The desired buffer alignment; must be 2^N, may be increased
+ * by the implementation to a minimum allowable alignment.
  *
  * @return The memory buffer pointer or nullptr on allocation failure.
  */
@@ -2186,10 +2187,14 @@ T* aligned_malloc(size_t size, size_t align)
  void* ptr;
  int error = 0;
 
+ // Don't allow this to under-align a type
+ size_t min_align = astc::max(alignof(T), sizeof(void*));
+ size_t real_align = astc::max(min_align, align);
+
 #if defined(_WIN32)
- ptr = _aligned_malloc(size, align);
+ ptr = _aligned_malloc(size, real_align);
 #else
- error = posix_memalign(&ptr, align, size);
+ error = posix_memalign(&ptr, real_align, size);
 #endif
 
  if (error || (!ptr))

diff --git a/Source/astcenc_mathlib.h b/Source/astcenc_mathlib.h
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2011-2023 Arm Limited
+// Copyright 2011-2024 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -83,6 +83,14 @@
  #define ASTCENC_VECALIGN 0
 #endif
 
+// C++11 states that alignas(0) should be ignored but GCC doesn't do
+// this on some versions, so work around this and avoid emitting alignas(0)
+#if ASTCENC_VECALIGN > 0
+ #define ASTCENC_ALIGNAS alignas(ASTCENC_VECALIGN)
+#else
+ #define ASTCENC_ALIGNAS
+#endif
+
 #if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0
  #include <immintrin.h>
 #endif

diff --git a/Source/astcenc_vecmathlib_avx2_8.h b/Source/astcenc_vecmathlib_avx2_8.h
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2019-2022 Arm Limited
+// Copyright 2019-2024 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -1170,7 +1170,7 @@ ASTCENC_SIMD_INLINE void store_lanes_masked(uint8_t* base, vint8 data, vmask8 ma
  */
 ASTCENC_SIMD_INLINE void print(vint8 a)
 {
- alignas(ASTCENC_VECALIGN) int v[8];
+ alignas(32) int v[8];
  storea(a, v);
  printf("v8_i32:\n %8d %8d %8d %8d %8d %8d %8d %8d\n",
  v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
@@ -1181,7 +1181,7 @@ ASTCENC_SIMD_INLINE void print(vint8 a)
  */
 ASTCENC_SIMD_INLINE void printx(vint8 a)
 {
- alignas(ASTCENC_VECALIGN) int v[8];
+ alignas(32) int v[8];
  storea(a, v);
  printf("v8_i32:\n %08x %08x %08x %08x %08x %08x %08x %08x\n",
  v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
@@ -1192,7 +1192,7 @@ ASTCENC_SIMD_INLINE void printx(vint8 a)
  */
 ASTCENC_SIMD_INLINE void print(vfloat8 a)
 {
- alignas(ASTCENC_VECALIGN) float v[8];
+ alignas(32) float v[8];
  storea(a, v);
  printf("v8_f32:\n %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f\n",
  static_cast<double>(v[0]), static_cast<double>(v[1]),

diff --git a/Source/astcenc_vecmathlib_common_4.h b/Source/astcenc_vecmathlib_common_4.h
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2020-2021 Arm Limited
+// Copyright 2020-2024 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -383,7 +383,7 @@ static ASTCENC_SIMD_INLINE void bit_transfer_signed(
  */
 ASTCENC_SIMD_INLINE void print(vint4 a)
 {
- alignas(16) int v[4];
+ ASTCENC_ALIGNAS int v[4];
  storea(a, v);
  printf("v4_i32:\n %8d %8d %8d %8d\n",
  v[0], v[1], v[2], v[3]);
@@ -394,7 +394,7 @@ ASTCENC_SIMD_INLINE void print(vint4 a)
  */
 ASTCENC_SIMD_INLINE void printx(vint4 a)
 {
- alignas(16) int v[4];
+ ASTCENC_ALIGNAS int v[4];
  storea(a, v);
  printf("v4_i32:\n %08x %08x %08x %08x\n",
  v[0], v[1], v[2], v[3]);
@@ -405,7 +405,7 @@ ASTCENC_SIMD_INLINE void printx(vint4 a)
  */
 ASTCENC_SIMD_INLINE void print(vfloat4 a)
 {
- alignas(16) float v[4];
+ ASTCENC_ALIGNAS float v[4];
  storea(a, v);
  printf("v4_f32:\n %0.4f %0.4f %0.4f %0.4f\n",
  static_cast<double>(v[0]), static_cast<double>(v[1]),