Skip to content

Commit

Permalink
Avoid alignas(0) by wrapping in a macro (#452)
Browse files Browse the repository at this point in the history
C++11 states that alignas(0) should be ignored, but this is broken on at least some versions of GCC (e.g. 11.3) and generates a warning. This PR changes the use of alignas() to wrap the entire alignas() in a macro, instead of just the alignment value, allowing us to omit the annotation when the alignment is zero.
  • Loading branch information
solidpixel committed Jan 11, 2024
1 parent b0ca583 commit e5e4ca4
Show file tree
Hide file tree
Showing 12 changed files with 76 additions and 55 deletions.
2 changes: 2 additions & 0 deletions Docs/ChangeLog-4x.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ The 4.7.0 release is a maintenance release.
mode rounding rules for the alpha channel.
* **Bug fix:** Linear LDR decompression now uses correct `decode_unorm8`
decode mode rounding rules when writing to an 8-bit output image.
* **Bug fix:** Avoid using `alignas()` in the reference C implementation, as the
default `alignas(16)` is narrower than the native alignment on some CPUs.
* **Feature:** Library configuration supports a new flag,
`ASTCENC_FLG_USE_DECODE_UNORM8`. This flag indicates that the image will be
used with the `decode_unorm8` decode mode. When set during compression
Expand Down
12 changes: 6 additions & 6 deletions Source/UnitTest/test_simd.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2020-2022 Arm Limited
// Copyright 2020-2024 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
Expand Down Expand Up @@ -930,7 +930,7 @@ TEST(vfloat4, gatherf)
/** @brief Test vfloat4 storea. */
TEST(vfloat4, storea)
{
alignas(16) float out[4];
ASTCENC_ALIGNAS float out[4];
vfloat4 a(f32_data);
storea(a, out);
EXPECT_EQ(out[0], 0.0f);
Expand All @@ -942,7 +942,7 @@ TEST(vfloat4, storea)
/** @brief Test vfloat4 store. */
TEST(vfloat4, store)
{
alignas(16) float out[5];
ASTCENC_ALIGNAS float out[5];
vfloat4 a(f32_data);
store(a, &(out[1]));
EXPECT_EQ(out[1], 0.0f);
Expand Down Expand Up @@ -1725,7 +1725,7 @@ TEST(vint4, two_to_the_n)
/** @brief Test vint4 storea. */
TEST(vint4, storea)
{
alignas(16) int out[4];
ASTCENC_ALIGNAS int out[4];
vint4 a(s32_data);
storea(a, out);
EXPECT_EQ(out[0], 0);
Expand All @@ -1737,7 +1737,7 @@ TEST(vint4, storea)
/** @brief Test vint4 store. */
TEST(vint4, store)
{
alignas(16) int out[5];
ASTCENC_ALIGNAS int out[5];
vint4 a(s32_data);
store(a, &(out[1]));
EXPECT_EQ(out[1], 0);
Expand All @@ -1749,7 +1749,7 @@ TEST(vint4, store)
/** @brief Test vint4 store_nbytes. */
TEST(vint4, store_nbytes)
{
alignas(16) int out;
ASTCENC_ALIGNAS int out;
vint4 a(42, 314, 75, 90);
store_nbytes(a, reinterpret_cast<uint8_t*>(&out));
EXPECT_EQ(out, 42);
Expand Down
6 changes: 3 additions & 3 deletions Source/astcenc_compress_symbolic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ static bool realign_weights_decimated(
}

// Create an unquantized weight grid for this decimation level
alignas(ASTCENC_VECALIGN) float uq_weightsf[BLOCK_MAX_WEIGHTS];
ASTCENC_ALIGNAS float uq_weightsf[BLOCK_MAX_WEIGHTS];
for (unsigned int we_idx = 0; we_idx < weight_count; we_idx += ASTCENC_SIMD_WIDTH)
{
vint unquant_value(dec_weights_uquant + we_idx);
Expand Down Expand Up @@ -467,7 +467,7 @@ static float compress_symbolic_block_for_partition_1plane(

qwt_bitcounts[i] = static_cast<int8_t>(bitcount);

alignas(ASTCENC_VECALIGN) float dec_weights_uquantf[BLOCK_MAX_WEIGHTS];
ASTCENC_ALIGNAS float dec_weights_uquantf[BLOCK_MAX_WEIGHTS];

// Generate the optimized set of weights for the weight mode
compute_quantized_weights_for_decimation(
Expand Down Expand Up @@ -830,7 +830,7 @@ static float compress_symbolic_block_for_partition_2planes(
unsigned int decimation_mode = bm.decimation_mode;
const auto& di = bsd.get_decimation_info(decimation_mode);

alignas(ASTCENC_VECALIGN) float dec_weights_uquantf[BLOCK_MAX_WEIGHTS];
ASTCENC_ALIGNAS float dec_weights_uquantf[BLOCK_MAX_WEIGHTS];

// Generate the optimized set of weights for the mode
compute_quantized_weights_for_decimation(
Expand Down
2 changes: 1 addition & 1 deletion Source/astcenc_decompress_symbolic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ float compute_symbolic_block_difference_1plane_1partition(
const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);

// Unquantize and undecimate the weights
alignas(ASTCENC_VECALIGN) int plane1_weights[BLOCK_MAX_TEXELS];
ASTCENC_ALIGNAS int plane1_weights[BLOCK_MAX_TEXELS];
unpack_weights(bsd, scb, di, false, plane1_weights, nullptr);

// Decode the color endpoints for this partition
Expand Down
6 changes: 6 additions & 0 deletions Source/astcenc_entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,12 @@ astcenc_error astcenc_context_alloc(
}

ctx->bsd = aligned_malloc<block_size_descriptor>(sizeof(block_size_descriptor), ASTCENC_VECALIGN);
if (!ctx->bsd)
{
delete ctxo;
return ASTCENC_ERR_OUT_OF_MEM;
}

bool can_omit_modes = static_cast<bool>(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY);
init_block_size_descriptor(config.block_x, config.block_y, config.block_z,
can_omit_modes,
Expand Down
16 changes: 8 additions & 8 deletions Source/astcenc_ideal_endpoints_and_weights.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2023 Arm Limited
// Copyright 2011-2024 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
Expand Down Expand Up @@ -873,7 +873,7 @@ void compute_ideal_weights_for_decimation(
}

// Otherwise compute an estimate and perform single refinement iteration
alignas(ASTCENC_VECALIGN) float infilled_weights[BLOCK_MAX_TEXELS];
ASTCENC_ALIGNAS float infilled_weights[BLOCK_MAX_TEXELS];

// Compute an initial average for each decimated weight
bool constant_wes = ei.is_constant_weight_error_scale;
Expand Down Expand Up @@ -1171,15 +1171,15 @@ void recompute_ideal_colors_1plane(
promise(total_texel_count > 0);
promise(partition_count > 0);

alignas(ASTCENC_VECALIGN) float dec_weight[BLOCK_MAX_WEIGHTS];
ASTCENC_ALIGNAS float dec_weight[BLOCK_MAX_WEIGHTS];
for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH)
{
vint unquant_value(dec_weights_uquant + i);
vfloat unquant_valuef = int_to_float(unquant_value) * vfloat(1.0f / 64.0f);
storea(unquant_valuef, dec_weight + i);
}

alignas(ASTCENC_VECALIGN) float undec_weight[BLOCK_MAX_TEXELS];
ASTCENC_ALIGNAS float undec_weight[BLOCK_MAX_TEXELS];
float* undec_weight_ref;
if (di.max_texel_weight_count == 1)
{
Expand Down Expand Up @@ -1394,8 +1394,8 @@ void recompute_ideal_colors_2planes(
promise(total_texel_count > 0);
promise(weight_count > 0);

alignas(ASTCENC_VECALIGN) float dec_weight_plane1[BLOCK_MAX_WEIGHTS_2PLANE];
alignas(ASTCENC_VECALIGN) float dec_weight_plane2[BLOCK_MAX_WEIGHTS_2PLANE];
ASTCENC_ALIGNAS float dec_weight_plane1[BLOCK_MAX_WEIGHTS_2PLANE];
ASTCENC_ALIGNAS float dec_weight_plane2[BLOCK_MAX_WEIGHTS_2PLANE];

assert(weight_count <= BLOCK_MAX_WEIGHTS_2PLANE);

Expand All @@ -1410,8 +1410,8 @@ void recompute_ideal_colors_2planes(
storea(unquant_value2f, dec_weight_plane2 + i);
}

alignas(ASTCENC_VECALIGN) float undec_weight_plane1[BLOCK_MAX_TEXELS];
alignas(ASTCENC_VECALIGN) float undec_weight_plane2[BLOCK_MAX_TEXELS];
ASTCENC_ALIGNAS float undec_weight_plane1[BLOCK_MAX_TEXELS];
ASTCENC_ALIGNAS float undec_weight_plane2[BLOCK_MAX_TEXELS];

float* undec_weight_plane1_ref;
float* undec_weight_plane2_ref;
Expand Down
4 changes: 2 additions & 2 deletions Source/astcenc_image.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2022 Arm Limited
// Copyright 2011-2024 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
Expand Down Expand Up @@ -109,7 +109,7 @@ static vfloat4 swz_texel(
vfloat4 data,
const astcenc_swizzle& swz
) {
alignas(16) float datas[6];
ASTCENC_ALIGNAS float datas[6];

storea(data, datas);
datas[ASTCENC_SWZ_0] = 0.0f;
Expand Down
37 changes: 21 additions & 16 deletions Source/astcenc_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ struct decimation_info
* @brief The bilinear contribution of the N weights that are interpolated for each texel.
* Value is between 0 and 1, stored transposed to improve vectorization.
*/
alignas(ASTCENC_VECALIGN) float texel_weight_contribs_float_tr[4][BLOCK_MAX_TEXELS];
ASTCENC_ALIGNAS float texel_weight_contribs_float_tr[4][BLOCK_MAX_TEXELS];

/** @brief The number of texels that each stored weight contributes to. */
uint8_t weight_texel_count[BLOCK_MAX_WEIGHTS];
Expand All @@ -400,7 +400,7 @@ struct decimation_info
* @brief The bilinear contribution to the N texels that use each weight.
* Value is between 0 and 1, stored transposed to improve vectorization.
*/
alignas(ASTCENC_VECALIGN) float weights_texel_contribs_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
ASTCENC_ALIGNAS float weights_texel_contribs_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];

/**
* @brief The bilinear contribution to the Nth texel that uses each weight.
Expand Down Expand Up @@ -580,7 +580,7 @@ struct block_size_descriptor
decimation_mode decimation_modes[WEIGHTS_MAX_DECIMATION_MODES];

/** @brief The active decimation tables, stored in low indices. */
alignas(ASTCENC_VECALIGN) decimation_info decimation_tables[WEIGHTS_MAX_DECIMATION_MODES];
ASTCENC_ALIGNAS decimation_info decimation_tables[WEIGHTS_MAX_DECIMATION_MODES];

/** @brief The packed block mode array index, or @c BLOCK_BAD_BLOCK_MODE if not active. */
uint16_t block_mode_packed_index[WEIGHTS_MAX_BLOCK_MODES];
Expand Down Expand Up @@ -740,16 +740,16 @@ struct block_size_descriptor
struct image_block
{
/** @brief The input (compress) or output (decompress) data for the red color component. */
alignas(ASTCENC_VECALIGN) float data_r[BLOCK_MAX_TEXELS];
ASTCENC_ALIGNAS float data_r[BLOCK_MAX_TEXELS];

/** @brief The input (compress) or output (decompress) data for the green color component. */
alignas(ASTCENC_VECALIGN) float data_g[BLOCK_MAX_TEXELS];
ASTCENC_ALIGNAS float data_g[BLOCK_MAX_TEXELS];

/** @brief The input (compress) or output (decompress) data for the blue color component. */
alignas(ASTCENC_VECALIGN) float data_b[BLOCK_MAX_TEXELS];
ASTCENC_ALIGNAS float data_b[BLOCK_MAX_TEXELS];

/** @brief The input (compress) or output (decompress) data for the alpha color component. */
alignas(ASTCENC_VECALIGN) float data_a[BLOCK_MAX_TEXELS];
ASTCENC_ALIGNAS float data_a[BLOCK_MAX_TEXELS];

/** @brief The number of texels in the block. */
uint8_t texel_count;
Expand Down Expand Up @@ -901,10 +901,10 @@ struct endpoints_and_weights
endpoints ep;

/** @brief The ideal weight for each texel; may be undecimated or decimated. */
alignas(ASTCENC_VECALIGN) float weights[BLOCK_MAX_TEXELS];
ASTCENC_ALIGNAS float weights[BLOCK_MAX_TEXELS];

/** @brief The ideal weight error scaling for each texel; may be undecimated or decimated. */
alignas(ASTCENC_VECALIGN) float weight_error_scale[BLOCK_MAX_TEXELS];
ASTCENC_ALIGNAS float weight_error_scale[BLOCK_MAX_TEXELS];
};

/**
Expand Down Expand Up @@ -934,7 +934,7 @@ struct encoding_choice_errors
/**
* @brief Preallocated working buffers, allocated per thread during context creation.
*/
struct alignas(ASTCENC_VECALIGN) compression_working_buffers
struct ASTCENC_ALIGNAS compression_working_buffers
{
/** @brief Ideal endpoints and weights for plane 1. */
endpoints_and_weights ei1;
Expand All @@ -950,7 +950,7 @@ struct alignas(ASTCENC_VECALIGN) compression_working_buffers
*
* For two planes, second plane starts at @c WEIGHTS_PLANE2_OFFSET offsets.
*/
alignas(ASTCENC_VECALIGN) float dec_weights_ideal[WEIGHTS_MAX_DECIMATION_MODES * BLOCK_MAX_WEIGHTS];
ASTCENC_ALIGNAS float dec_weights_ideal[WEIGHTS_MAX_DECIMATION_MODES * BLOCK_MAX_WEIGHTS];

/**
* @brief Decimated quantized weight values in the unquantized 0-64 range.
Expand All @@ -960,7 +960,7 @@ struct alignas(ASTCENC_VECALIGN) compression_working_buffers
uint8_t dec_weights_uquant[WEIGHTS_MAX_BLOCK_MODES * BLOCK_MAX_WEIGHTS];

/** @brief Error of the best encoding combination for each block mode. */
alignas(ASTCENC_VECALIGN) float errors_of_best_combination[WEIGHTS_MAX_BLOCK_MODES];
ASTCENC_ALIGNAS float errors_of_best_combination[WEIGHTS_MAX_BLOCK_MODES];

/** @brief The best color quant for each block mode. */
uint8_t best_quant_levels[WEIGHTS_MAX_BLOCK_MODES];
Expand Down Expand Up @@ -2173,10 +2173,11 @@ Platform-specific functions.
/**
* @brief Allocate an aligned memory buffer.
*
* Allocated memory must be freed by aligned_free;
* Allocated memory must be freed by aligned_free.
*
* @param size The desired buffer size.
* @param align The desired buffer alignment; must be 2^N.
* @param align The desired buffer alignment; must be 2^N, may be increased
* by the implementation to a minimum allowable alignment.
*
* @return The memory buffer pointer or nullptr on allocation failure.
*/
Expand All @@ -2186,10 +2187,14 @@ T* aligned_malloc(size_t size, size_t align)
void* ptr;
int error = 0;

// Don't allow this to under-align a type
size_t min_align = astc::max(alignof(T), sizeof(void*));
size_t real_align = astc::max(min_align, align);

#if defined(_WIN32)
ptr = _aligned_malloc(size, align);
ptr = _aligned_malloc(size, real_align);
#else
error = posix_memalign(&ptr, align, size);
error = posix_memalign(&ptr, real_align, size);
#endif

if (error || (!ptr))
Expand Down
10 changes: 9 additions & 1 deletion Source/astcenc_mathlib.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2023 Arm Limited
// Copyright 2011-2024 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
Expand Down Expand Up @@ -83,6 +83,14 @@
#define ASTCENC_VECALIGN 0
#endif

// C++11 states that alignas(0) should be ignored, but some versions of GCC
// don't do this, so work around this and avoid emitting alignas(0)
#if ASTCENC_VECALIGN > 0
#define ASTCENC_ALIGNAS alignas(ASTCENC_VECALIGN)
#else
#define ASTCENC_ALIGNAS
#endif

#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0
#include <immintrin.h>
#endif
Expand Down
8 changes: 4 additions & 4 deletions Source/astcenc_vecmathlib_avx2_8.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2019-2022 Arm Limited
// Copyright 2019-2024 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
Expand Down Expand Up @@ -1170,7 +1170,7 @@ ASTCENC_SIMD_INLINE void store_lanes_masked(uint8_t* base, vint8 data, vmask8 ma
*/
ASTCENC_SIMD_INLINE void print(vint8 a)
{
alignas(ASTCENC_VECALIGN) int v[8];
alignas(32) int v[8];
storea(a, v);
printf("v8_i32:\n %8d %8d %8d %8d %8d %8d %8d %8d\n",
v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
Expand All @@ -1181,7 +1181,7 @@ ASTCENC_SIMD_INLINE void print(vint8 a)
*/
ASTCENC_SIMD_INLINE void printx(vint8 a)
{
alignas(ASTCENC_VECALIGN) int v[8];
alignas(32) int v[8];
storea(a, v);
printf("v8_i32:\n %08x %08x %08x %08x %08x %08x %08x %08x\n",
v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
Expand All @@ -1192,7 +1192,7 @@ ASTCENC_SIMD_INLINE void printx(vint8 a)
*/
ASTCENC_SIMD_INLINE void print(vfloat8 a)
{
alignas(ASTCENC_VECALIGN) float v[8];
alignas(32) float v[8];
storea(a, v);
printf("v8_f32:\n %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f\n",
static_cast<double>(v[0]), static_cast<double>(v[1]),
Expand Down
8 changes: 4 additions & 4 deletions Source/astcenc_vecmathlib_common_4.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2020-2021 Arm Limited
// Copyright 2020-2024 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
Expand Down Expand Up @@ -383,7 +383,7 @@ static ASTCENC_SIMD_INLINE void bit_transfer_signed(
*/
ASTCENC_SIMD_INLINE void print(vint4 a)
{
alignas(16) int v[4];
ASTCENC_ALIGNAS int v[4];
storea(a, v);
printf("v4_i32:\n %8d %8d %8d %8d\n",
v[0], v[1], v[2], v[3]);
Expand All @@ -394,7 +394,7 @@ ASTCENC_SIMD_INLINE void print(vint4 a)
*/
ASTCENC_SIMD_INLINE void printx(vint4 a)
{
alignas(16) int v[4];
ASTCENC_ALIGNAS int v[4];
storea(a, v);
printf("v4_i32:\n %08x %08x %08x %08x\n",
v[0], v[1], v[2], v[3]);
Expand All @@ -405,7 +405,7 @@ ASTCENC_SIMD_INLINE void printx(vint4 a)
*/
ASTCENC_SIMD_INLINE void print(vfloat4 a)
{
alignas(16) float v[4];
ASTCENC_ALIGNAS float v[4];
storea(a, v);
printf("v4_f32:\n %0.4f %0.4f %0.4f %0.4f\n",
static_cast<double>(v[0]), static_cast<double>(v[1]),
Expand Down

0 comments on commit e5e4ca4

Please sign in to comment.