/*
 * Main configuration header of the CPU dispatcher.
 *
 * This header is autogenerated by the Meson build script located at `meson_cpu/meson.build`.
 * It provides a set of utilities that are required for the runtime dispatching process.
 *
 * The most important macros in this header are:
 * - @ref @P@CPU_DISPATCH_DECLARE: Used to declare the dispatched functions and variables.
 * - @ref @P@CPU_DISPATCH_CURFX: Used to define the dispatched functions with target-specific suffixes.
 * - @ref @P@CPU_DISPATCH_CALL: Used for runtime dispatching of the exported functions and variables.
 */
#ifndef @P@_CPU_DISPATCHER_CONF_H_
#define @P@_CPU_DISPATCHER_CONF_H_

/**
 * @def @P@WITH_CPU_BASELINE
 * Enabled baseline features names as a single string where each is separated by a single space.
 * For example: "SSE SSE2 SSE3"
 * Required for logging purposes only.
 */
#define @P@WITH_CPU_BASELINE "@WITH_CPU_BASELINE@"

/**
 * @def @P@WITH_CPU_BASELINE_N
 * Number of enabled baseline features.
 */
#define @P@WITH_CPU_BASELINE_N @WITH_CPU_BASELINE_N@

/**
 * @def @P@WITH_CPU_DISPATCH
 * Dispatched features names as a single string where each is separated by a single space.
 */
#define @P@WITH_CPU_DISPATCH "@WITH_CPU_DISPATCH@"

/**
 * @def @P@WITH_CPU_DISPATCH_N
 * Number of enabled dispatched features.
 */
#define @P@WITH_CPU_DISPATCH_N @WITH_CPU_DISPATCH_N@

// Helper macros used by the macros below.
// Force one extra expansion pass over X.
#define @P@_CPU_EXPAND(X) X
// Token pasting; the extra indirection levels let the arguments be
// macro-expanded before they are concatenated.
#define @P@_CPU_CAT__(a, b) a ## b
#define @P@_CPU_CAT_(a, b) @P@_CPU_CAT__(a, b)
#define @P@_CPU_CAT(a, b) @P@_CPU_CAT_(a, b)
// Stringification; TOSTRING expands its argument first, STRINGIFY does not.
#define @P@_CPU_STRINGIFY(x) #x
#define @P@_CPU_TOSTRING(x) @P@_CPU_STRINGIFY(x)

/**
 * @def @P@WITH_CPU_BASELINE_CALL(EXEC_CB, ...)
 * Call each enabled baseline feature sorted by lowest interest
 * using preprocessor callback without testing whether the
 * feature is supported by CPU or not.
 *
 * Required for logging purposes only, for example, generating
 * a Python list to hold the information of the enabled features.
 *
 * Unwrapped Version:
 * @code
 * #define @P@WITH_CPU_BASELINE_CALL(EXEC_CB, ...) \
 *     @P@_CPU_EXPAND(EXEC_CB(SSE, __VA_ARGS__)) \
 *     @P@_CPU_EXPAND(EXEC_CB(SSE2, __VA_ARGS__)) \
 *     @P@_CPU_EXPAND(EXEC_CB(SSE3, __VA_ARGS__))
 * @endcode
 *
 * @param EXEC_CB The preprocessor callback to be called for each enabled baseline feature.
 * @param ... Additional arguments to be passed to the preprocessor callback.
 */
#define @P@WITH_CPU_BASELINE_CALL(EXEC_CB, ...) \
    @WITH_CPU_BASELINE_CALL@

/**
 * @def @P@WITH_CPU_DISPATCH_CALL(EXEC_CB, ...)
 * Similar to the above but for enabled dispatched features.
 *
 * @param EXEC_CB The preprocessor callback to be called for each enabled dispatched feature.
 * @param ... Additional arguments to be passed to the preprocessor callback.
 */
#define @P@WITH_CPU_DISPATCH_CALL(EXEC_CB, ...) \
    @WITH_CPU_DISPATCH_CALL@

/*
 * Defines the default behavior for the configurable macros derived from the configuration header
 * that is generated by the meson function `mod_features.multi_targets()`.
 *
 * These macros are replaced by the dispatch config headers once they are included.
 *
 * NOTE(review): the default expansion is a dangling `&&"message";` token sequence, presumably
 * so that using a dispatch macro without the generated config header fails to compile with
 * the message visible in the diagnostic -- confirm against the generated headers.
 */
#define @P@MTARGETS_CONF_BASELINE(CB, ...) \
    &&"Expected config header that generated by mod_features.multi_targets()";
#define @P@MTARGETS_CONF_DISPATCH(TEST_FEATURE_CB, CB, ...) \
    &&"Expected config header that generated by mod_features.multi_targets()";

/**
 * @def @P@CPU_DISPATCH_CURFX(NAME)
 *
 * Returns `NAME` suffixed with "_" + "the current target" during compiling
 * the generated static libraries that are derived from the Meson function
 * `mod_features.multi_targets()`.
 *
 * It also returns `NAME` as-is without any suffix when it comes to the baseline features or
 * in case the optimization is disabled.
 *
 * Note: `mod_features.multi_targets()` provides a unique target name within the compiler
 * definition called `@P@MTARGETS_CURRENT` on each generated library based on the specified
 * features within its parameter 'dispatch:'.
 *
 * For example:
 *
 * @code
 * # from meson
 * mod_features.multi_targets(
 *     'arithmetic.dispatch.h', 'arithmetic.c',
 *     baseline: [SSE3], dispatch: [AVX512_SKX, AVX2],
 *     prefix: '@P@'
 * )
 * @endcode
 *
 * @code
 * void @P@CPU_DISPATCH_CURFX(add)(const int *src0, const int *src1, int *dst)
 * {
 * #ifdef @P@HAVE_AVX512F // one of the implied features of AVX512_SKX
 *     // code
 * #elif defined(@P@HAVE_AVX2)
 *     // code
 * #elif defined(@P@HAVE_SSE3)
 *     // code
 * #else
 *     // Fallback code in case of no features enabled
 * #endif
 * }
 *
 * // Unwrapped version:
 * void add_AVX512_SKX(const int *src0, const int *src1, int *dst)
 * {...}
 * void add_AVX2(const int *src0, const int *src1, int *dst)
 * {...}
 * // baseline
 * void add(const int *src0, const int *src1, int *dst)
 * {...}
 * @endcode
 *
 * @param NAME The base name of the dispatched function or variable.
 */
#ifdef @P@MTARGETS_CURRENT
    // '@P@MTARGETS_CURRENT': only defined by the dispatch targets
    // within the meson function `mod_features.multi_targets()`
    #define @P@CPU_DISPATCH_CURFX(NAME) @P@_CPU_CAT(@P@_CPU_CAT(NAME, _), @P@MTARGETS_CURRENT)
#else
    #define @P@CPU_DISPATCH_CURFX(NAME) @P@_CPU_EXPAND(NAME)
#endif

/**
 * @def @P@CPU_DISPATCH_DECLARE(...)
 *
 * Provides forward declarations for the exported variables and functions
 * based on the enabled baseline and dispatched features.
 *
 * This macro requires including the config file that has been generated
 * by the meson function `mod_features.multi_targets()` to determine the enabled
 * baseline and dispatched features.
 *
 * For example:
 *
 * @code
 * # from meson
 * mod_features.multi_targets(
 *     'arithmetic.dispatch.h', 'arithmetic.c',
 *     baseline: [SSE3], dispatch: [AVX512_SKX, AVX2],
 *     prefix: '@P@'
 * )
 * @endcode
 *
 * @code
 * // from C
 * #include "arithmetic.dispatch.h"
 * @P@CPU_DISPATCH_DECLARE(void add, (const int *src0, const int *src1, int *dst))
 *
 * // Unwrapped version:
 * void add_AVX512_SKX(const int *src0, const int *src1, int *dst);
 * void add_AVX2(const int *src0, const int *src1, int *dst);
 * void add(const int *src0, const int *src1, int *dst); // baseline
 * @endcode
 *
 * @param ... The function or variable prototype to be declared,
 *            with the target-specific suffix added automatically.
 */
#define @P@CPU_DISPATCH_DECLARE(...) \
    @P@MTARGETS_CONF_DISPATCH(@P@CPU_DISPATCH_DECLARE_CHK_, @P@CPU_DISPATCH_DECLARE_CB_, __VA_ARGS__) \
    @P@MTARGETS_CONF_BASELINE(@P@CPU_DISPATCH_DECLARE_BASE_CB_, __VA_ARGS__)

// Preprocessor callbacks
// Emits `LEFT_TARGET_NAME <rest>;` for one dispatched target; DUMMY receives
// the (empty) result of the feature-test callback and is ignored.
#define @P@CPU_DISPATCH_DECLARE_CB_(DUMMY, TARGET_NAME, LEFT, ...) \
    @P@_CPU_CAT(@P@_CPU_CAT(LEFT, _), TARGET_NAME) __VA_ARGS__;
// Emits the unsuffixed baseline declaration.
#define @P@CPU_DISPATCH_DECLARE_BASE_CB_(LEFT, ...) \
    LEFT __VA_ARGS__;
// Dummy CPU runtime checking: expands to nothing since declarations need no test.
#define @P@CPU_DISPATCH_DECLARE_CHK_(FEATURE_NAME)

/**
 * @def @P@CPU_DISPATCH_DECLARE_XB(...)
 *
 * Same as `@P@CPU_DISPATCH_DECLARE` but exclude the baseline declaration even
 * if it was enabled within `mod_features.multi_targets()`.
 */
#define @P@CPU_DISPATCH_DECLARE_XB(...) \
    @P@MTARGETS_CONF_DISPATCH(@P@CPU_DISPATCH_DECLARE_CHK_, @P@CPU_DISPATCH_DECLARE_CB_, __VA_ARGS__)

/**
 * @def @P@CPU_DISPATCH_CALL(...)
 *
 * Helper macro used for runtime dispatching of the exported functions and variables
 * within the meson `mod_features.multi_targets()` function.
 *
 * This macro dispatches only one symbol based on the order of the specified features within
 * the meson function `mod_features.multi_targets()`. For example, if
 * `mod_features.multi_targets()` is called with
 * `dispatch: [features_highest_1, features_highest_2]`, the macro will test each enabled
 * feature against the CPU at runtime. Once a test fails, it moves to the next one in order
 * until falling back to the baseline.
 *
 * Similar to `@P@CPU_DISPATCH_DECLARE`, this macro requires including the config file that
 * has been generated by the meson function `mod_features.multi_targets()` to determine the
 * enabled baseline and dispatched features.
 *
 * Example usage:
 *
 * @code
 * # from meson
 * mod_features.multi_targets(
 *     'arithmetic.dispatch.h', 'arithmetic.c',
 *     baseline: [SSE3], dispatch: [AVX512_SKX, AVX2],
 *     prefix: '@P@'
 * )
 * @endcode
 *
 * @code
 * // from C
 * #include "arithmetic.dispatch.h"
 *
 * // Example 1:
 * @P@CPU_DISPATCH_CALL(add, (src0, src1, dst));
 *
 * // Unwrapped version:
 * @P@CPU_HAVE(AVX512_SKX) ? add_AVX512_SKX(src0, src1, dst) :
 *     @P@CPU_HAVE(AVX2) ? add_AVX2(src0, src1, dst) :
 *         add(src0, src1, dst); // baseline
 *
 * // Example 2:
 * typedef void (*func_type)(const int*, const int*, int*);
 * func_type func = @P@CPU_DISPATCH_CALL(add);
 *
 * // Unwrapped version:
 * func_type func = @P@CPU_HAVE(AVX512_SKX) ? add_AVX512_SKX :
 *     @P@CPU_HAVE(AVX2) ? add_AVX2 :
 *         add; // baseline
 *
 * // Example 3:
 * func_type func3;
 * @P@CPU_DISPATCH_CALL(func3 = add);
 *
 * // Unwrapped version:
 * @P@CPU_HAVE(AVX512_SKX) ? (func3 = add_AVX512_SKX) :
 *     @P@CPU_HAVE(AVX2) ? (func3 = add_AVX2) :
 *         (func3 = add); // baseline
 * @endcode
 *
 * @param ... The function or variable prototype to be called or assigned,
 *            with the target-specific suffix added automatically.
 */
#define @P@CPU_DISPATCH_CALL(...) \
    @P@MTARGETS_CONF_DISPATCH(@P@CPU_HAVE, @P@CPU_DISPATCH_CALL_CB_, __VA_ARGS__) \
    @P@MTARGETS_CONF_BASELINE(@P@CPU_DISPATCH_CALL_BASE_CB_, __VA_ARGS__)

// Preprocessor callbacks
// One link of the ternary chain: `(test) ? (LEFT_TARGET_NAME <rest>) :`.
#define @P@CPU_DISPATCH_CALL_CB_(TESTED_FEATURES, TARGET_NAME, LEFT, ...) \
    (TESTED_FEATURES) ? (@P@_CPU_CAT(@P@_CPU_CAT(LEFT, _), TARGET_NAME) __VA_ARGS__) :
// Final operand of the ternary chain: the unsuffixed baseline symbol.
#define @P@CPU_DISPATCH_CALL_BASE_CB_(LEFT, ...) \
    (LEFT __VA_ARGS__)

/**
 * @def @P@CPU_DISPATCH_CALL_XB(...)
 *
 * Same as `@P@CPU_DISPATCH_CALL` but exclude the baseline call even
 * if it was provided within meson `mod_features.multi_targets()`.
 *
 * Note: This macro returns void
 */
#define @P@CPU_DISPATCH_CALL_XB_CB_(TESTED_FEATURES, TARGET_NAME, LEFT, ...) \
    (TESTED_FEATURES) ? (void) (@P@_CPU_CAT(@P@_CPU_CAT(LEFT, _), TARGET_NAME) __VA_ARGS__) :
#define @P@CPU_DISPATCH_CALL_XB(...) \
    @P@MTARGETS_CONF_DISPATCH(@P@CPU_HAVE, @P@CPU_DISPATCH_CALL_XB_CB_, __VA_ARGS__) \
    ((void) 0 /* discarded expression value */)

/**
 * @def @P@CPU_DISPATCH_INFO()
 *
 * Returns an array of two strings containing the enabled target names
 * in each multi-target source.
 *
 * The first item represents the currently dispatched target,
 * while the second item contains the available targets that
 * can potentially be dispatched based on CPU capabilities.
 *
 * @code
 * #include "arithmetic.dispatch.h" // generated config file
 * const char *enabled_targets[] = @P@CPU_DISPATCH_INFO();
 *
 * printf("Current dispatched target: %s\n", enabled_targets[0]);
 * printf("Available targets: %s\n", enabled_targets[1]);
 * @endcode
 */
#define @P@CPU_DISPATCH_INFO() \
    { \
        @P@MTARGETS_CONF_DISPATCH(@P@CPU_HAVE, @P@CPU_DISPATCH_INFO_HIGH_CB_, DUMMY) \
        @P@MTARGETS_CONF_BASELINE(@P@CPU_DISPATCH_INFO_BASE_HIGH_CB_, DUMMY) \
        "", \
        @P@MTARGETS_CONF_DISPATCH(@P@CPU_HAVE, @P@CPU_DISPATCH_INFO_CB_, DUMMY) \
        @P@MTARGETS_CONF_BASELINE(@P@CPU_DISPATCH_INFO_BASE_CB_, DUMMY) \
        ""\
    }

// Preprocessor callbacks
// First array item: a ternary chain selecting the name of the first target whose test passes;
// the trailing "" in @P@CPU_DISPATCH_INFO terminates the chain.
#define @P@CPU_DISPATCH_INFO_HIGH_CB_(TESTED_FEATURES, TARGET_NAME, ...) \
    (TESTED_FEATURES) ? @P@_CPU_TOSTRING(TARGET_NAME) :
#define @P@CPU_DISPATCH_INFO_BASE_HIGH_CB_(...) \
    (1) ? "baseline(" @P@WITH_CPU_BASELINE ")" :
// Second array item: adjacent string literals that concatenate into a
// space-separated list of every enabled target.
#define @P@CPU_DISPATCH_INFO_CB_(TESTED_FEATURES, TARGET_NAME, ...) \
    @P@_CPU_TOSTRING(TARGET_NAME) " "
#define @P@CPU_DISPATCH_INFO_BASE_CB_(...) \
    "baseline(" @P@WITH_CPU_BASELINE ")"

/**
 * @def @P@CPU_DISPATCH_CALL_ALL(...)
 *
 * Same as `@P@CPU_DISPATCH_CALL` but dispatching all the required optimizations for
 * the exported functions and variables instead of highest interested one.
 * Returns void.
 */
#define @P@CPU_DISPATCH_CALL_ALL(...) \
    (@P@MTARGETS_CONF_DISPATCH(@P@CPU_HAVE, @P@CPU_DISPATCH_CALL_ALL_CB_, __VA_ARGS__) \
    @P@MTARGETS_CONF_BASELINE(@P@CPU_DISPATCH_CALL_ALL_BASE_CB_, __VA_ARGS__))

// Preprocessor callbacks
// One comma-operator operand per dispatched target. The call is cast to void so that
// both operands of the conditional have type void even when the dispatched symbol
// returns a value (a conditional with exactly one void operand is not valid C/C++).
#define @P@CPU_DISPATCH_CALL_ALL_CB_(TESTED_FEATURES, TARGET_NAME, LEFT, ...) \
    ((TESTED_FEATURES) ? (void) (@P@_CPU_CAT(@P@_CPU_CAT(LEFT, _), TARGET_NAME) __VA_ARGS__) : (void) 0),
#define @P@CPU_DISPATCH_CALL_ALL_BASE_CB_(LEFT, ...) \
    ( LEFT __VA_ARGS__ )

// Brings the headers files of enabled CPU features
#ifdef @P@HAVE_SSE
    #include <xmmintrin.h>
#endif
#ifdef @P@HAVE_SSE2
    #include <emmintrin.h>
#endif
#ifdef @P@HAVE_SSE3
    #include <pmmintrin.h>
#endif
#ifdef @P@HAVE_SSSE3
    #include <tmmintrin.h>
#endif
#ifdef @P@HAVE_SSE41
    #include <smmintrin.h>
#endif
#ifdef @P@HAVE_POPCNT
    #ifdef _MSC_VER
        #include <nmmintrin.h>
    #else
        #include <popcntintrin.h>
    #endif
#endif
#ifdef @P@HAVE_AVX
    #include <immintrin.h>
#endif

#if defined(@P@HAVE_XOP) || defined(@P@HAVE_FMA4)
    #include <x86intrin.h>
#endif

#if (defined(@P@HAVE_VSX) || defined(@P@HAVE_VX)) && !defined(__cplusplus) && defined(bool)
    /*
     * "altivec.h" header contains the definitions(bool, vector, pixel),
     * usually in c++ we undefine them after including the header.
     * It's better anyway to take them off and use built-in types(__vector, __pixel, __bool) instead,
     * since c99 supports bool variables which may lead to ambiguous errors.
     */
    // Back up 'bool' (a macro here, per the `defined(bool)` test above) as a typedef before
    // including the vector headers, so it can be restored after the `#undef bool` below.
    #define NPY__CPU_DISPATCH_GUARD_BOOL
    typedef bool npy__cpu_dispatch_guard_bool;
#endif
#ifdef @P@HAVE_VSX
    #include <altivec.h>
#endif
#ifdef @P@HAVE_VX
    #include <vecintrin.h>
#endif
#if (defined(@P@HAVE_VSX) || defined(@P@HAVE_VX))
    #undef bool
    #undef vector
    #undef pixel
    #ifdef NPY__CPU_DISPATCH_GUARD_BOOL
        // Restore 'bool' from the typedef saved above.
        #define bool npy__cpu_dispatch_guard_bool
        #undef NPY__CPU_DISPATCH_GUARD_BOOL
    #endif
#endif

#ifdef @P@HAVE_NEON
    #include <arm_neon.h>
#endif

#ifdef @P@HAVE_RVV
    #include <riscv_vector.h>
#endif

#ifdef @P@HAVE_LSX
    #include <lsxintrin.h>
#endif

#endif // @P@_CPU_DISPATCHER_CONF_H_