#ifndef NUMPY_CORE_SRC_COMMON_LOWLEVEL_STRIDED_LOOPS_H_ #define NUMPY_CORE_SRC_COMMON_LOWLEVEL_STRIDED_LOOPS_H_ #include "common.h" #include "npy_config.h" #include "array_method.h" #include "dtype_transfer.h" #include "mem_overlap.h" #include "mapping.h" /* For PyArray_ macros used below */ #include "numpy/ndarrayobject.h" /* * NOTE: This API should remain private for the time being, to allow * for further refinement. I think the 'aligned' mechanism * needs changing, for example. * * Note: Updated in 2018 to distinguish "true" from "uint" alignment. */ /* * This function pointer is for unary operations that input an * arbitrarily strided one-dimensional array segment and output * an arbitrarily strided array segment of the same size. * It may be a fully general function, or a specialized function * when the strides or item size have particular known values. * * Examples of unary operations are a straight copy, a byte-swap, * and a casting operation, * * The 'transferdata' parameter is slightly special, following a * generic auxiliary data pattern defined in ndarraytypes.h * Use NPY_AUXDATA_CLONE and NPY_AUXDATA_FREE to deal with this data. * */ // TODO: FIX! That comment belongs to something now in array-method /* * This is for pointers to functions which behave exactly as * for PyArrayMethod_StridedLoop, but with an additional mask controlling * which values are transformed. * * TODO: We should move this mask "capability" to the ArrayMethod itself * probably. Although for NumPy internal things this works decently, * and exposing it there should be well thought out to be useful beyond * NumPy if possible. * * In particular, the 'i'-th element is operated on if and only if * mask[i*mask_stride] is true. */ typedef int (PyArray_MaskedStridedUnaryOp)( PyArrayMethod_Context *context, char *const *args, const npy_intp *dimensions, const npy_intp *strides, npy_bool *mask, npy_intp mask_stride, NpyAuxData *auxdata); /* * Gives back a function pointer to a specialized function for copying * strided memory. Returns NULL if there is a problem with the inputs. * * aligned: * Should be 1 if the src and dst pointers always point to * locations at which a uint of equal size to dtype->elsize * would be aligned, 0 otherwise. * src_stride: * Should be the src stride if it will always be the same, * NPY_MAX_INTP otherwise. * dst_stride: * Should be the dst stride if it will always be the same, * NPY_MAX_INTP otherwise. * itemsize: * Should be the item size if it will always be the same, 0 otherwise. * */ NPY_NO_EXPORT PyArrayMethod_StridedLoop * PyArray_GetStridedCopyFn(int aligned, npy_intp src_stride, npy_intp dst_stride, npy_intp itemsize); /* * Gives back a function pointer to a specialized function for copying * and swapping strided memory. This assumes each element is a single * value to be swapped. * * For information on the 'aligned', 'src_stride' and 'dst_stride' parameters * see above. * * Parameters are as for PyArray_GetStridedCopyFn. */ NPY_NO_EXPORT PyArrayMethod_StridedLoop * PyArray_GetStridedCopySwapFn(int aligned, npy_intp src_stride, npy_intp dst_stride, npy_intp itemsize); /* * Gives back a function pointer to a specialized function for copying * and swapping strided memory. This assumes each element is a pair * of values, each of which needs to be swapped. * * For information on the 'aligned', 'src_stride' and 'dst_stride' parameters * see above. * * Parameters are as for PyArray_GetStridedCopyFn. 
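 *
 * Example usage (a rough sketch, not copied from a real call site, using
 * the plain copy variant; the swap variants are obtained and invoked the
 * same way). The returned pointer follows the PyArrayMethod_StridedLoop
 * convention with args[0] the source and args[1] the destination; these
 * specialized copy loops do not use their context/auxdata arguments, so
 * this sketch assumes passing NULL for both is acceptable:
 *
 *      PyArrayMethod_StridedLoop *copy_fn;
 *      char *args[2] = {src, dst};
 *      npy_intp strides[2] = {src_stride, dst_stride};
 *
 *      copy_fn = PyArray_GetStridedCopyFn(aligned, src_stride, dst_stride,
 *                                         itemsize);
 *      if (copy_fn == NULL) {
 *          // no specialized loop available for these inputs
 *          return -1;
 *      }
 *      copy_fn(NULL, args, &count, strides, NULL);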
 */
NPY_NO_EXPORT PyArrayMethod_StridedLoop *
PyArray_GetStridedCopySwapPairFn(int aligned,
                            npy_intp src_stride, npy_intp dst_stride,
                            npy_intp itemsize);

/*
 * Gives back a transfer function and transfer data pair which copies
 * the data from source to dest, truncating it if the data doesn't
 * fit, and padding with zero bytes if there's too much space.
 *
 * For information on the 'aligned', 'src_stride' and 'dst_stride'
 * parameters see above.
 *
 * Returns NPY_SUCCEED or NPY_FAIL
 */
NPY_NO_EXPORT int
PyArray_GetStridedZeroPadCopyFn(int aligned, int unicode_swap,
                            npy_intp src_stride, npy_intp dst_stride,
                            npy_intp src_itemsize, npy_intp dst_itemsize,
                            PyArrayMethod_StridedLoop **outstransfer,
                            NpyAuxData **outtransferdata);

/*
 * For casts between built-in numeric types,
 * this produces a function pointer for casting from src_type_num
 * to dst_type_num. If a conversion is unsupported, returns NULL
 * without setting a Python exception.
 */
NPY_NO_EXPORT PyArrayMethod_StridedLoop *
PyArray_GetStridedNumericCastFn(int aligned,
                            npy_intp src_stride, npy_intp dst_stride,
                            int src_type_num, int dst_type_num);

/*
 * Gets an operation which copies elements of the given dtype,
 * swapping if the dtype isn't in native byte order (NBO).
 *
 * Returns NPY_SUCCEED or NPY_FAIL
 */
NPY_NO_EXPORT int
PyArray_GetDTypeCopySwapFn(int aligned,
                            npy_intp src_stride, npy_intp dst_stride,
                            PyArray_Descr *dtype,
                            PyArrayMethod_StridedLoop **outstransfer,
                            NpyAuxData **outtransferdata);

/*
 * If it's possible, gives back a transfer function which casts and/or
 * byte swaps data with the dtype 'src_dtype' into data with the dtype
 * 'dst_dtype'. If the auxiliary transfer data stored in 'cast_info' ends
 * up non-NULL, it is owned by 'cast_info' and released by
 * `NPY_cast_info_xfree` when the transfer function is no longer required.
 *
 * aligned:
 *      Should be 1 if the src and dst pointers always point to
 *      locations at which a uint of equal size to dtype->elsize
 *      would be aligned, 0 otherwise.
 * src_stride:
 *      Should be the src stride if it will always be the same,
 *      NPY_MAX_INTP otherwise.
 * dst_stride:
 *      Should be the dst stride if it will always be the same,
 *      NPY_MAX_INTP otherwise.
 * src_dtype:
 *      The data type of source data. Must not be NULL.
 * dst_dtype:
 *      The data type of destination data. If this is NULL and
 *      move_references is 1, a transfer function which decrements
 *      source data references is produced.
 * move_references:
 *      If 0, the destination data gets new reference ownership.
 *      If 1, the references from the source data are moved to
 *      the destination data.
 * cast_info:
 *      A pointer to an (uninitialized) `NPY_cast_info` struct; the caller
 *      must call `NPY_cast_info_xfree` on it (except on error) and handle
 *      its memory lifetime.
 * out_flags:
 *      The ArrayMethod flags of the returned transfer function are stored
 *      here. In particular, NPY_METH_REQUIRES_PYAPI is set when the
 *      function needs to call into the Python API (and therefore requires
 *      the GIL).
 *
 * WARNING: If you set move_references to 1, it is best that src_stride is
 * never zero when calling the transfer function. Otherwise, the
 * first destination reference will get the value and all the rest
 * will get NULL.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
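 *
 * Example usage (a condensed sketch of the typical pattern; 'src', 'dst',
 * 'N', the strides and the dtypes are assumed to be set up already, and
 * error handling is abbreviated):
 *
 *      NPY_cast_info cast_info;
 *      NPY_ARRAYMETHOD_FLAGS flags;
 *      char *args[2] = {src, dst};
 *      npy_intp strides[2] = {src_stride, dst_stride};
 *
 *      if (PyArray_GetDTypeTransferFunction(aligned,
 *                  src_stride, dst_stride, src_dtype, dst_dtype,
 *                  0, &cast_info, &flags) != NPY_SUCCEED) {
 *          return -1;
 *      }
 *      // if (flags & NPY_METH_REQUIRES_PYAPI), hold the GIL around the call
 *      if (cast_info.func(&cast_info.context, args, &N, strides,
 *                         cast_info.auxdata) < 0) {
 *          NPY_cast_info_xfree(&cast_info);
 *          return -1;
 *      }
 *      NPY_cast_info_xfree(&cast_info);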
*/ NPY_NO_EXPORT int PyArray_GetDTypeTransferFunction(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, int move_references, NPY_cast_info *cast_info, NPY_ARRAYMETHOD_FLAGS *out_flags); NPY_NO_EXPORT int get_fields_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, int move_references, PyArrayMethod_StridedLoop **out_stransfer, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *out_flags); NPY_NO_EXPORT int get_subarray_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, int move_references, PyArrayMethod_StridedLoop **out_stransfer, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *out_flags); /* * This is identical to PyArray_GetDTypeTransferFunction, but returns a * transfer function which also takes a mask as a parameter. The mask is used * to determine which values to copy, and data is transferred exactly when * mask[i*mask_stride] is true. * * If move_references is true, values which are not copied to the * destination will still have their source reference decremented. * * If mask_dtype is NPY_BOOL or NPY_UINT8, each full element is either * transferred or not according to the mask as described above. If * dst_dtype and mask_dtype are both struct dtypes, their names must * match exactly, and the dtype of each leaf field in mask_dtype must * be either NPY_BOOL or NPY_UINT8. */ NPY_NO_EXPORT int PyArray_GetMaskedDTypeTransferFunction(int aligned, npy_intp src_stride, npy_intp dst_stride, npy_intp mask_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, PyArray_Descr *mask_dtype, int move_references, NPY_cast_info *cast_info, NPY_ARRAYMETHOD_FLAGS *out_flags); /* * Casts the specified number of elements from 'src' with data type * 'src_dtype' to 'dst' with 'dst_dtype'. See * PyArray_GetDTypeTransferFunction for more details. * * Returns NPY_SUCCEED or NPY_FAIL. */ NPY_NO_EXPORT int PyArray_CastRawArrays(npy_intp count, char *src, char *dst, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, int move_references); /* * These two functions copy or convert the data of an n-dimensional array * to/from a 1-dimensional strided buffer. These functions will only call * 'stransfer' with the provided dst_stride/src_stride and * dst_strides[0]/src_strides[0], so the caller can use those values to * specialize the function. * Note that even if ndim == 0, everything needs to be set as if ndim == 1. * * The return value is the number of elements it couldn't copy. A return value * of 0 means all elements were copied, a larger value means the end of * the n-dimensional array was reached before 'count' elements were copied. * A negative return value indicates an error occurred. * * ndim: * The number of dimensions of the n-dimensional array. * dst/src/mask: * The destination, source or mask starting pointer. * dst_stride/src_stride/mask_stride: * The stride of the 1-dimensional strided buffer * dst_strides/src_strides: * The strides of the n-dimensional array. * dst_strides_inc/src_strides_inc: * How much to add to the ..._strides pointer to get to the next stride. * coords: * The starting coordinates in the n-dimensional array. * coords_inc: * How much to add to the coords pointer to get to the next coordinate. * shape: * The shape of the n-dimensional array. * shape_inc: * How much to add to the shape pointer to get to the next shape entry. 
 * count:
 *      How many elements to transfer.
 * src_itemsize:
 *      How big each element is. If transferring between elements of
 *      different sizes, for example a casting operation, the 'stransfer'
 *      function should be specialized for that, in which case 'stransfer'
 *      will use this parameter as the source item size.
 * cast_info:
 *      Pointer to the NPY_cast_info struct which summarizes all
 *      information necessary to perform a cast.
 */
NPY_NO_EXPORT npy_intp
PyArray_TransferNDimToStrided(npy_intp ndim,
                char *dst, npy_intp dst_stride,
                char *src, npy_intp const *src_strides, npy_intp src_strides_inc,
                npy_intp const *coords, npy_intp coords_inc,
                npy_intp const *shape, npy_intp shape_inc,
                npy_intp count, npy_intp src_itemsize,
                NPY_cast_info *cast_info);

NPY_NO_EXPORT npy_intp
PyArray_TransferStridedToNDim(npy_intp ndim,
                char *dst, npy_intp const *dst_strides, npy_intp dst_strides_inc,
                char *src, npy_intp src_stride,
                npy_intp const *coords, npy_intp coords_inc,
                npy_intp const *shape, npy_intp shape_inc,
                npy_intp count, npy_intp src_itemsize,
                NPY_cast_info *cast_info);

NPY_NO_EXPORT npy_intp
PyArray_TransferMaskedStridedToNDim(npy_intp ndim,
                char *dst, npy_intp const *dst_strides, npy_intp dst_strides_inc,
                char *src, npy_intp src_stride,
                npy_bool *mask, npy_intp mask_stride,
                npy_intp const *coords, npy_intp coords_inc,
                npy_intp const *shape, npy_intp shape_inc,
                npy_intp count, npy_intp src_itemsize,
                NPY_cast_info *cast_info);

NPY_NO_EXPORT int
mapiter_trivial_get(
        PyArrayObject *self, PyArrayObject *ind, PyArrayObject *result,
        int is_aligned, NPY_cast_info *cast_info);

NPY_NO_EXPORT int
mapiter_trivial_set(
        PyArrayObject *self, PyArrayObject *ind, PyArrayObject *result,
        int is_aligned, NPY_cast_info *cast_info);

NPY_NO_EXPORT int
mapiter_get(
        PyArrayMapIterObject *mit, NPY_cast_info *cast_info,
        NPY_ARRAYMETHOD_FLAGS flags, int is_aligned);

NPY_NO_EXPORT int
mapiter_set(
        PyArrayMapIterObject *mit, NPY_cast_info *cast_info,
        NPY_ARRAYMETHOD_FLAGS flags, int is_aligned);

/*
 * Prepares shape and strides for a simple raw array iteration.
 * This sorts the strides into FORTRAN order, reverses any negative
 * strides, then coalesces axes where possible. The results are
 * filled in the output parameters.
 *
 * This is intended for simple, lightweight iteration over arrays
 * where no buffering of any kind is needed, and the array may
 * not be stored as a PyArrayObject.
 *
 * You can use this together with NPY_RAW_ITER_START and
 * NPY_RAW_ITER_ONE_NEXT to handle the looping boilerplate of everything
 * but the innermost loop (which is for idim == 0).
 *
 * Returns 0 on success, -1 on failure.
 */
NPY_NO_EXPORT int
PyArray_PrepareOneRawArrayIter(int ndim, npy_intp const *shape,
                            char *data, npy_intp const *strides,
                            int *out_ndim, npy_intp *out_shape,
                            char **out_data, npy_intp *out_strides);

/*
 * The same as PyArray_PrepareOneRawArrayIter, but for two
 * operands instead of one. Any broadcasting of the two operands
 * should have already been done before calling this function,
 * as the ndim and shape are only specified once for both operands.
 *
 * Only the strides of the first operand are used to reorder
 * the dimensions; no attempt to consider all the strides together
 * is made, as is done in the NpyIter object.
 *
 * You can use this together with NPY_RAW_ITER_START and
 * NPY_RAW_ITER_TWO_NEXT to handle the looping boilerplate of everything
 * but the innermost loop (which is for idim == 0).
 *
 * Returns 0 on success, -1 on failure.
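 *
 * Example usage (a sketch of the intended pattern, modeled on NumPy's
 * internal raw-assignment loops; 'ndim', 'shape', 'dst_data'/'dst_strides'
 * and 'src_data'/'src_strides' are assumed to describe two arrays of the
 * same, already broadcast, shape):
 *
 *      int idim, ndim_it;
 *      npy_intp shape_it[NPY_MAXDIMS], coord[NPY_MAXDIMS];
 *      npy_intp dst_strides_it[NPY_MAXDIMS], src_strides_it[NPY_MAXDIMS];
 *      char *dst_it, *src_it;
 *
 *      if (PyArray_PrepareTwoRawArrayIter(ndim, shape,
 *                      dst_data, dst_strides, src_data, src_strides,
 *                      &ndim_it, shape_it,
 *                      &dst_it, dst_strides_it,
 *                      &src_it, src_strides_it) < 0) {
 *          return -1;
 *      }
 *
 *      NPY_RAW_ITER_START(idim, ndim_it, coord, shape_it) {
 *          // innermost loop: process shape_it[0] elements starting at
 *          // dst_it/src_it, stepping by dst_strides_it[0]/src_strides_it[0]
 *      } NPY_RAW_ITER_TWO_NEXT(idim, ndim_it, coord, shape_it,
 *                              dst_it, dst_strides_it,
 *                              src_it, src_strides_it);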
 */
NPY_NO_EXPORT int
PyArray_PrepareTwoRawArrayIter(int ndim, npy_intp const *shape,
                            char *dataA, npy_intp const *stridesA,
                            char *dataB, npy_intp const *stridesB,
                            int *out_ndim, npy_intp *out_shape,
                            char **out_dataA, npy_intp *out_stridesA,
                            char **out_dataB, npy_intp *out_stridesB);

/*
 * The same as PyArray_PrepareOneRawArrayIter, but for three
 * operands instead of one. Any broadcasting of the three operands
 * should have already been done before calling this function,
 * as the ndim and shape are only specified once for all operands.
 *
 * Only the strides of the first operand are used to reorder
 * the dimensions; no attempt to consider all the strides together
 * is made, as is done in the NpyIter object.
 *
 * You can use this together with NPY_RAW_ITER_START and
 * NPY_RAW_ITER_THREE_NEXT to handle the looping boilerplate of everything
 * but the innermost loop (which is for idim == 0).
 *
 * Returns 0 on success, -1 on failure.
 */
NPY_NO_EXPORT int
PyArray_PrepareThreeRawArrayIter(int ndim, npy_intp const *shape,
                            char *dataA, npy_intp const *stridesA,
                            char *dataB, npy_intp const *stridesB,
                            char *dataC, npy_intp const *stridesC,
                            int *out_ndim, npy_intp *out_shape,
                            char **out_dataA, npy_intp *out_stridesA,
                            char **out_dataB, npy_intp *out_stridesB,
                            char **out_dataC, npy_intp *out_stridesC);

/*
 * Return number of elements that must be peeled from the start of 'addr'
 * with 'nvals' elements of size 'esize' in order to reach blockable
 * alignment. The required alignment in bytes is passed as the 'alignment'
 * argument and must be a power of two. This function is used to prepare
 * an array for blocking. See the 'npy_blocked_end' function documentation
 * below for an example of how this function is used.
 */
static inline npy_intp
npy_aligned_block_offset(const void * addr, const npy_uintp esize,
                         const npy_uintp alignment, const npy_uintp nvals)
{
    npy_uintp offset, peel;

    offset = (npy_uintp)addr & (alignment - 1);
    peel = offset ? (alignment - offset) / esize : 0;
    peel = (peel <= nvals) ?
peel : nvals; assert(peel <= NPY_MAX_INTP); return (npy_intp)peel; } /* * Return upper loop bound for an array of 'nvals' elements * of size 'esize' peeled by 'offset' elements and blocking to * a vector size of 'vsz' in bytes * * example usage: * npy_intp i; * double v[101]; * npy_intp esize = sizeof(v[0]); * npy_intp peel = npy_aligned_block_offset(v, esize, 16, n); * // peel to alignment 16 * for (i = 0; i < peel; i++) * * // simd vectorized operation * for (; i < npy_blocked_end(peel, esize, 16, n); i += 16 / esize) * * // handle scalar rest * for(; i < n; i++) * */ static inline npy_intp npy_blocked_end(const npy_uintp peel, const npy_uintp esize, const npy_uintp vsz, const npy_uintp nvals) { npy_uintp ndiff = nvals - peel; npy_uintp res = (ndiff - ndiff % (vsz / esize)); assert(nvals >= peel); assert(res <= NPY_MAX_INTP); return (npy_intp)(res); } /* byte swapping functions */ static inline npy_uint16 npy_bswap2(npy_uint16 x) { return ((x & 0xffu) << 8) | (x >> 8); } /* * treat as int16 and byteswap unaligned memory, * some cpus don't support unaligned access */ static inline void npy_bswap2_unaligned(char * x) { char a = x[0]; x[0] = x[1]; x[1] = a; } static inline npy_uint32 npy_bswap4(npy_uint32 x) { #ifdef HAVE___BUILTIN_BSWAP32 return __builtin_bswap32(x); #else return ((x & 0xffu) << 24) | ((x & 0xff00u) << 8) | ((x & 0xff0000u) >> 8) | (x >> 24); #endif } static inline void npy_bswap4_unaligned(char * x) { char a = x[0]; x[0] = x[3]; x[3] = a; a = x[1]; x[1] = x[2]; x[2] = a; } static inline npy_uint64 npy_bswap8(npy_uint64 x) { #ifdef HAVE___BUILTIN_BSWAP64 return __builtin_bswap64(x); #else return ((x & 0xffULL) << 56) | ((x & 0xff00ULL) << 40) | ((x & 0xff0000ULL) << 24) | ((x & 0xff000000ULL) << 8) | ((x & 0xff00000000ULL) >> 8) | ((x & 0xff0000000000ULL) >> 24) | ((x & 0xff000000000000ULL) >> 40) | ( x >> 56); #endif } static inline void npy_bswap8_unaligned(char * x) { char a = x[0]; x[0] = x[7]; x[7] = a; a = x[1]; x[1] = x[6]; x[6] = a; a = x[2]; x[2] = x[5]; x[5] = a; a = x[3]; x[3] = x[4]; x[4] = a; } /* Start raw iteration */ #define NPY_RAW_ITER_START(idim, ndim, coord, shape) \ memset((coord), 0, (ndim) * sizeof(coord[0])); \ do { /* Increment to the next n-dimensional coordinate for one raw array */ #define NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides) \ for ((idim) = 1; (idim) < (ndim); ++(idim)) { \ if (++(coord)[idim] == (shape)[idim]) { \ (coord)[idim] = 0; \ (data) -= ((shape)[idim] - 1) * (strides)[idim]; \ } \ else { \ (data) += (strides)[idim]; \ break; \ } \ } \ } while ((idim) < (ndim)) /* Increment to the next n-dimensional coordinate for two raw arrays */ #define NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape, \ dataA, stridesA, dataB, stridesB) \ for ((idim) = 1; (idim) < (ndim); ++(idim)) { \ if (++(coord)[idim] == (shape)[idim]) { \ (coord)[idim] = 0; \ (dataA) -= ((shape)[idim] - 1) * (stridesA)[idim]; \ (dataB) -= ((shape)[idim] - 1) * (stridesB)[idim]; \ } \ else { \ (dataA) += (stridesA)[idim]; \ (dataB) += (stridesB)[idim]; \ break; \ } \ } \ } while ((idim) < (ndim)) /* Increment to the next n-dimensional coordinate for three raw arrays */ #define NPY_RAW_ITER_THREE_NEXT(idim, ndim, coord, shape, \ dataA, stridesA, \ dataB, stridesB, \ dataC, stridesC) \ for ((idim) = 1; (idim) < (ndim); ++(idim)) { \ if (++(coord)[idim] == (shape)[idim]) { \ (coord)[idim] = 0; \ (dataA) -= ((shape)[idim] - 1) * (stridesA)[idim]; \ (dataB) -= ((shape)[idim] - 1) * (stridesB)[idim]; \ (dataC) -= ((shape)[idim] - 1) * (stridesC)[idim]; \ } \ 
else { \ (dataA) += (stridesA)[idim]; \ (dataB) += (stridesB)[idim]; \ (dataC) += (stridesC)[idim]; \ break; \ } \ } \ } while ((idim) < (ndim)) /* Increment to the next n-dimensional coordinate for four raw arrays */ #define NPY_RAW_ITER_FOUR_NEXT(idim, ndim, coord, shape, \ dataA, stridesA, \ dataB, stridesB, \ dataC, stridesC, \ dataD, stridesD) \ for ((idim) = 1; (idim) < (ndim); ++(idim)) { \ if (++(coord)[idim] == (shape)[idim]) { \ (coord)[idim] = 0; \ (dataA) -= ((shape)[idim] - 1) * (stridesA)[idim]; \ (dataB) -= ((shape)[idim] - 1) * (stridesB)[idim]; \ (dataC) -= ((shape)[idim] - 1) * (stridesC)[idim]; \ (dataD) -= ((shape)[idim] - 1) * (stridesD)[idim]; \ } \ else { \ (dataA) += (stridesA)[idim]; \ (dataB) += (stridesB)[idim]; \ (dataC) += (stridesC)[idim]; \ (dataD) += (stridesD)[idim]; \ break; \ } \ } \ } while ((idim) < (ndim)) /* * TRIVIAL ITERATION * * In some cases when the iteration order isn't important, iteration over * arrays is trivial. This is the case when: * * The array has 0 or 1 dimensions. * * The array is C or Fortran contiguous. * Use of an iterator can be skipped when this occurs. These macros assist * in detecting and taking advantage of the situation. Note that it may * be worthwhile to further check if the stride is a contiguous stride * and take advantage of that. * * Here is example code for a single array: * * if (PyArray_TRIVIALLY_ITERABLE(self)) { * char *data; * npy_intp count, stride; * * PyArray_PREPARE_TRIVIAL_ITERATION(self, count, data, stride); * * while (count--) { * // Use the data pointer * * data += stride; * } * } * else { * // Create iterator, etc... * } * */ /* * Note: Equivalently iterable macro requires one of arr1 or arr2 be * trivially iterable to be valid. */ /** * Determine whether two arrays are safe for trivial iteration in cases where * some of the arrays may be modified. * * In-place iteration is safe if one of the following is true: * * - Both arrays are read-only * - The arrays do not have overlapping memory (based on a check that may be too * strict) * - The strides match, and the non-read-only array base addresses are equal or * before the read-only one, ensuring correct data dependency. */ #define PyArray_TRIVIALLY_ITERABLE_OP_NOREAD 0 #define PyArray_TRIVIALLY_ITERABLE_OP_READ 1 #define PyArray_TRIVIALLY_ITERABLE(arr) ( \ PyArray_NDIM(arr) <= 1 || \ PyArray_CHKFLAGS(arr, NPY_ARRAY_C_CONTIGUOUS) || \ PyArray_CHKFLAGS(arr, NPY_ARRAY_F_CONTIGUOUS) \ ) #define PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size, arr) ( \ assert(PyArray_TRIVIALLY_ITERABLE(arr)), \ size == 1 ? 0 : ((PyArray_NDIM(arr) == 1) ? \ PyArray_STRIDE(arr, 0) : PyArray_ITEMSIZE(arr))) static inline int PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(PyArrayObject *arr1, PyArrayObject *arr2, int arr1_read, int arr2_read) { npy_intp size1, size2, stride1, stride2; int arr1_ahead = 0, arr2_ahead = 0; if (arr1_read && arr2_read) { return 1; } size1 = PyArray_SIZE(arr1); stride1 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size1, arr1); /* * arr1 == arr2 is common for in-place operations, so we fast-path it here. * TODO: The stride1 != 0 check rejects broadcast arrays. This may affect * self-overlapping arrays, but seems only necessary due to * `try_trivial_single_output_loop` not rejecting broadcast outputs. */ if (arr1 == arr2 && stride1 != 0) { return 1; } if (solve_may_share_memory(arr1, arr2, 1) == 0) { return 1; } /* * Arrays overlapping in memory may be equivalently iterable if input * arrays stride ahead faster than output arrays. 
*/ size2 = PyArray_SIZE(arr2); stride2 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size2, arr2); /* * Arrays with zero stride are never "ahead" since the element is reused * (at this point we know the array extents overlap). */ if (stride1 > 0) { arr1_ahead = (stride1 >= stride2 && PyArray_BYTES(arr1) >= PyArray_BYTES(arr2)); } else if (stride1 < 0) { arr1_ahead = (stride1 <= stride2 && PyArray_BYTES(arr1) <= PyArray_BYTES(arr2)); } if (stride2 > 0) { arr2_ahead = (stride2 >= stride1 && PyArray_BYTES(arr2) >= PyArray_BYTES(arr1)); } else if (stride2 < 0) { arr2_ahead = (stride2 <= stride1 && PyArray_BYTES(arr2) <= PyArray_BYTES(arr1)); } return (!arr1_read || arr1_ahead) && (!arr2_read || arr2_ahead); } #define PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) ( \ PyArray_NDIM(arr1) == PyArray_NDIM(arr2) && \ PyArray_CompareLists(PyArray_DIMS(arr1), \ PyArray_DIMS(arr2), \ PyArray_NDIM(arr1)) && \ (PyArray_FLAGS(arr1)&(NPY_ARRAY_C_CONTIGUOUS| \ NPY_ARRAY_F_CONTIGUOUS)) & \ (PyArray_FLAGS(arr2)&(NPY_ARRAY_C_CONTIGUOUS| \ NPY_ARRAY_F_CONTIGUOUS)) \ ) #define PyArray_EQUIVALENTLY_ITERABLE(arr1, arr2, arr1_read, arr2_read) ( \ PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) && \ PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK( \ arr1, arr2, arr1_read, arr2_read)) #define PyArray_PREPARE_TRIVIAL_ITERATION(arr, count, data, stride) \ count = PyArray_SIZE(arr); \ data = PyArray_BYTES(arr); \ stride = ((PyArray_NDIM(arr) == 0) ? 0 : \ ((PyArray_NDIM(arr) == 1) ? \ PyArray_STRIDE(arr, 0) : \ PyArray_ITEMSIZE(arr))); #define PyArray_PREPARE_TRIVIAL_PAIR_ITERATION(arr1, arr2, \ count, \ data1, data2, \ stride1, stride2) { \ npy_intp size1 = PyArray_SIZE(arr1); \ npy_intp size2 = PyArray_SIZE(arr2); \ count = ((size1 > size2) || size1 == 0) ? size1 : size2; \ data1 = PyArray_BYTES(arr1); \ data2 = PyArray_BYTES(arr2); \ stride1 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size1, arr1); \ stride2 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size2, arr2); \ } #endif /* NUMPY_CORE_SRC_COMMON_LOWLEVEL_STRIDED_LOOPS_H_ */