/* * This is a PRIVATE INTERNAL NumPy header, intended to be used *ONLY* * by the iterator implementation code. All other internal NumPy code * should use the exposed iterator API. */ #ifndef NPY_ITERATOR_IMPLEMENTATION_CODE #error This header is intended for use ONLY by iterator implementation code. #endif #ifndef NUMPY_CORE_SRC_MULTIARRAY_NDITER_IMPL_H_ #define NUMPY_CORE_SRC_MULTIARRAY_NDITER_IMPL_H_ #define NPY_NO_DEPRECATED_API NPY_API_VERSION #define _MULTIARRAYMODULE #define PY_SSIZE_T_CLEAN #include #include #include "numpy/arrayobject.h" #include "convert_datatype.h" #include "lowlevel_strided_loops.h" #include "dtype_transfer.h" #include "dtype_traversal.h" /********** ITERATOR CONSTRUCTION TIMING **************/ #define NPY_IT_CONSTRUCTION_TIMING 0 #if NPY_IT_CONSTRUCTION_TIMING #define NPY_IT_TIME_POINT(var) { \ unsigned int hi, lo; \ __asm__ __volatile__ ( \ "rdtsc" \ : "=d" (hi), "=a" (lo)); \ var = (((unsigned long long)hi) << 32) | lo; \ } #define NPY_IT_PRINT_TIME_START(var) { \ printf("%30s: start\n", #var); \ c_temp = var; \ } #define NPY_IT_PRINT_TIME_VAR(var) { \ printf("%30s: %6.0f clocks\n", #var, \ ((double)(var-c_temp))); \ c_temp = var; \ } #else #define NPY_IT_TIME_POINT(var) #endif /******************************************************/ /********** PRINTF DEBUG TRACING **************/ #define NPY_IT_DBG_TRACING 0 /* TODO: Can remove the n-args macros, old C89 didn't have variadic macros. */ #if NPY_IT_DBG_TRACING #define NPY_IT_DBG_PRINT(...) printf(__VA_ARGS__) #define NPY_IT_DBG_PRINT1(s, p1) NPY_IT_DBG_PRINT(s, p1) #define NPY_IT_DBG_PRINT2(s, p1, p2) NPY_IT_DBG_PRINT(s, p1, p2) #define NPY_IT_DBG_PRINT3(s, p1, p2, p3) NPY_IT_DBG_PRINT(s, p1, p2, p3) #else #define NPY_IT_DBG_PRINT(...) #define NPY_IT_DBG_PRINT1(s, p1) #define NPY_IT_DBG_PRINT2(s, p1, p2) #define NPY_IT_DBG_PRINT3(s, p1, p2, p3) #endif /**********************************************/ /* Rounds up a number of bytes to be divisible by sizeof intptr_t */ #if NPY_SIZEOF_PY_INTPTR_T == 4 #define NPY_PTR_ALIGNED(size) ((size + 0x3)&(-0x4)) #else #define NPY_PTR_ALIGNED(size) ((size + 0x7)&(-0x8)) #endif /* Internal iterator flags */ /* The perm is the identity */ #define NPY_ITFLAG_IDENTPERM (1 << 0) /* The perm has negative entries (indicating flipped axes) */ #define NPY_ITFLAG_NEGPERM (1 << 1) /* The iterator is tracking an index */ #define NPY_ITFLAG_HASINDEX (1 << 2) /* The iterator is tracking a multi-index */ #define NPY_ITFLAG_HASMULTIINDEX (1 << 3) /* The iteration order was forced on construction */ #define NPY_ITFLAG_FORCEDORDER (1 << 4) /* The inner loop is handled outside the iterator */ #define NPY_ITFLAG_EXLOOP (1 << 5) /* The iterator is ranged */ #define NPY_ITFLAG_RANGE (1 << 6) /* The iterator is buffered */ #define NPY_ITFLAG_BUFFER (1 << 7) /* The iterator should grow the buffered inner loop when possible */ #define NPY_ITFLAG_GROWINNER (1 << 8) /* There is just one iteration, can specialize iternext for that */ #define NPY_ITFLAG_ONEITERATION (1 << 9) /* Delay buffer allocation until first Reset* call */ #define NPY_ITFLAG_DELAYBUF (1 << 10) /* Iteration includes one or more operands being reduced */ #define NPY_ITFLAG_REDUCE (1 << 11) /* Reduce iteration doesn't need to recalculate reduce loops next time */ #define NPY_ITFLAG_REUSE_REDUCE_LOOPS (1 << 12) /* * Offset of (combined) ArrayMethod flags for all transfer functions. * For now, we use the top 8 bits. */ #define NPY_ITFLAG_TRANSFERFLAGS_SHIFT 24 /* Internal iterator per-operand iterator flags */ /* The operand will be written to */ #define NPY_OP_ITFLAG_WRITE 0x0001 /* The operand will be read from */ #define NPY_OP_ITFLAG_READ 0x0002 /* The operand needs type conversion/byte swapping/alignment */ #define NPY_OP_ITFLAG_CAST 0x0004 /* The operand never needs buffering (implies BUF_SINGLESTRIDE) */ #define NPY_OP_ITFLAG_BUFNEVER 0x0008 /* Whether the buffer filling can use a single stride (minus reduce if reduce) */ #define NPY_OP_ITFLAG_BUF_SINGLESTRIDE 0x0010 /* The operand is being reduced */ #define NPY_OP_ITFLAG_REDUCE 0x0020 /* The operand is for temporary use, does not have a backing array */ #define NPY_OP_ITFLAG_VIRTUAL 0x0040 /* The operand requires masking when copying buffer -> array */ #define NPY_OP_ITFLAG_WRITEMASKED 0x0080 /* * Whether the buffer is *fully* filled and thus ready for reuse. * (Must check if the start pointer matches until copy-from-buffer checks) */ #define NPY_OP_ITFLAG_BUF_REUSABLE 0x0100 /* The operand must be copied (with UPDATEIFCOPY if also ITFLAG_WRITE) */ #define NPY_OP_ITFLAG_FORCECOPY 0x0200 /* The operand has temporary data, write it back at dealloc */ #define NPY_OP_ITFLAG_HAS_WRITEBACK 0x0400 /* Whether the user requested a contiguous operand */ #define NPY_OP_ITFLAG_CONTIG 0x0800 /* * The data layout of the iterator is fully specified by * a triple (itflags, ndim, nop). These three variables * are expected to exist in all functions calling these macros, * either as true variables initialized to the correct values * from the iterator, or as constants in the case of specialized * functions such as the various iternext functions. */ struct NpyIter_InternalOnly { /* Initial fixed position data */ npy_uint32 itflags; npy_uint8 ndim; int nop, maskop; npy_intp itersize, iterstart, iterend; /* iterindex is only used if RANGED or BUFFERED is set */ npy_intp iterindex; /* The rest is variable */ char iter_flexdata[]; }; typedef struct NpyIter_AxisData_tag NpyIter_AxisData; typedef struct NpyIter_TransferInfo_tag NpyIter_TransferInfo; typedef struct NpyIter_BufferData_tag NpyIter_BufferData; typedef npy_int16 npyiter_opitflags; /* Byte sizes of the iterator members */ #define NIT_PERM_SIZEOF(itflags, ndim, nop) \ NPY_PTR_ALIGNED(NPY_MAXDIMS) #define NIT_DTYPES_SIZEOF(itflags, ndim, nop) \ ((NPY_SIZEOF_PY_INTPTR_T)*(nop)) #define NIT_RESETDATAPTR_SIZEOF(itflags, ndim, nop) \ ((NPY_SIZEOF_PY_INTPTR_T)*(nop+1)) #define NIT_BASEOFFSETS_SIZEOF(itflags, ndim, nop) \ ((NPY_SIZEOF_PY_INTPTR_T)*(nop+1)) /* Could be sizeof intp */ #define NIT_OPERANDS_SIZEOF(itflags, ndim, nop) \ ((NPY_SIZEOF_PY_INTPTR_T)*(nop)) #define NIT_OPITFLAGS_SIZEOF(itflags, ndim, nop) \ (NPY_PTR_ALIGNED(sizeof(npyiter_opitflags) * nop)) #define NIT_DATAPTRS_SIZEOF(itflags, ndim, nop) \ ((NPY_SIZEOF_PY_INTPTR_T)*(nop+1)) #define NIT_USERPTRS_SIZEOF(itflags, ndim, nop) \ ((NPY_SIZEOF_PY_INTPTR_T)*(nop+1)) #define NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop) \ ((itflags&NPY_ITFLAG_BUFFER) ? ( \ (NPY_SIZEOF_PY_INTPTR_T)*(8 + 4*nop) + sizeof(NpyIter_TransferInfo) * nop) : 0) /* Byte offsets of the iterator members starting from iter->iter_flexdata */ #define NIT_PERM_OFFSET() \ (0) #define NIT_DTYPES_OFFSET(itflags, ndim, nop) \ (NIT_PERM_OFFSET() + \ NIT_PERM_SIZEOF(itflags, ndim, nop)) #define NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop) \ (NIT_DTYPES_OFFSET(itflags, ndim, nop) + \ NIT_DTYPES_SIZEOF(itflags, ndim, nop)) #define NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop) \ (NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop) + \ NIT_RESETDATAPTR_SIZEOF(itflags, ndim, nop)) #define NIT_OPERANDS_OFFSET(itflags, ndim, nop) \ (NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop) + \ NIT_BASEOFFSETS_SIZEOF(itflags, ndim, nop)) #define NIT_OPITFLAGS_OFFSET(itflags, ndim, nop) \ (NIT_OPERANDS_OFFSET(itflags, ndim, nop) + \ NIT_OPERANDS_SIZEOF(itflags, ndim, nop)) #define NIT_BUFFERDATA_OFFSET(itflags, ndim, nop) \ (NIT_OPITFLAGS_OFFSET(itflags, ndim, nop) + \ NIT_OPITFLAGS_SIZEOF(itflags, ndim, nop)) #define NIT_DATAPTRS_OFFSET(itflags, ndim, nop) + \ (NIT_BUFFERDATA_OFFSET(itflags, ndim, nop) + \ NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop)) #define NIT_USERPTRS_OFFSET(itflags, ndim, nop) + \ (NIT_DATAPTRS_OFFSET(itflags, ndim, nop) + \ NIT_DATAPTRS_SIZEOF(itflags, ndim, nop)) #define NIT_AXISDATA_OFFSET(itflags, ndim, nop) \ (NIT_USERPTRS_OFFSET(itflags, ndim, nop) + \ NIT_USERPTRS_SIZEOF(itflags, ndim, nop)) /* Internal-only ITERATOR DATA MEMBER ACCESS */ #define NIT_ITFLAGS(iter) \ ((iter)->itflags) #define NIT_NDIM(iter) \ ((iter)->ndim) #define NIT_NOP(iter) \ ((iter)->nop) #define NIT_MASKOP(iter) \ ((iter)->maskop) #define NIT_ITERSIZE(iter) \ (iter->itersize) #define NIT_ITERSTART(iter) \ (iter->iterstart) #define NIT_ITEREND(iter) \ (iter->iterend) #define NIT_ITERINDEX(iter) \ (iter->iterindex) #define NIT_PERM(iter) ((npy_int8 *)( \ iter->iter_flexdata + NIT_PERM_OFFSET())) #define NIT_DTYPES(iter) ((PyArray_Descr **)( \ iter->iter_flexdata + NIT_DTYPES_OFFSET(itflags, ndim, nop))) #define NIT_RESETDATAPTR(iter) ((char **)( \ iter->iter_flexdata + NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop))) #define NIT_BASEOFFSETS(iter) ((npy_intp *)( \ iter->iter_flexdata + NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop))) #define NIT_OPERANDS(iter) ((PyArrayObject **)( \ iter->iter_flexdata + NIT_OPERANDS_OFFSET(itflags, ndim, nop))) #define NIT_OPITFLAGS(iter) ((npyiter_opitflags *)( \ iter->iter_flexdata + NIT_OPITFLAGS_OFFSET(itflags, ndim, nop))) #define NIT_BUFFERDATA(iter) ((NpyIter_BufferData *)( \ iter->iter_flexdata + NIT_BUFFERDATA_OFFSET(itflags, ndim, nop))) #define NIT_DATAPTRS(iter) ((char **)( \ iter->iter_flexdata + NIT_DATAPTRS_OFFSET(itflags, ndim, nop))) #define NIT_USERPTRS(iter) ((char **)( \ iter->iter_flexdata + NIT_USERPTRS_OFFSET(itflags, ndim, nop))) #define NIT_AXISDATA(iter) ((NpyIter_AxisData *)( \ iter->iter_flexdata + NIT_AXISDATA_OFFSET(itflags, ndim, nop))) /* Internal-only BUFFERDATA MEMBER ACCESS */ struct NpyIter_TransferInfo_tag { NPY_cast_info read; NPY_cast_info write; NPY_traverse_info clear; /* Probably unnecessary, but make sure what follows is intptr aligned: */ Py_intptr_t _unused_ensure_alignment[]; }; struct NpyIter_BufferData_tag { npy_intp buffersize, size, bufiterend, reduce_pos, coresize, outersize, coreoffset, outerdim; Py_intptr_t bd_flexdata; }; #define NBF_BUFFERSIZE(bufferdata) ((bufferdata)->buffersize) #define NBF_SIZE(bufferdata) ((bufferdata)->size) #define NBF_BUFITEREND(bufferdata) ((bufferdata)->bufiterend) #define NBF_REDUCE_POS(bufferdata) ((bufferdata)->reduce_pos) #define NBF_CORESIZE(bufferdata) ((bufferdata)->coresize) #define NBF_COREOFFSET(bufferdata) ((bufferdata)->coreoffset) #define NBF_REDUCE_OUTERSIZE(bufferdata) ((bufferdata)->outersize) #define NBF_OUTERDIM(bufferdata) ((bufferdata)->outerdim) #define NBF_STRIDES(bufferdata) ( \ &(bufferdata)->bd_flexdata + 0) #define NBF_REDUCE_OUTERSTRIDES(bufferdata) ( \ (&(bufferdata)->bd_flexdata + 1*(nop))) #define NBF_REDUCE_OUTERPTRS(bufferdata) ((char **) \ (&(bufferdata)->bd_flexdata + 2*(nop))) #define NBF_BUFFERS(bufferdata) ((char **) \ (&(bufferdata)->bd_flexdata + 3*(nop))) #define NBF_TRANSFERINFO(bufferdata) ((NpyIter_TransferInfo *) \ (&(bufferdata)->bd_flexdata + 4*(nop))) /* Internal-only AXISDATA MEMBER ACCESS. */ struct NpyIter_AxisData_tag { npy_intp shape, index; Py_intptr_t ad_flexdata; }; #define NAD_SHAPE(axisdata) ((axisdata)->shape) #define NAD_INDEX(axisdata) ((axisdata)->index) #define NAD_STRIDES(axisdata) ( \ &(axisdata)->ad_flexdata + 0) #define NAD_NSTRIDES() \ ((nop) + ((itflags&NPY_ITFLAG_HASINDEX) ? 1 : 0)) /* Size of one AXISDATA struct within the iterator */ #define NIT_AXISDATA_SIZEOF(itflags, ndim, nop) (( \ /* intp shape */ \ 1 + \ /* intp index */ \ 1 + \ /* intp stride[nop+1] AND char* ptr[nop+1] */ \ 1*((nop)+1) \ )*(size_t)NPY_SIZEOF_PY_INTPTR_T) /* * Macro to advance an AXISDATA pointer by a specified count. * Requires that sizeof_axisdata be previously initialized * to NIT_AXISDATA_SIZEOF(itflags, ndim, nop). */ #define NIT_INDEX_AXISDATA(axisdata, index) ((NpyIter_AxisData *) \ (((char *)(axisdata)) + (index)*sizeof_axisdata)) #define NIT_ADVANCE_AXISDATA(axisdata, count) \ axisdata = NIT_INDEX_AXISDATA(axisdata, count) /* Size of the whole iterator */ #define NIT_SIZEOF_ITERATOR(itflags, ndim, nop) ( \ sizeof(struct NpyIter_InternalOnly) + \ NIT_AXISDATA_OFFSET(itflags, ndim, nop) + \ NIT_AXISDATA_SIZEOF(itflags, ndim, nop)*(ndim ? ndim : 1)) /* Internal helper functions shared between implementation files */ /** * Undo the axis permutation of the iterator. When the operand has fewer * dimensions then the iterator, this can return negative values for * inserted (broadcast) dimensions. * * @param axis Axis for which to undo the iterator axis permutation. * @param ndim If `op_axes` is being used, this is the iterator dimension, * otherwise this is the operand dimension. * @param perm The iterator axis permutation NIT_PERM(iter) * @param axis_flipped Will be set to true if this is a flipped axis * (i.e. is iterated in reversed order) and otherwise false. * Can be NULL if the information is not needed. * @return The unpermuted axis. Without `op_axes` this is correct, with * `op_axes` this indexes into `op_axes` (unpermuted iterator axis) */ static inline int npyiter_undo_iter_axis_perm( int axis, int ndim, const npy_int8 *perm, npy_bool *axis_flipped) { npy_int8 p = perm[axis]; /* The iterator treats axis reversed, thus adjust by ndim */ npy_bool flipped = p < 0; if (axis_flipped != NULL) { *axis_flipped = flipped; } if (flipped) { axis = ndim + p; } else { axis = ndim - p - 1; } return axis; } NPY_NO_EXPORT void npyiter_coalesce_axes(NpyIter *iter); NPY_NO_EXPORT int npyiter_allocate_buffers(NpyIter *iter, char **errmsg); NPY_NO_EXPORT void npyiter_goto_iterindex(NpyIter *iter, npy_intp iterindex); NPY_NO_EXPORT int npyiter_copy_from_buffers(NpyIter *iter); NPY_NO_EXPORT int npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs); NPY_NO_EXPORT void npyiter_clear_buffers(NpyIter *iter); #endif /* NUMPY_CORE_SRC_MULTIARRAY_NDITER_IMPL_H_ */