#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
#define PY_SSIZE_T_CLEAN
#include <Python.h>

#include "dlpack/dlpack.h"
#include "numpy/arrayobject.h"
#include "npy_argparse.h"
#include "npy_dlpack.h"
#include "npy_static_data.h"
#include "conversion_utils.h"


/*
 * Deleter for a NumPy exported dlpack DLManagedTensor(Versioned).
 */
static void
array_dlpack_deleter(DLManagedTensorVersioned *self)
{
    /*
     * Leak the pyobj if not initialized. This can happen if we are running
     * exit handlers that are destructing c++ objects with residual (owned)
     * PyObjects stored in them after the Python runtime has already been
     * terminated.
     */
    if (!Py_IsInitialized()) {
        return;
    }

    PyGILState_STATE state = PyGILState_Ensure();

    PyArrayObject *array = (PyArrayObject *)self->manager_ctx;
    // This will also free the shape and strides as it's one allocation.
    PyMem_Free(self);
    Py_XDECREF(array);

    PyGILState_Release(state);
}

/* TODO: Basically same as above until dlpack v0 is removed: */
static void
array_dlpack_deleter_unversioned(DLManagedTensor *self)
{
    if (!Py_IsInitialized()) {
        return;
    }

    PyGILState_STATE state = PyGILState_Ensure();

    PyArrayObject *array = (PyArrayObject *)self->manager_ctx;
    PyMem_Free(self);
    Py_XDECREF(array);

    PyGILState_Release(state);
}


/*
 * Deleter for a DLPack capsule wrapping a DLManagedTensor(Versioned).
 *
 * This is exactly as mandated by dlpack.
 */
static void
dlpack_capsule_deleter(PyObject *self)
{
    if (PyCapsule_IsValid(self, NPY_DLPACK_VERSIONED_USED_CAPSULE_NAME)) {
        return;
    }

    DLManagedTensorVersioned *managed =
        (DLManagedTensorVersioned *)PyCapsule_GetPointer(
            self, NPY_DLPACK_VERSIONED_CAPSULE_NAME);
    if (managed == NULL) {
        PyErr_WriteUnraisable(NULL);
        return;
    }
    /*
     * The spec says the deleter can be NULL if there is no way for the caller
     * to provide a reasonable destructor.
     */
    if (managed->deleter) {
        managed->deleter(managed);
    }
}

/* TODO: Basically same as above until dlpack v0 is removed: */
static void
dlpack_capsule_deleter_unversioned(PyObject *self)
{
    if (PyCapsule_IsValid(self, NPY_DLPACK_USED_CAPSULE_NAME)) {
        return;
    }

    DLManagedTensor *managed =
        (DLManagedTensor *)PyCapsule_GetPointer(self, NPY_DLPACK_CAPSULE_NAME);
    if (managed == NULL) {
        PyErr_WriteUnraisable(NULL);
        return;
    }

    if (managed->deleter) {
        managed->deleter(managed);
    }
}


/*
 * Deleter for the capsule used as a `base` in `from_dlpack`.
 *
 * This is almost identical to the above, but used internally as the base for
 * our array so that we can consume (rename) the original capsule.
 */
static void
array_dlpack_internal_capsule_deleter(PyObject *self)
{
    DLManagedTensorVersioned *managed =
        (DLManagedTensorVersioned *)PyCapsule_GetPointer(
            self, NPY_DLPACK_VERSIONED_INTERNAL_CAPSULE_NAME);
    if (managed == NULL) {
        PyErr_WriteUnraisable(NULL);
        return;
    }
    /*
     * The spec says the deleter can be NULL if there is no way for the caller
     * to provide a reasonable destructor.
     */
    if (managed->deleter) {
        managed->deleter(managed);
        /* TODO: is the deleter allowed to set a Python exception? */
        assert(!PyErr_Occurred());
    }
}

/* TODO: Basically same as above until dlpack v0 is removed: */
static void
array_dlpack_internal_capsule_deleter_unversioned(PyObject *self)
{
    DLManagedTensor *managed =
        (DLManagedTensor *)PyCapsule_GetPointer(
            self, NPY_DLPACK_INTERNAL_CAPSULE_NAME);
    if (managed == NULL) {
        PyErr_WriteUnraisable(NULL);
        return;
    }

    if (managed->deleter) {
        managed->deleter(managed);
        assert(!PyErr_Occurred());
    }
}


// This function cannot return NULL, but it can fail, so call
// PyErr_Occurred() to check whether it failed after calling it.
static DLDevice
array_get_dl_device(PyArrayObject *self)
{
    DLDevice ret;
    ret.device_type = kDLCPU;
    ret.device_id = 0;
    PyObject *base = PyArray_BASE(self);

    // walk the bases (see gh-20340)
    while (base != NULL && PyArray_Check(base)) {
        base = PyArray_BASE((PyArrayObject *)base);
    }

    // The outer if is due to the fact that NumPy arrays are on the CPU
    // by default (if not created from DLPack).
    if (PyCapsule_IsValid(base, NPY_DLPACK_INTERNAL_CAPSULE_NAME)) {
        DLManagedTensor *managed = (DLManagedTensor *)PyCapsule_GetPointer(
                base, NPY_DLPACK_INTERNAL_CAPSULE_NAME);
        if (managed == NULL) {
            return ret;
        }
        return managed->dl_tensor.device;
    }
    else if (PyCapsule_IsValid(base, NPY_DLPACK_VERSIONED_INTERNAL_CAPSULE_NAME)) {
        DLManagedTensorVersioned *managed =
            (DLManagedTensorVersioned *)PyCapsule_GetPointer(
                base, NPY_DLPACK_VERSIONED_INTERNAL_CAPSULE_NAME);
        if (managed == NULL) {
            return ret;
        }
        return managed->dl_tensor.device;
    }
    return ret;
}


/*
 * Fill the dl_tensor struct from the `self` array.
 * This struct could be versioned, but as of now is not.
 */
static int
fill_dl_tensor_information(
        DLTensor *dl_tensor, PyArrayObject *self, DLDevice *result_device)
{
    npy_intp itemsize = PyArray_ITEMSIZE(self);
    int ndim = PyArray_NDIM(self);
    npy_intp *strides = PyArray_STRIDES(self);
    npy_intp *shape = PyArray_SHAPE(self);

    if (!PyArray_IS_C_CONTIGUOUS(self) && PyArray_SIZE(self) != 1) {
        for (int i = 0; i < ndim; ++i) {
            if (shape[i] != 1 && strides[i] % itemsize != 0) {
                PyErr_SetString(PyExc_BufferError,
                        "DLPack only supports strides which are a multiple of "
                        "itemsize.");
                return -1;
            }
        }
    }
    DLDataType managed_dtype;
    PyArray_Descr *dtype = PyArray_DESCR(self);

    if (PyDataType_ISBYTESWAPPED(dtype)) {
        PyErr_SetString(PyExc_BufferError,
                "DLPack only supports native byte order.");
        return -1;
    }

    managed_dtype.bits = 8 * itemsize;
    managed_dtype.lanes = 1;

    if (PyDataType_ISBOOL(dtype)) {
        managed_dtype.code = kDLBool;
    }
    else if (PyDataType_ISSIGNED(dtype)) {
        managed_dtype.code = kDLInt;
    }
    else if (PyDataType_ISUNSIGNED(dtype)) {
        managed_dtype.code = kDLUInt;
    }
    else if (PyDataType_ISFLOAT(dtype)) {
        // We can't be sure that the dtype is
        // IEEE or padded.
        if (itemsize > 8) {
            PyErr_SetString(PyExc_BufferError,
                    "DLPack only supports IEEE floating point types "
                    "without padding (longdouble typically is not IEEE).");
            return -1;
        }
        managed_dtype.code = kDLFloat;
    }
    else if (PyDataType_ISCOMPLEX(dtype)) {
        // We can't be sure that the dtype is
        // IEEE or padded.
        if (itemsize > 16) {
            PyErr_SetString(PyExc_BufferError,
                    "DLPack only supports IEEE floating point types "
                    "without padding (longdouble typically is not IEEE).");
            return -1;
        }
        managed_dtype.code = kDLComplex;
    }
    else {
        PyErr_SetString(PyExc_BufferError,
                "DLPack only supports signed/unsigned integers, float "
                "and complex dtypes.");
        return -1;
    }

    /*
     * Note: the `dlpack.h` header suggests/standardizes that `data` must be
     * 256-byte aligned.  We ignore this intentionally, because `__dlpack__`
     * standardizes that `byte_offset` must be 0 (for now) to not break pytorch:
     * https://github.com/data-apis/array-api/issues/293#issuecomment-964111413
     *
     * We further assume that exporting fully unaligned data is OK even without
     * `byte_offset` since the standard does not reject it.
     * Presumably, pytorch will support importing `byte_offset != 0` and NumPy
     * can choose to use it starting about 2023.  At that point, it may be
     * that NumPy MUST use `byte_offset` to adhere to the standard (as
     * specified in the header)!
     */
    dl_tensor->data = PyArray_DATA(self);
    dl_tensor->byte_offset = 0;
    dl_tensor->device = *result_device;
    dl_tensor->dtype = managed_dtype;

    for (int i = 0; i < ndim; ++i) {
        dl_tensor->shape[i] = shape[i];
        // Strides in DLPack are in items; in NumPy they are in bytes.
        dl_tensor->strides[i] = strides[i] / itemsize;
    }

    dl_tensor->ndim = ndim;
    if (PyArray_IS_C_CONTIGUOUS(self)) {
        /* No need to pass strides, so just NULL it again */
        dl_tensor->strides = NULL;
    }

    return 0;
}


static PyObject *
create_dlpack_capsule(
        PyArrayObject *self, int versioned, DLDevice *result_device, int copied)
{
    int ndim = PyArray_NDIM(self);

    /*
     * Shape and strides are appended after the managed struct, but must be
     * 8-byte aligned; `offset` is the struct size rounded up to that
     * alignment, i.e. the offset at which the shape (and strides) start.
     */
    size_t align = sizeof(int64_t);
    size_t struct_size = (
        versioned ? sizeof(DLManagedTensorVersioned) : sizeof(DLManagedTensor));

    size_t offset = (struct_size + align - 1) / align * align;
    void *ptr = PyMem_Malloc(offset + (sizeof(int64_t) * ndim * 2));
    if (ptr == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    DLTensor *dl_tensor;
    PyCapsule_Destructor capsule_deleter;
    const char *capsule_name;

    if (versioned) {
        DLManagedTensorVersioned *managed = (DLManagedTensorVersioned *)ptr;
        capsule_name = NPY_DLPACK_VERSIONED_CAPSULE_NAME;
        capsule_deleter = (PyCapsule_Destructor)dlpack_capsule_deleter;
        managed->deleter = array_dlpack_deleter;
        managed->manager_ctx = self;

        dl_tensor = &managed->dl_tensor;

        /* The versioned tensor has additional fields that we need to set */
        managed->version.major = 1;
        managed->version.minor = 0;

        managed->flags = 0;
        if (!PyArray_CHKFLAGS(self, NPY_ARRAY_WRITEABLE)) {
            managed->flags |= DLPACK_FLAG_BITMASK_READ_ONLY;
        }
        if (copied) {
            managed->flags |= DLPACK_FLAG_BITMASK_IS_COPIED;
        }
    }
    else {
        DLManagedTensor *managed = (DLManagedTensor *)ptr;
        capsule_name = NPY_DLPACK_CAPSULE_NAME;
        capsule_deleter = (PyCapsule_Destructor)dlpack_capsule_deleter_unversioned;
        managed->deleter = array_dlpack_deleter_unversioned;
        managed->manager_ctx = self;

        dl_tensor = &managed->dl_tensor;
    }

    dl_tensor->shape = (int64_t *)((char *)ptr + offset);
    /* Note that strides may be set to NULL later if C-contiguous */
    dl_tensor->strides = dl_tensor->shape + ndim;

    if (fill_dl_tensor_information(dl_tensor, self, result_device) < 0) {
        PyMem_Free(ptr);
        return NULL;
    }

    PyObject *capsule = PyCapsule_New(ptr, capsule_name, capsule_deleter);
    if (capsule == NULL) {
        PyMem_Free(ptr);
        return NULL;
    }

    // the capsule holds a reference
    Py_INCREF(self);

    return capsule;
}


static int
device_converter(PyObject *obj, DLDevice *result_device)
{
    int type, id;
    if (obj == Py_None) {
        return NPY_SUCCEED;
    }
    if (!PyTuple_Check(obj)) {
        PyErr_SetString(PyExc_TypeError, "dl_device must be a tuple");
        return NPY_FAIL;
    }
    if (!PyArg_ParseTuple(obj, "ii", &type, &id)) {
        return NPY_FAIL;
    }
    /* We can honor the request if it matches the existing device or is CPU */
    if (type == result_device->device_type && id == result_device->device_id) {
        return NPY_SUCCEED;
    }
    if (type == kDLCPU && id == 0) {
        result_device->device_type = type;
        result_device->device_id = id;
        return NPY_SUCCEED;
    }

    PyErr_SetString(PyExc_ValueError, "unsupported device requested");
    return NPY_FAIL;
}


NPY_NO_EXPORT PyObject *
array_dlpack(PyArrayObject *self,
        PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames)
{
    PyObject *stream = Py_None;
    PyObject *max_version = Py_None;
    NPY_COPYMODE copy_mode = NPY_COPY_IF_NEEDED;
    long major_version = 0;

    /*
     * We allow the user to request a result device in principle.
     */
    DLDevice result_device = array_get_dl_device(self);
    if (PyErr_Occurred()) {
        return NULL;
    }

    NPY_PREPARE_ARGPARSER;
    if (npy_parse_arguments("__dlpack__", args, len_args, kwnames,
            "$stream", NULL, &stream,
            "$max_version", NULL, &max_version,
            "$dl_device", &device_converter, &result_device,
            "$copy", &PyArray_CopyConverter, &copy_mode,
            NULL, NULL, NULL)) {
        return NULL;
    }

    if (max_version != Py_None) {
        if (!PyTuple_Check(max_version) || PyTuple_GET_SIZE(max_version) != 2) {
            PyErr_SetString(PyExc_TypeError,
                    "max_version must be None or a tuple with two elements.");
            return NULL;
        }
        major_version = PyLong_AsLong(PyTuple_GET_ITEM(max_version, 0));
        if (major_version == -1 && PyErr_Occurred()) {
            return NULL;
        }
    }

    if (stream != Py_None) {
        PyErr_SetString(PyExc_RuntimeError,
                "NumPy only supports stream=None.");
        return NULL;
    }

    /* If the user requested a copy be made, honor that here already */
    if (copy_mode == NPY_COPY_ALWAYS) {
        /* TODO: It may be good to check ability to export dtype first. */
        self = (PyArrayObject *)PyArray_NewCopy(self, NPY_KEEPORDER);
        if (self == NULL) {
            return NULL;
        }
    }
    else {
        Py_INCREF(self);
    }

    if (major_version < 1 && !(PyArray_FLAGS(self) & NPY_ARRAY_WRITEABLE)) {
        PyErr_SetString(PyExc_BufferError,
                "Cannot export readonly array since signalling readonly "
                "is unsupported by DLPack (supported by newer DLPack version).");
        Py_DECREF(self);
        return NULL;
    }

    /*
     * TODO: The versioned and non-versioned structs of DLPack are very
     * similar but not ABI compatible, so the function called here requires
     * branching (templating didn't seem worthwhile).
     *
     * Version 0 support should be deprecated in NumPy 2.1 and the branches
     * can then be removed again.
     */
    PyObject *res = create_dlpack_capsule(
            self, major_version >= 1, &result_device,
            copy_mode == NPY_COPY_ALWAYS);
    Py_DECREF(self);

    return res;
}

NPY_NO_EXPORT PyObject *
array_dlpack_device(PyArrayObject *self, PyObject *NPY_UNUSED(args))
{
    DLDevice device = array_get_dl_device(self);
    if (PyErr_Occurred()) {
        return NULL;
    }
    return Py_BuildValue("ii", device.device_type, device.device_id);
}

NPY_NO_EXPORT PyObject *
from_dlpack(PyObject *NPY_UNUSED(self),
        PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames)
{
    PyObject *obj, *copy = Py_None, *device = Py_None;
    NPY_PREPARE_ARGPARSER;
    if (npy_parse_arguments("from_dlpack", args, len_args, kwnames,
            "obj", NULL, &obj,
            "$copy", NULL, &copy,
            "$device", NULL, &device,
            NULL, NULL, NULL) < 0) {
        return NULL;
    }

    /*
     * Prepare arguments for the full call.  We always forward copy and pass
     * our max_version.  `device` is always passed as `None`, but if the user
     * provided a device, we will replace it with the "cpu": (1, 0).
     */
    PyObject *call_args[] = {obj, Py_None, copy, npy_static_pydata.dl_max_version};
    Py_ssize_t nargsf = 1 | PY_VECTORCALL_ARGUMENTS_OFFSET;

    /* If device is passed it must be "cpu" and replace it with (1, 0) */
    if (device != Py_None) {
        /* test that device is actually CPU */
        NPY_DEVICE device_request = NPY_DEVICE_CPU;
        if (!PyArray_DeviceConverterOptional(device, &device_request)) {
            return NULL;
        }
        assert(device_request == NPY_DEVICE_CPU);
        call_args[1] = npy_static_pydata.dl_cpu_device_tuple;
    }

    PyObject *capsule = PyObject_VectorcallMethod(
            npy_interned_str.__dlpack__, call_args, nargsf,
            npy_static_pydata.dl_call_kwnames);
    if (capsule == NULL) {
        /*
         * TODO: This path should be deprecated in NumPy 2.1.  Once deprecated
         * the below code can be simplified w.r.t. versioned/unversioned.
         *
         * We try without any arguments if both device and copy are None,
         * since the exporter may not support older versions of the protocol.
         */
        if (PyErr_ExceptionMatches(PyExc_TypeError)
                && device == Py_None && copy == Py_None) {
            /* max_version may be unsupported, try without kwargs */
            PyErr_Clear();
            capsule = PyObject_VectorcallMethod(
                    npy_interned_str.__dlpack__, call_args, nargsf, NULL);
        }
        if (capsule == NULL) {
            return NULL;
        }
    }

    void *managed_ptr;
    DLTensor dl_tensor;
    int readonly;
    int versioned = PyCapsule_IsValid(capsule, NPY_DLPACK_VERSIONED_CAPSULE_NAME);
    if (versioned) {
        managed_ptr = PyCapsule_GetPointer(capsule, NPY_DLPACK_VERSIONED_CAPSULE_NAME);
        DLManagedTensorVersioned *managed = (DLManagedTensorVersioned *)managed_ptr;
        if (managed == NULL) {
            Py_DECREF(capsule);
            return NULL;
        }

        if (managed->version.major > 1) {
            PyErr_SetString(PyExc_BufferError,
                    "from_dlpack(): the exported DLPack major version is too "
                    "high to be imported by this version of NumPy.");
            Py_DECREF(capsule);
            return NULL;
        }

        dl_tensor = managed->dl_tensor;
        readonly = (managed->flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0;
    }
    else {
        managed_ptr = PyCapsule_GetPointer(capsule, NPY_DLPACK_CAPSULE_NAME);
        DLManagedTensor *managed = (DLManagedTensor *)managed_ptr;
        if (managed == NULL) {
            Py_DECREF(capsule);
            return NULL;
        }
        dl_tensor = managed->dl_tensor;
        // The unversioned struct has no flags field, so it cannot signal
        // read-only; be conservative and import the data as read-only.
        readonly = 1;
    }

    const int ndim = dl_tensor.ndim;
    if (ndim > NPY_MAXDIMS) {
        PyErr_SetString(PyExc_RuntimeError,
                "maxdims of DLPack tensor is higher than the supported "
                "maxdims.");
        Py_DECREF(capsule);
        return NULL;
    }

    DLDeviceType device_type = dl_tensor.device.device_type;
    if (device_type != kDLCPU &&
            device_type != kDLCUDAHost &&
            device_type != kDLROCMHost &&
            device_type != kDLCUDAManaged) {
        PyErr_SetString(PyExc_RuntimeError,
                "Unsupported device in DLTensor.");
        Py_DECREF(capsule);
        return NULL;
    }

    if (dl_tensor.dtype.lanes != 1) {
        PyErr_SetString(PyExc_RuntimeError,
                "Unsupported lanes in DLTensor dtype.");
        Py_DECREF(capsule);
        return NULL;
    }

    int typenum = -1;
    const uint8_t bits = dl_tensor.dtype.bits;
    const npy_intp itemsize = bits / 8;
    switch (dl_tensor.dtype.code) {
        case kDLBool:
            if (bits == 8) {
                typenum = NPY_BOOL;
            }
            break;
        case kDLInt:
            switch (bits) {
                case 8: typenum = NPY_INT8; break;
                case 16: typenum = NPY_INT16; break;
                case 32: typenum = NPY_INT32; break;
                case 64: typenum = NPY_INT64; break;
            }
            break;
        case kDLUInt:
            switch (bits) {
                case 8: typenum = NPY_UINT8; break;
                case 16: typenum = NPY_UINT16; break;
                case 32: typenum = NPY_UINT32; break;
                case 64: typenum = NPY_UINT64; break;
            }
            break;
        case kDLFloat:
            switch (bits) {
                case 16: typenum = NPY_FLOAT16; break;
                case 32: typenum = NPY_FLOAT32; break;
                case 64: typenum = NPY_FLOAT64; break;
            }
            break;
        case kDLComplex:
            switch (bits) {
                case 64: typenum = NPY_COMPLEX64; break;
                case 128: typenum = NPY_COMPLEX128; break;
            }
            break;
    }

    if (typenum == -1) {
        PyErr_SetString(PyExc_RuntimeError,
                "Unsupported dtype in DLTensor.");
        Py_DECREF(capsule);
        return NULL;
    }

    npy_intp shape[NPY_MAXDIMS];
    npy_intp strides[NPY_MAXDIMS];

    for (int i = 0; i < ndim; ++i) {
        shape[i] = dl_tensor.shape[i];
        // DLPack has elements as stride units, NumPy has bytes.
        if (dl_tensor.strides != NULL) {
            strides[i] = dl_tensor.strides[i] * itemsize;
        }
    }

    char *data = (char *)dl_tensor.data + dl_tensor.byte_offset;

    PyArray_Descr *descr = PyArray_DescrFromType(typenum);
    if (descr == NULL) {
        Py_DECREF(capsule);
        return NULL;
    }

    PyObject *ret = PyArray_NewFromDescr(&PyArray_Type, descr, ndim, shape,
            dl_tensor.strides != NULL ? strides : NULL, data,
            readonly ? 0 : NPY_ARRAY_WRITEABLE, NULL);
    if (ret == NULL) {
        Py_DECREF(capsule);
        return NULL;
    }

    PyObject *new_capsule;
    if (versioned) {
        new_capsule = PyCapsule_New(managed_ptr,
            NPY_DLPACK_VERSIONED_INTERNAL_CAPSULE_NAME,
            (PyCapsule_Destructor)array_dlpack_internal_capsule_deleter);
    }
    else {
        new_capsule = PyCapsule_New(managed_ptr,
            NPY_DLPACK_INTERNAL_CAPSULE_NAME,
            (PyCapsule_Destructor)array_dlpack_internal_capsule_deleter_unversioned);
    }

    if (new_capsule == NULL) {
        Py_DECREF(capsule);
        Py_DECREF(ret);
        return NULL;
    }

    if (PyArray_SetBaseObject((PyArrayObject *)ret, new_capsule) < 0) {
        Py_DECREF(capsule);
        Py_DECREF(ret);
        return NULL;
    }

    const char *new_name = (
        versioned ? NPY_DLPACK_VERSIONED_USED_CAPSULE_NAME
                  : NPY_DLPACK_USED_CAPSULE_NAME);
    if (PyCapsule_SetName(capsule, new_name) < 0) {
        Py_DECREF(capsule);
        Py_DECREF(ret);
        return NULL;
    }

    Py_DECREF(capsule);
    return ret;
}