""" This module contains a set of functions for vectorized string operations. """ import functools import sys import numpy as np from numpy import ( add, equal, greater, greater_equal, less, less_equal, multiply as _multiply_ufunc, not_equal, ) from numpy._core.multiarray import _vec_string from numpy._core.overrides import array_function_dispatch, set_module from numpy._core.umath import ( _center, _expandtabs, _expandtabs_length, _ljust, _lstrip_chars, _lstrip_whitespace, _partition, _partition_index, _replace, _rjust, _rpartition, _rpartition_index, _rstrip_chars, _rstrip_whitespace, _slice, _strip_chars, _strip_whitespace, _zfill, count as _count_ufunc, endswith as _endswith_ufunc, find as _find_ufunc, index as _index_ufunc, isalnum, isalpha, isdecimal, isdigit, islower, isnumeric, isspace, istitle, isupper, rfind as _rfind_ufunc, rindex as _rindex_ufunc, startswith as _startswith_ufunc, str_len, ) def _override___module__(): for ufunc in [ isalnum, isalpha, isdecimal, isdigit, islower, isnumeric, isspace, istitle, isupper, str_len, ]: ufunc.__module__ = "numpy.strings" ufunc.__qualname__ = ufunc.__name__ _override___module__() __all__ = [ # UFuncs "equal", "not_equal", "less", "less_equal", "greater", "greater_equal", "add", "multiply", "isalpha", "isdigit", "isspace", "isalnum", "islower", "isupper", "istitle", "isdecimal", "isnumeric", "str_len", "find", "rfind", "index", "rindex", "count", "startswith", "endswith", "lstrip", "rstrip", "strip", "replace", "expandtabs", "center", "ljust", "rjust", "zfill", "partition", "rpartition", "slice", # _vec_string - Will gradually become ufuncs as well "upper", "lower", "swapcase", "capitalize", "title", # _vec_string - Will probably not become ufuncs "mod", "decode", "encode", "translate", # Removed from namespace until behavior has been crystallized # "join", "split", "rsplit", "splitlines", ] MAX = np.iinfo(np.int64).max array_function_dispatch = functools.partial( array_function_dispatch, module='numpy.strings') def _get_num_chars(a): """ Helper function that returns the number of characters per field in a string or unicode array. This is to abstract out the fact that for a unicode array this is itemsize / 4. """ if issubclass(a.dtype.type, np.str_): return a.itemsize // 4 return a.itemsize def _to_bytes_or_str_array(result, output_dtype_like): """ Helper function to cast a result back into an array with the appropriate dtype if an object array must be used as an intermediary. """ output_dtype_like = np.asarray(output_dtype_like) if result.size == 0: # Calling asarray & tolist in an empty array would result # in losing shape information return result.astype(output_dtype_like.dtype) ret = np.asarray(result.tolist()) if isinstance(output_dtype_like.dtype, np.dtypes.StringDType): return ret.astype(type(output_dtype_like.dtype)) return ret.astype(type(output_dtype_like.dtype)(_get_num_chars(ret))) def _clean_args(*args): """ Helper function for delegating arguments to Python string functions. Many of the Python string operations that have optional arguments do not use 'None' to indicate a default value. In these cases, we need to remove all None arguments, and those following them. """ newargs = [] for chk in args: if chk is None: break newargs.append(chk) return newargs def _multiply_dispatcher(a, i): return (a,) @set_module("numpy.strings") @array_function_dispatch(_multiply_dispatcher) def multiply(a, i): """ Return (a * i), that is string multiple concatenation, element-wise. Values in ``i`` of less than 0 are treated as 0 (which yields an empty string). Parameters ---------- a : array_like, with ``StringDType``, ``bytes_`` or ``str_`` dtype i : array_like, with any integer dtype Returns ------- out : ndarray Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype, depending on input types Examples -------- >>> import numpy as np >>> a = np.array(["a", "b", "c"]) >>> np.strings.multiply(a, 3) array(['aaa', 'bbb', 'ccc'], dtype='>> i = np.array([1, 2, 3]) >>> np.strings.multiply(a, i) array(['a', 'bb', 'ccc'], dtype='>> np.strings.multiply(np.array(['a']), i) array(['a', 'aa', 'aaa'], dtype='>> a = np.array(['a', 'b', 'c', 'd', 'e', 'f']).reshape((2, 3)) >>> np.strings.multiply(a, 3) array([['aaa', 'bbb', 'ccc'], ['ddd', 'eee', 'fff']], dtype='>> np.strings.multiply(a, i) array([['a', 'bb', 'ccc'], ['d', 'ee', 'fff']], dtype=' sys.maxsize / np.maximum(i, 1)): raise OverflowError("Overflow encountered in string multiply") buffersizes = a_len * i out_dtype = f"{a.dtype.char}{buffersizes.max()}" out = np.empty_like(a, shape=buffersizes.shape, dtype=out_dtype) return _multiply_ufunc(a, i, out=out) def _mod_dispatcher(a, values): return (a, values) @set_module("numpy.strings") @array_function_dispatch(_mod_dispatcher) def mod(a, values): """ Return (a % i), that is pre-Python 2.6 string formatting (interpolation), element-wise for a pair of array_likes of str or unicode. Parameters ---------- a : array_like, with `np.bytes_` or `np.str_` dtype values : array_like of values These values will be element-wise interpolated into the string. Returns ------- out : ndarray Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype, depending on input types Examples -------- >>> import numpy as np >>> a = np.array(["NumPy is a %s library"]) >>> np.strings.mod(a, values=["Python"]) array(['NumPy is a Python library'], dtype='>> a = np.array([b'%d bytes', b'%d bits']) >>> values = np.array([8, 64]) >>> np.strings.mod(a, values) array([b'8 bytes', b'64 bits'], dtype='|S7') """ return _to_bytes_or_str_array( _vec_string(a, np.object_, '__mod__', (values,)), a) @set_module("numpy.strings") def find(a, sub, start=0, end=None): """ For each element, return the lowest index in the string where substring ``sub`` is found, such that ``sub`` is contained in the range [``start``, ``end``). Parameters ---------- a : array_like, with ``StringDType``, ``bytes_`` or ``str_`` dtype sub : array_like, with `np.bytes_` or `np.str_` dtype The substring to search for. start, end : array_like, with any integer dtype The range to look in, interpreted as in slice notation. Returns ------- y : ndarray Output array of ints See Also -------- str.find Examples -------- >>> import numpy as np >>> a = np.array(["NumPy is a Python library"]) >>> np.strings.find(a, "Python") array([11]) """ end = end if end is not None else MAX return _find_ufunc(a, sub, start, end) @set_module("numpy.strings") def rfind(a, sub, start=0, end=None): """ For each element, return the highest index in the string where substring ``sub`` is found, such that ``sub`` is contained in the range [``start``, ``end``). Parameters ---------- a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype sub : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype The substring to search for. start, end : array_like, with any integer dtype The range to look in, interpreted as in slice notation. Returns ------- y : ndarray Output array of ints See Also -------- str.rfind Examples -------- >>> import numpy as np >>> a = np.array(["Computer Science"]) >>> np.strings.rfind(a, "Science", start=0, end=None) array([9]) >>> np.strings.rfind(a, "Science", start=0, end=8) array([-1]) >>> b = np.array(["Computer Science", "Science"]) >>> np.strings.rfind(b, "Science", start=0, end=None) array([9, 0]) """ end = end if end is not None else MAX return _rfind_ufunc(a, sub, start, end) @set_module("numpy.strings") def index(a, sub, start=0, end=None): """ Like `find`, but raises :exc:`ValueError` when the substring is not found. Parameters ---------- a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype sub : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype start, end : array_like, with any integer dtype, optional Returns ------- out : ndarray Output array of ints. See Also -------- find, str.index Examples -------- >>> import numpy as np >>> a = np.array(["Computer Science"]) >>> np.strings.index(a, "Science", start=0, end=None) array([9]) """ end = end if end is not None else MAX return _index_ufunc(a, sub, start, end) @set_module("numpy.strings") def rindex(a, sub, start=0, end=None): """ Like `rfind`, but raises :exc:`ValueError` when the substring `sub` is not found. Parameters ---------- a : array-like, with `np.bytes_` or `np.str_` dtype sub : array-like, with `np.bytes_` or `np.str_` dtype start, end : array-like, with any integer dtype, optional Returns ------- out : ndarray Output array of ints. See Also -------- rfind, str.rindex Examples -------- >>> a = np.array(["Computer Science"]) >>> np.strings.rindex(a, "Science", start=0, end=None) array([9]) """ end = end if end is not None else MAX return _rindex_ufunc(a, sub, start, end) @set_module("numpy.strings") def count(a, sub, start=0, end=None): """ Returns an array with the number of non-overlapping occurrences of substring ``sub`` in the range [``start``, ``end``). Parameters ---------- a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype sub : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype The substring to search for. start, end : array_like, with any integer dtype The range to look in, interpreted as in slice notation. Returns ------- y : ndarray Output array of ints See Also -------- str.count Examples -------- >>> import numpy as np >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']) >>> c array(['aAaAaA', ' aA ', 'abBABba'], dtype='>> np.strings.count(c, 'A') array([3, 1, 1]) >>> np.strings.count(c, 'aA') array([3, 1, 0]) >>> np.strings.count(c, 'A', start=1, end=4) array([2, 1, 1]) >>> np.strings.count(c, 'A', start=1, end=3) array([1, 0, 0]) """ end = end if end is not None else MAX return _count_ufunc(a, sub, start, end) @set_module("numpy.strings") def startswith(a, prefix, start=0, end=None): """ Returns a boolean array which is `True` where the string element in ``a`` starts with ``prefix``, otherwise `False`. Parameters ---------- a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype prefix : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype start, end : array_like, with any integer dtype With ``start``, test beginning at that position. With ``end``, stop comparing at that position. Returns ------- out : ndarray Output array of bools See Also -------- str.startswith Examples -------- >>> import numpy as np >>> s = np.array(['foo', 'bar']) >>> s array(['foo', 'bar'], dtype='>> np.strings.startswith(s, 'fo') array([True, False]) >>> np.strings.startswith(s, 'o', start=1, end=2) array([True, False]) """ end = end if end is not None else MAX return _startswith_ufunc(a, prefix, start, end) @set_module("numpy.strings") def endswith(a, suffix, start=0, end=None): """ Returns a boolean array which is `True` where the string element in ``a`` ends with ``suffix``, otherwise `False`. Parameters ---------- a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype suffix : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype start, end : array_like, with any integer dtype With ``start``, test beginning at that position. With ``end``, stop comparing at that position. Returns ------- out : ndarray Output array of bools See Also -------- str.endswith Examples -------- >>> import numpy as np >>> s = np.array(['foo', 'bar']) >>> s array(['foo', 'bar'], dtype='>> np.strings.endswith(s, 'ar') array([False, True]) >>> np.strings.endswith(s, 'a', start=1, end=2) array([False, True]) """ end = end if end is not None else MAX return _endswith_ufunc(a, suffix, start, end) def _code_dispatcher(a, encoding=None, errors=None): return (a,) @set_module("numpy.strings") @array_function_dispatch(_code_dispatcher) def decode(a, encoding=None, errors=None): r""" Calls :meth:`bytes.decode` element-wise. The set of available codecs comes from the Python standard library, and may be extended at runtime. For more information, see the :mod:`codecs` module. Parameters ---------- a : array_like, with ``bytes_`` dtype encoding : str, optional The name of an encoding errors : str, optional Specifies how to handle encoding errors Returns ------- out : ndarray See Also -------- :py:meth:`bytes.decode` Notes ----- The type of the result will depend on the encoding specified. Examples -------- >>> import numpy as np >>> c = np.array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@', ... b'\x81\x82\xc2\xc1\xc2\x82\x81']) >>> c array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@', b'\x81\x82\xc2\xc1\xc2\x82\x81'], dtype='|S7') >>> np.strings.decode(c, encoding='cp037') array(['aAaAaA', ' aA ', 'abBABba'], dtype='>> import numpy as np >>> a = np.array(['aAaAaA', ' aA ', 'abBABba']) >>> np.strings.encode(a, encoding='cp037') array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@', b'\x81\x82\xc2\xc1\xc2\x82\x81'], dtype='|S7') """ return _to_bytes_or_str_array( _vec_string(a, np.object_, 'encode', _clean_args(encoding, errors)), np.bytes_(b'')) def _expandtabs_dispatcher(a, tabsize=None): return (a,) @set_module("numpy.strings") @array_function_dispatch(_expandtabs_dispatcher) def expandtabs(a, tabsize=8): """ Return a copy of each string element where all tab characters are replaced by one or more spaces. Calls :meth:`str.expandtabs` element-wise. Return a copy of each string element where all tab characters are replaced by one or more spaces, depending on the current column and the given `tabsize`. The column number is reset to zero after each newline occurring in the string. This doesn't understand other non-printing characters or escape sequences. Parameters ---------- a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype Input array tabsize : int, optional Replace tabs with `tabsize` number of spaces. If not given defaults to 8 spaces. Returns ------- out : ndarray Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype, depending on input type See Also -------- str.expandtabs Examples -------- >>> import numpy as np >>> a = np.array(['\t\tHello\tworld']) >>> np.strings.expandtabs(a, tabsize=4) # doctest: +SKIP array([' Hello world'], dtype='>> import numpy as np >>> c = np.array(['a1b2','1b2a','b2a1','2a1b']); c array(['a1b2', '1b2a', 'b2a1', '2a1b'], dtype='>> np.strings.center(c, width=9) array([' a1b2 ', ' 1b2a ', ' b2a1 ', ' 2a1b '], dtype='>> np.strings.center(c, width=9, fillchar='*') array(['***a1b2**', '***1b2a**', '***b2a1**', '***2a1b**'], dtype='>> np.strings.center(c, width=1) array(['a1b2', '1b2a', 'b2a1', '2a1b'], dtype='>> import numpy as np >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']) >>> np.strings.ljust(c, width=3) array(['aAaAaA', ' aA ', 'abBABba'], dtype='>> np.strings.ljust(c, width=9) array(['aAaAaA ', ' aA ', 'abBABba '], dtype='>> import numpy as np >>> a = np.array(['aAaAaA', ' aA ', 'abBABba']) >>> np.strings.rjust(a, width=3) array(['aAaAaA', ' aA ', 'abBABba'], dtype='>> np.strings.rjust(a, width=9) array([' aAaAaA', ' aA ', ' abBABba'], dtype='>> import numpy as np >>> np.strings.zfill(['1', '-1', '+1'], 3) array(['001', '-01', '+01'], dtype='>> import numpy as np >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']) >>> c array(['aAaAaA', ' aA ', 'abBABba'], dtype='>> np.strings.lstrip(c, 'a') array(['AaAaA', ' aA ', 'bBABba'], dtype='>> np.strings.lstrip(c, 'A') # leaves c unchanged array(['aAaAaA', ' aA ', 'abBABba'], dtype='>> (np.strings.lstrip(c, ' ') == np.strings.lstrip(c, '')).all() np.False_ >>> (np.strings.lstrip(c, ' ') == np.strings.lstrip(c)).all() np.True_ """ if chars is None: return _lstrip_whitespace(a) return _lstrip_chars(a, chars) @set_module("numpy.strings") def rstrip(a, chars=None): """ For each element in `a`, return a copy with the trailing characters removed. Parameters ---------- a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype chars : scalar with the same dtype as ``a``, optional The ``chars`` argument is a string specifying the set of characters to be removed. If ``None``, the ``chars`` argument defaults to removing whitespace. The ``chars`` argument is not a prefix or suffix; rather, all combinations of its values are stripped. Returns ------- out : ndarray Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype, depending on input types See Also -------- str.rstrip Examples -------- >>> import numpy as np >>> c = np.array(['aAaAaA', 'abBABba']) >>> c array(['aAaAaA', 'abBABba'], dtype='>> np.strings.rstrip(c, 'a') array(['aAaAaA', 'abBABb'], dtype='>> np.strings.rstrip(c, 'A') array(['aAaAa', 'abBABba'], dtype='>> import numpy as np >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']) >>> c array(['aAaAaA', ' aA ', 'abBABba'], dtype='>> np.strings.strip(c) array(['aAaAaA', 'aA', 'abBABba'], dtype='>> np.strings.strip(c, 'a') array(['AaAaA', ' aA ', 'bBABb'], dtype='>> np.strings.strip(c, 'A') array(['aAaAa', ' aA ', 'abBABba'], dtype='>> import numpy as np >>> c = np.array(['a1b c', '1bca', 'bca1']); c array(['a1b c', '1bca', 'bca1'], dtype='>> np.strings.upper(c) array(['A1B C', '1BCA', 'BCA1'], dtype='>> import numpy as np >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c array(['A1B C', '1BCA', 'BCA1'], dtype='>> np.strings.lower(c) array(['a1b c', '1bca', 'bca1'], dtype='>> import numpy as np >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c array(['a1B c', '1b Ca', 'b Ca1', 'cA1b'], dtype='|S5') >>> np.strings.swapcase(c) array(['A1b C', '1B cA', 'B cA1', 'Ca1B'], dtype='|S5') """ a_arr = np.asarray(a) return _vec_string(a_arr, a_arr.dtype, 'swapcase') @set_module("numpy.strings") @array_function_dispatch(_unary_op_dispatcher) def capitalize(a): """ Return a copy of ``a`` with only the first character of each element capitalized. Calls :meth:`str.capitalize` element-wise. For byte strings, this method is locale-dependent. Parameters ---------- a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype Input array of strings to capitalize. Returns ------- out : ndarray Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype, depending on input types See Also -------- str.capitalize Examples -------- >>> import numpy as np >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c array(['a1b2', '1b2a', 'b2a1', '2a1b'], dtype='|S4') >>> np.strings.capitalize(c) array(['A1b2', '1b2a', 'B2a1', '2a1b'], dtype='|S4') """ a_arr = np.asarray(a) return _vec_string(a_arr, a_arr.dtype, 'capitalize') @set_module("numpy.strings") @array_function_dispatch(_unary_op_dispatcher) def title(a): """ Return element-wise title cased version of string or unicode. Title case words start with uppercase characters, all remaining cased characters are lowercase. Calls :meth:`str.title` element-wise. For 8-bit strings, this method is locale-dependent. Parameters ---------- a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype Input array. Returns ------- out : ndarray Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype, depending on input types See Also -------- str.title Examples -------- >>> import numpy as np >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c array(['a1b c', '1b ca', 'b ca1', 'ca1b'], dtype='|S5') >>> np.strings.title(c) array(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'], dtype='|S5') """ a_arr = np.asarray(a) return _vec_string(a_arr, a_arr.dtype, 'title') def _replace_dispatcher(a, old, new, count=None): return (a,) @set_module("numpy.strings") @array_function_dispatch(_replace_dispatcher) def replace(a, old, new, count=-1): """ For each element in ``a``, return a copy of the string with occurrences of substring ``old`` replaced by ``new``. Parameters ---------- a : array_like, with ``bytes_`` or ``str_`` dtype old, new : array_like, with ``bytes_`` or ``str_`` dtype count : array_like, with ``int_`` dtype If the optional argument ``count`` is given, only the first ``count`` occurrences are replaced. Returns ------- out : ndarray Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype, depending on input types See Also -------- str.replace Examples -------- >>> import numpy as np >>> a = np.array(["That is a mango", "Monkeys eat mangos"]) >>> np.strings.replace(a, 'mango', 'banana') array(['That is a banana', 'Monkeys eat bananas'], dtype='>> a = np.array(["The dish is fresh", "This is it"]) >>> np.strings.replace(a, 'is', 'was') array(['The dwash was fresh', 'Thwas was it'], dtype='>> import numpy as np >>> np.strings.join('-', 'osd') # doctest: +SKIP array('o-s-d', dtype='>> np.strings.join(['-', '.'], ['ghc', 'osd']) # doctest: +SKIP array(['g-h-c', 'o.s.d'], dtype='>> import numpy as np >>> x = np.array("Numpy is nice!") >>> np.strings.split(x, " ") # doctest: +SKIP array(list(['Numpy', 'is', 'nice!']), dtype=object) # doctest: +SKIP >>> np.strings.split(x, " ", 1) # doctest: +SKIP array(list(['Numpy', 'is nice!']), dtype=object) # doctest: +SKIP See Also -------- str.split, rsplit """ # This will return an array of lists of different sizes, so we # leave it as an object array return _vec_string( a, np.object_, 'split', [sep] + _clean_args(maxsplit)) @array_function_dispatch(_split_dispatcher) def _rsplit(a, sep=None, maxsplit=None): """ For each element in `a`, return a list of the words in the string, using `sep` as the delimiter string. Calls :meth:`str.rsplit` element-wise. Except for splitting from the right, `rsplit` behaves like `split`. Parameters ---------- a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype sep : str or unicode, optional If `sep` is not specified or None, any whitespace string is a separator. maxsplit : int, optional If `maxsplit` is given, at most `maxsplit` splits are done, the rightmost ones. Returns ------- out : ndarray Array of list objects See Also -------- str.rsplit, split Examples -------- >>> import numpy as np >>> a = np.array(['aAaAaA', 'abBABba']) >>> np.strings.rsplit(a, 'A') # doctest: +SKIP array([list(['a', 'a', 'a', '']), # doctest: +SKIP list(['abB', 'Bba'])], dtype=object) # doctest: +SKIP """ # This will return an array of lists of different sizes, so we # leave it as an object array return _vec_string( a, np.object_, 'rsplit', [sep] + _clean_args(maxsplit)) def _splitlines_dispatcher(a, keepends=None): return (a,) @array_function_dispatch(_splitlines_dispatcher) def _splitlines(a, keepends=None): """ For each element in `a`, return a list of the lines in the element, breaking at line boundaries. Calls :meth:`str.splitlines` element-wise. Parameters ---------- a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype keepends : bool, optional Line breaks are not included in the resulting list unless keepends is given and true. Returns ------- out : ndarray Array of list objects See Also -------- str.splitlines Examples -------- >>> np.char.splitlines("first line\\nsecond line") array(list(['first line', 'second line']), dtype=object) >>> a = np.array(["first\\nsecond", "third\\nfourth"]) >>> np.char.splitlines(a) array([list(['first', 'second']), list(['third', 'fourth'])], dtype=object) """ return _vec_string( a, np.object_, 'splitlines', _clean_args(keepends)) def _partition_dispatcher(a, sep): return (a,) @set_module("numpy.strings") @array_function_dispatch(_partition_dispatcher) def partition(a, sep): """ Partition each element in ``a`` around ``sep``. For each element in ``a``, split the element at the first occurrence of ``sep``, and return a 3-tuple containing the part before the separator, the separator itself, and the part after the separator. If the separator is not found, the first item of the tuple will contain the whole string, and the second and third ones will be the empty string. Parameters ---------- a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype Input array sep : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype Separator to split each string element in ``a``. Returns ------- out : 3-tuple: - array with ``StringDType``, ``bytes_`` or ``str_`` dtype with the part before the separator - array with ``StringDType``, ``bytes_`` or ``str_`` dtype with the separator - array with ``StringDType``, ``bytes_`` or ``str_`` dtype with the part after the separator See Also -------- str.partition Examples -------- >>> import numpy as np >>> x = np.array(["Numpy is nice!"]) >>> np.strings.partition(x, " ") (array(['Numpy'], dtype='>> import numpy as np >>> a = np.array(['aAaAaA', ' aA ', 'abBABba']) >>> np.strings.rpartition(a, 'A') (array(['aAaAa', ' a', 'abB'], dtype='>> import numpy as np >>> a = np.array(['a1b c', '1bca', 'bca1']) >>> table = a[0].maketrans('abc', '123') >>> deletechars = ' ' >>> np.char.translate(a, table, deletechars) array(['112 3', '1231', '2311'], dtype='>> import numpy as np >>> a = np.array(['hello', 'world']) >>> np.strings.slice(a, 2) array(['he', 'wo'], dtype='>> np.strings.slice(a, 1, 5, 2) array(['el', 'ol'], dtype='>> np.strings.slice(a, np.array([1, 2]), np.array([4, 5])) array(['ell', 'rld'], dtype='>> b = np.array(['hello world', 'γεια σου κόσμε', '你好世界', '👋 🌍'], ... dtype=np.dtypes.StringDType()) >>> np.strings.slice(b, -2) array(['hello wor', 'γεια σου κόσ', '你好', '👋'], dtype=StringDType()) >>> np.strings.slice(b, [3, -10, 2, -3], [-1, -2, -1, 3]) array(['lo worl', ' σου κόσ', '世', '👋 🌍'], dtype=StringDType()) >>> np.strings.slice(b, None, None, -1) array(['dlrow olleh', 'εμσόκ υοσ αιεγ', '界世好你', '🌍 👋'], dtype=StringDType()) """ # Just like in the construction of a regular slice object, if only start # is specified then start will become stop, see logic in slice_new. if stop is None: stop = start start = None # adjust start, stop, step to be integers, see logic in PySlice_Unpack if step is None: step = 1 step = np.asanyarray(step) if not np.issubdtype(step.dtype, np.integer): raise TypeError(f"unsupported type {step.dtype} for operand 'step'") if np.any(step == 0): raise ValueError("slice step cannot be zero") if start is None: start = np.where(step < 0, np.iinfo(np.intp).max, 0) if stop is None: stop = np.where(step < 0, np.iinfo(np.intp).min, np.iinfo(np.intp).max) return _slice(a, start, stop, step)