Source code for sparse._sparse_array

from abc import ABCMeta, abstractmethod
from collections.abc import Iterable
from numbers import Integral
from typing import Callable
import operator
from functools import reduce

import numpy as np
import scipy.sparse as ss

from ._umath import elemwise
from ._utils import _zero_of_dtype, html_table, equivalent, normalize_axis

_reduce_super_ufunc = {np.add: np.multiply, np.multiply: np.power}


[docs]class SparseArray: """ An abstract base class for all the sparse array classes. Attributes ---------- dtype : numpy.dtype The data type of this array. fill_value : scalar The fill value of this array. """ __metaclass__ = ABCMeta def __init__(self, shape, fill_value=None): if not isinstance(shape, Iterable): shape = (shape,) if not all(isinstance(l, Integral) and int(l) >= 0 for l in shape): raise ValueError( "shape must be an non-negative integer or a tuple " "of non-negative integers." ) self.shape = tuple(int(l) for l in shape) if fill_value is not None: if not hasattr(fill_value, "dtype") or fill_value.dtype != self.dtype: self.fill_value = self.dtype.type(fill_value) else: self.fill_value = fill_value else: self.fill_value = _zero_of_dtype(self.dtype) dtype = None @property @abstractmethod def nnz(self): """ The number of nonzero elements in this array. Note that any duplicates in :code:`coords` are counted multiple times. To avoid this, call :obj:`COO.sum_duplicates`. Returns ------- int The number of nonzero elements in this array. See Also -------- DOK.nnz : Equivalent :obj:`DOK` array property. numpy.count_nonzero : A similar Numpy function. scipy.sparse.coo_matrix.nnz : The Scipy equivalent property. Examples -------- >>> import numpy as np >>> from sparse import COO >>> x = np.array([0, 0, 1, 0, 1, 2, 0, 1, 2, 3, 0, 0]) >>> np.count_nonzero(x) 6 >>> s = COO.from_numpy(x) >>> s.nnz 6 >>> np.count_nonzero(x) == s.nnz True """ @property def ndim(self): """ The number of dimensions of this array. Returns ------- int The number of dimensions of this array. See Also -------- DOK.ndim : Equivalent property for :obj:`DOK` arrays. numpy.ndarray.ndim : Numpy equivalent property. Examples -------- >>> from sparse import COO >>> import numpy as np >>> x = np.random.rand(1, 2, 3, 1, 2) >>> s = COO.from_numpy(x) >>> s.ndim 5 >>> s.ndim == x.ndim True """ return len(self.shape) @property def size(self): """ The number of all elements (including zeros) in this array. Returns ------- int The number of elements. See Also -------- numpy.ndarray.size : Numpy equivalent property. Examples -------- >>> from sparse import COO >>> import numpy as np >>> x = np.zeros((10, 10)) >>> s = COO.from_numpy(x) >>> s.size 100 """ # We use this instead of np.prod because np.prod # returns a float64 for an empty shape. return reduce(operator.mul, self.shape, 1) @property def density(self): """ The ratio of nonzero to all elements in this array. Returns ------- float The ratio of nonzero to all elements. See Also -------- COO.size : Number of elements. COO.nnz : Number of nonzero elements. Examples -------- >>> import numpy as np >>> from sparse import COO >>> x = np.zeros((8, 8)) >>> x[0, :] = 1 >>> s = COO.from_numpy(x) >>> s.density 0.125 """ return self.nnz / self.size def _repr_html_(self): """ Diagnostic report about this array. Renders in Jupyter. """ return html_table(self)
[docs] @abstractmethod def asformat(self, format): """ Convert this sparse array to a given format. Parameters ---------- format : str A format string. Returns ------- out : SparseArray The converted array. Raises ------ NotImplementedError If the format isn't supported. """
[docs] @abstractmethod def todense(self): """ Convert this :obj:`SparseArray` array to a dense :obj:`numpy.ndarray`. Note that this may take a large amount of memory and time. Returns ------- numpy.ndarray The converted dense array. See Also -------- DOK.todense : Equivalent :obj:`DOK` array method. COO.todense : Equivalent :obj:`COO` array method. scipy.sparse.coo_matrix.todense : Equivalent Scipy method. Examples -------- >>> import sparse >>> x = np.random.randint(100, size=(7, 3)) >>> s = sparse.COO.from_numpy(x) >>> x2 = s.todense() >>> np.array_equal(x, x2) True """
def _make_shallow_copy_of(self, other): self.__dict__ = other.__dict__.copy() def __array__(self, *args, **kwargs): from ._settings import AUTO_DENSIFY if not AUTO_DENSIFY: raise RuntimeError( "Cannot convert a sparse array to dense automatically. " "To manually densify, use the todense method." ) return np.asarray(self.todense(), *args, **kwargs) def __array_function__(self, func, types, args, kwargs): import sparse as module sparse_func = None try: submodules = getattr(func, "__module__", "numpy").split(".")[1:] for submodule in submodules: module = getattr(module, submodule) sparse_func = getattr(module, func.__name__) except AttributeError: pass else: return sparse_func(*args, **kwargs) try: sparse_func = getattr(type(self), func.__name__) except AttributeError: pass if ( not isinstance(sparse_func, Callable) and len(args) == 1 and len(kwargs) == 0 ): try: return getattr(self, func.__name__) except AttributeError: pass if sparse_func is None: return NotImplemented return sparse_func(*args, **kwargs) @staticmethod def _reduce(method, *args, **kwargs): assert len(args) == 1 self = args[0] if isinstance(self, ss.spmatrix): self = type(self).from_scipy_sparse(self) return self.reduce(method, **kwargs) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): out = kwargs.pop("out", None) if out is not None and not all(isinstance(x, type(self)) for x in out): return NotImplemented if getattr(ufunc, "signature", None) is not None: return self.__array_function__( ufunc, (np.ndarray, type(self)), inputs, kwargs ) if out is not None: kwargs["dtype"] = out[0].dtype if method == "outer": method = "__call__" cum_ndim = 0 inputs_transformed = [] for inp in reversed(inputs): inputs_transformed.append(inp[(Ellipsis,) + (None,) * cum_ndim]) cum_ndim += inp.ndim inputs = tuple(reversed(inputs_transformed)) if method == "__call__": result = elemwise(ufunc, *inputs, **kwargs) elif method == "reduce": result = SparseArray._reduce(ufunc, *inputs, **kwargs) else: return NotImplemented if out is not None: (out,) = out if out.shape != result.shape: raise ValueError( "non-broadcastable output operand with shape %s " "doesn't match the broadcast shape %s" % (out.shape, result.shape) ) out._make_shallow_copy_of(result) return out return result def reduce(self, method, axis=(0,), keepdims=False, **kwargs): """ Performs a reduction operation on this array. Parameters ---------- method : numpy.ufunc The method to use for performing the reduction. axis : Union[int, Iterable[int]], optional The axes along which to perform the reduction. Uses all axes by default. keepdims : bool, optional Whether or not to keep the dimensions of the original array. kwargs : dict Any extra arguments to pass to the reduction operation. See Also -------- numpy.ufunc.reduce : A similar Numpy method. COO.reduce : This method implemented on COO arrays. GCXS.reduce : This method implemented on GCXS arrays. """ axis = normalize_axis(axis, self.ndim) zero_reduce_result = method.reduce([self.fill_value, self.fill_value], **kwargs) reduce_super_ufunc = None if not equivalent(zero_reduce_result, self.fill_value): reduce_super_ufunc = _reduce_super_ufunc.get(method, None) if reduce_super_ufunc is None: raise ValueError( "Performing this reduction operation would produce " "a dense result: %s" % str(method) ) if not isinstance(axis, tuple): axis = (axis,) out = self._reduce_calc(method, axis, keepdims, **kwargs) if len(out) == 1: return out[0] data, counts, axis, n_cols, arr_attrs = out result_fill_value = self.fill_value if reduce_super_ufunc is None: missing_counts = counts != n_cols data[missing_counts] = method( data[missing_counts], self.fill_value, **kwargs ) else: data = method( data, reduce_super_ufunc(self.fill_value, n_cols - counts), ).astype(data.dtype) result_fill_value = reduce_super_ufunc(self.fill_value, n_cols) out = self._reduce_return(data, arr_attrs, result_fill_value) if keepdims: shape = list(self.shape) for ax in axis: shape[ax] = 1 out = out.reshape(shape) if out.ndim == 0: return out[()] return out def _reduce_calc(self, method, axis, keepdims, **kwargs): raise NotImplementedError def _reduce_return(self, data, arr_attrs, result_fill_value): raise NotImplementedError def sum(self, axis=None, keepdims=False, dtype=None, out=None): """ Performs a sum operation along the given axes. Uses all axes by default. Parameters ---------- axis : Union[int, Iterable[int]], optional The axes along which to sum. Uses all axes by default. keepdims : bool, optional Whether or not to keep the dimensions of the original array. dtype: numpy.dtype The data type of the output array. Returns ------- SparseArray The reduced output sparse array. See Also -------- :obj:`numpy.sum` : Equivalent numpy function. scipy.sparse.coo_matrix.sum : Equivalent Scipy function. """ return np.add.reduce(self, out=out, axis=axis, keepdims=keepdims, dtype=dtype) def max(self, axis=None, keepdims=False, out=None): """ Maximize along the given axes. Uses all axes by default. Parameters ---------- axis : Union[int, Iterable[int]], optional The axes along which to maximize. Uses all axes by default. keepdims : bool, optional Whether or not to keep the dimensions of the original array. dtype: numpy.dtype The data type of the output array. Returns ------- SparseArray The reduced output sparse array. See Also -------- :obj:`numpy.max` : Equivalent numpy function. scipy.sparse.coo_matrix.max : Equivalent Scipy function. """ return np.maximum.reduce(self, out=out, axis=axis, keepdims=keepdims) amax = max def any(self, axis=None, keepdims=False, out=None): """ See if any values along array are ``True``. Uses all axes by default. Parameters ---------- axis : Union[int, Iterable[int]], optional The axes along which to minimize. Uses all axes by default. keepdims : bool, optional Whether or not to keep the dimensions of the original array. Returns ------- SparseArray The reduced output sparse array. See Also -------- :obj:`numpy.all` : Equivalent numpy function. """ return np.logical_or.reduce(self, out=out, axis=axis, keepdims=keepdims) def all(self, axis=None, keepdims=False, out=None): """ See if all values in an array are ``True``. Uses all axes by default. Parameters ---------- axis : Union[int, Iterable[int]], optional The axes along which to minimize. Uses all axes by default. keepdims : bool, optional Whether or not to keep the dimensions of the original array. Returns ------- SparseArray The reduced output sparse array. See Also -------- :obj:`numpy.all` : Equivalent numpy function. """ return np.logical_and.reduce(self, out=out, axis=axis, keepdims=keepdims) def min(self, axis=None, keepdims=False, out=None): """ Minimize along the given axes. Uses all axes by default. Parameters ---------- axis : Union[int, Iterable[int]], optional The axes along which to minimize. Uses all axes by default. keepdims : bool, optional Whether or not to keep the dimensions of the original array. dtype: numpy.dtype The data type of the output array. Returns ------- SparseArray The reduced output sparse array. See Also -------- :obj:`numpy.min` : Equivalent numpy function. scipy.sparse.coo_matrix.min : Equivalent Scipy function. """ return np.minimum.reduce(self, out=out, axis=axis, keepdims=keepdims) amin = min def prod(self, axis=None, keepdims=False, dtype=None, out=None): """ Performs a product operation along the given axes. Uses all axes by default. Parameters ---------- axis : Union[int, Iterable[int]], optional The axes along which to multiply. Uses all axes by default. keepdims : bool, optional Whether or not to keep the dimensions of the original array. dtype: numpy.dtype The data type of the output array. Returns ------- SparseArray The reduced output sparse array. See Also -------- :obj:`numpy.prod` : Equivalent numpy function. """ return np.multiply.reduce( self, out=out, axis=axis, keepdims=keepdims, dtype=dtype ) def round(self, decimals=0, out=None): """ Evenly round to the given number of decimals. See also -------- :obj:`numpy.round` : NumPy equivalent ufunc. :obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two arguments. """ if out is not None and not isinstance(out, tuple): out = (out,) return self.__array_ufunc__( np.round, "__call__", self, decimals=decimals, out=out ) round_ = round def clip(self, min=None, max=None, out=None): """ Clip (limit) the values in the array. Return an array whose values are limited to ``[min, max]``. One of min or max must be given. See Also -------- sparse.clip : For full documentation and more details. numpy.clip : Equivalent NumPy function. """ if min is None and max is None: raise ValueError("One of max or min must be given.") if out is not None and not isinstance(out, tuple): out = (out,) return self.__array_ufunc__( np.clip, "__call__", self, a_min=min, a_max=max, out=out ) def astype(self, dtype, casting="unsafe", copy=True): """ Copy of the array, cast to a specified type. See also -------- scipy.sparse.coo_matrix.astype : SciPy sparse equivalent function numpy.ndarray.astype : NumPy equivalent ufunc. :obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two arguments. """ # this matches numpy's behavior if self.dtype == dtype and not copy: return self return self.__array_ufunc__( np.ndarray.astype, "__call__", self, dtype=dtype, copy=copy, casting=casting ) def mean(self, axis=None, keepdims=False, dtype=None, out=None): """ Compute the mean along the given axes. Uses all axes by default. Parameters ---------- axis : Union[int, Iterable[int]], optional The axes along which to compute the mean. Uses all axes by default. keepdims : bool, optional Whether or not to keep the dimensions of the original array. dtype: numpy.dtype The data type of the output array. Returns ------- SparseArray The reduced output sparse array. See Also -------- numpy.ndarray.mean : Equivalent numpy method. scipy.sparse.coo_matrix.mean : Equivalent Scipy method. Notes ----- * This function internally calls :obj:`COO.sum_duplicates` to bring the array into canonical form. * The :code:`out` parameter is provided just for compatibility with Numpy and isn't actually supported. Examples -------- You can use :obj:`COO.mean` to compute the mean of an array across any dimension. >>> from sparse import COO >>> x = np.array([[1, 2, 0, 0], ... [0, 1, 0, 0]], dtype='i8') >>> s = COO.from_numpy(x) >>> s2 = s.mean(axis=1) >>> s2.todense() # doctest: +SKIP array([0.5, 1.5, 0., 0.]) You can also use the :code:`keepdims` argument to keep the dimensions after the mean. >>> s3 = s.mean(axis=0, keepdims=True) >>> s3.shape (1, 4) You can pass in an output datatype, if needed. >>> s4 = s.mean(axis=0, dtype=np.float16) >>> s4.dtype dtype('float16') By default, this reduces the array down to one number, computing the mean along all axes. >>> s.mean() 0.5 """ if axis is None: axis = tuple(range(self.ndim)) elif not isinstance(axis, tuple): axis = (axis,) den = reduce(operator.mul, (self.shape[i] for i in axis), 1) if dtype is None: if issubclass(self.dtype.type, (np.integer, np.bool_)): dtype = inter_dtype = np.dtype("f8") else: dtype = self.dtype inter_dtype = ( np.dtype("f4") if issubclass(dtype.type, np.float16) else dtype ) else: inter_dtype = dtype num = self.sum(axis=axis, keepdims=keepdims, dtype=inter_dtype) if num.ndim: out = np.true_divide(num, den, casting="unsafe") return out.astype(dtype) if out.dtype != dtype else out return np.divide(num, den, dtype=dtype, out=out) def var(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ Compute the variance along the gi66ven axes. Uses all axes by default. Parameters ---------- axis : Union[int, Iterable[int]], optional The axes along which to compute the variance. Uses all axes by default. dtype : numpy.dtype, optional The output datatype. out: SparseArray, optional The array to write the output to. ddof: int The degrees of freedom. keepdims : bool, optional Whether or not to keep the dimensions of the original array. Returns ------- SparseArray The reduced output sparse array. See Also -------- numpy.ndarray.var : Equivalent numpy method. Notes ----- * This function internally calls :obj:`COO.sum_duplicates` to bring the array into canonical form. Examples -------- You can use :obj:`COO.var` to compute the variance of an array across any dimension. >>> from sparse import COO >>> x = np.array([[1, 2, 0, 0], ... [0, 1, 0, 0]], dtype='i8') >>> s = COO.from_numpy(x) >>> s2 = s.var(axis=1) >>> s2.todense() # doctest: +SKIP array([0.6875, 0.1875]) You can also use the :code:`keepdims` argument to keep the dimensions after the variance. >>> s3 = s.var(axis=0, keepdims=True) >>> s3.shape (1, 4) You can pass in an output datatype, if needed. >>> s4 = s.var(axis=0, dtype=np.float16) >>> s4.dtype dtype('float16') By default, this reduces the array down to one number, computing the variance along all axes. >>> s.var() 0.5 """ axis = normalize_axis(axis, self.ndim) if axis is None: axis = tuple(range(self.ndim)) if not isinstance(axis, tuple): axis = (axis,) rcount = reduce(operator.mul, (self.shape[a] for a in axis), 1) # Make this warning show up on top. if ddof >= rcount: warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning) # Cast bool, unsigned int, and int to float64 by default if dtype is None and issubclass(self.dtype.type, (np.integer, np.bool_)): dtype = np.dtype("f8") arrmean = self.sum(axis, dtype=dtype, keepdims=True) np.divide(arrmean, rcount, out=arrmean) x = self - arrmean if issubclass(self.dtype.type, np.complexfloating): x = x.real * x.real + x.imag * x.imag else: x = np.multiply(x, x, out=x) ret = x.sum(axis=axis, dtype=dtype, out=out, keepdims=keepdims) # Compute degrees of freedom and make sure it is not negative. rcount = max([rcount - ddof, 0]) ret = ret[...] np.divide(ret, rcount, out=ret, casting="unsafe") return ret[()] def std(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ Compute the standard deviation along the given axes. Uses all axes by default. Parameters ---------- axis : Union[int, Iterable[int]], optional The axes along which to compute the standard deviation. Uses all axes by default. dtype : numpy.dtype, optional The output datatype. out: SparseArray, optional The array to write the output to. ddof: int The degrees of freedom. keepdims : bool, optional Whether or not to keep the dimensions of the original array. Returns ------- SparseArray The reduced output sparse array. See Also -------- numpy.ndarray.std : Equivalent numpy method. Notes ----- * This function internally calls :obj:`COO.sum_duplicates` to bring the array into canonical form. Examples -------- You can use :obj:`COO.std` to compute the standard deviation of an array across any dimension. >>> from sparse import COO >>> x = np.array([[1, 2, 0, 0], ... [0, 1, 0, 0]], dtype='i8') >>> s = COO.from_numpy(x) >>> s2 = s.std(axis=1) >>> s2.todense() # doctest: +SKIP array([0.8291562, 0.4330127]) You can also use the :code:`keepdims` argument to keep the dimensions after the standard deviation. >>> s3 = s.std(axis=0, keepdims=True) >>> s3.shape (1, 4) You can pass in an output datatype, if needed. >>> s4 = s.std(axis=0, dtype=np.float16) >>> s4.dtype dtype('float16') By default, this reduces the array down to one number, computing the standard deviation along all axes. >>> s.std() # doctest: +SKIP 0.7071067811865476 """ ret = self.var(axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) ret = np.sqrt(ret) return ret @property def real(self): """The real part of the array. Examples -------- >>> from sparse import COO >>> x = COO.from_numpy([1 + 0j, 0 + 1j]) >>> x.real.todense() # doctest: +SKIP array([1., 0.]) >>> x.real.dtype dtype('float64') Returns ------- out : SparseArray The real component of the array elements. If the array dtype is real, the dtype of the array is used for the output. If the array is complex, the output dtype is float. See Also -------- numpy.ndarray.real : NumPy equivalent attribute. numpy.real : NumPy equivalent function. """ return self.__array_ufunc__(np.real, "__call__", self) @property def imag(self): """The imaginary part of the array. Examples -------- >>> from sparse import COO >>> x = COO.from_numpy([1 + 0j, 0 + 1j]) >>> x.imag.todense() # doctest: +SKIP array([0., 1.]) >>> x.imag.dtype dtype('float64') Returns ------- out : SparseArray The imaginary component of the array elements. If the array dtype is real, the dtype of the array is used for the output. If the array is complex, the output dtype is float. See Also -------- numpy.ndarray.imag : NumPy equivalent attribute. numpy.imag : NumPy equivalent function. """ return self.__array_ufunc__(np.imag, "__call__", self) def conj(self): """Return the complex conjugate, element-wise. The complex conjugate of a complex number is obtained by changing the sign of its imaginary part. Examples -------- >>> from sparse import COO >>> x = COO.from_numpy([1 + 2j, 2 - 1j]) >>> res = x.conj() >>> res.todense() # doctest: +SKIP array([1.-2.j, 2.+1.j]) >>> res.dtype dtype('complex128') Returns ------- out : SparseArray The complex conjugate, with same dtype as the input. See Also -------- numpy.ndarray.conj : NumPy equivalent method. numpy.conj : NumPy equivalent function. """ return np.conj(self)