Source code for sparse.dok

from numbers import Integral

import numpy as np

from .slicing import normalize_index
from .utils import equivalent
from .sparse_array import SparseArray


[docs]class DOK(SparseArray):
    """
    A class for building sparse multidimensional arrays.

    Parameters
    ----------
    shape : tuple[int] (DOK.ndim,)
        The shape of the array.
    data : dict, optional
        The key-value pairs for the data in this array.
    dtype : np.dtype, optional
        The data type of this array. If left empty, it is inferred from
        the first element.
    fill_value : scalar, optional
        The fill value of this array.

    Attributes
    ----------
    dtype : numpy.dtype
        The datatype of this array. Can be :code:`None` if no elements
        have been set yet.
    shape : tuple[int]
        The shape of this array.
    data : dict
        The keys of this dictionary contain all the indices and the values
        contain the nonzero entries.

    See Also
    --------
    COO : A read-only sparse array.

    Examples
    --------
    You can create :obj:`DOK` objects from Numpy arrays.

    >>> x = np.eye(5, dtype=np.uint8)
    >>> x[2, 3] = 5
    >>> s = DOK.from_numpy(x)
    >>> s
    <DOK: shape=(5, 5), dtype=uint8, nnz=6, fill_value=0>

    You can also create them from just shapes, and use slicing assignment.

    >>> s2 = DOK((5, 5), dtype=np.int64)
    >>> s2[1:3, 1:3] = [[4, 5], [6, 7]]
    >>> s2
    <DOK: shape=(5, 5), dtype=int64, nnz=4, fill_value=0>

    You can convert :obj:`DOK` arrays to :obj:`COO` arrays, or :obj:`numpy.ndarray`
    objects.

    >>> from sparse import COO
    >>> s3 = COO(s2)
    >>> s3
    <COO: shape=(5, 5), dtype=int64, nnz=4, fill_value=0>
    >>> s2.todense()  # doctest: +NORMALIZE_WHITESPACE
    array([[0, 0, 0, 0, 0],
           [0, 4, 5, 0, 0],
           [0, 6, 7, 0, 0],
           [0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0]])

    >>> s4 = COO.from_numpy(np.eye(4, dtype=np.uint8))
    >>> s4
    <COO: shape=(4, 4), dtype=uint8, nnz=4, fill_value=0>
    >>> s5 = DOK.from_coo(s4)
    >>> s5
    <DOK: shape=(4, 4), dtype=uint8, nnz=4, fill_value=0>

    You can also create :obj:`DOK` arrays from a shape and a dict of
    values. Zeros are automatically ignored.

    >>> values = {
    ...     (1, 2, 3): 4,
    ...     (3, 2, 1): 0,
    ... }
    >>> s6 = DOK((5, 5, 5), values)
    >>> s6
    <DOK: shape=(5, 5, 5), dtype=int64, nnz=1, fill_value=0.0>
    """

    def __init__(self, shape, data=None, dtype=None, fill_value=None):
        from .coo import COO
        self.data = dict()

        if isinstance(shape, COO):
            ar = DOK.from_coo(shape)
            self._make_shallow_copy_of(ar)
            return

        if isinstance(shape, np.ndarray):
            ar = DOK.from_numpy(shape)
            self._make_shallow_copy_of(ar)
            return

        self.dtype = np.dtype(dtype)

        if not data:
            data = dict()

        super().__init__(shape, fill_value=fill_value)

        if isinstance(data, dict):
            if not dtype:
                if not len(data):
                    self.dtype = np.dtype('float64')
                else:
                    self.dtype = np.result_type(*map(lambda x: np.asarray(x).dtype, data.values()))

            for c, d in data.items():
                self[c] = d
        else:
            raise ValueError('data must be a dict.')

    def _make_shallow_copy_of(self, other):
        self.dtype = other.dtype
        self.data = other.data
        super().__init__(other.shape, fill_value=other.fill_value)

[docs]    @classmethod
    def from_coo(cls, x):
        """
        Get a :obj:`DOK` array from a :obj:`COO` array.

        Parameters
        ----------
        x : COO
            The array to convert.

        Returns
        -------
        DOK
            The equivalent :obj:`DOK` array.

        Examples
        --------
        >>> from sparse import COO
        >>> s = COO.from_numpy(np.eye(4))
        >>> s2 = DOK.from_coo(s)
        >>> s2
        <DOK: shape=(4, 4), dtype=float64, nnz=4, fill_value=0.0>
        """
        ar = cls(x.shape, dtype=x.dtype, fill_value=x.fill_value)

        for c, d in zip(x.coords.T, x.data):
            ar.data[tuple(c)] = d

        return ar

[docs]    def to_coo(self):
        """
        Convert this :obj:`DOK` array to a :obj:`COO` array.

        Returns
        -------
        COO
            The equivalent :obj:`COO` array.

        Examples
        --------
        >>> s = DOK((5, 5))
        >>> s[1:3, 1:3] = [[4, 5], [6, 7]]
        >>> s
        <DOK: shape=(5, 5), dtype=float64, nnz=4, fill_value=0.0>
        >>> s2 = s.to_coo()
        >>> s2
        <COO: shape=(5, 5), dtype=float64, nnz=4, fill_value=0.0>
        """
        from .coo import COO
        return COO(self)

[docs]    @classmethod
    def from_numpy(cls, x):
        """
        Get a :obj:`DOK` array from a Numpy array.

        Parameters
        ----------
        x : np.ndarray
            The array to convert.

        Returns
        -------
        DOK
            The equivalent :obj:`DOK` array.

        Examples
        --------
        >>> s = DOK.from_numpy(np.eye(4))
        >>> s
        <DOK: shape=(4, 4), dtype=float64, nnz=4, fill_value=0.0>
        """
        ar = cls(x.shape, dtype=x.dtype)

        coords = np.nonzero(x)
        data = x[coords]

        for c in zip(data, *coords):
            d, c = c[0], c[1:]
            ar.data[c] = d

        return ar

    @property
    def nnz(self):
        """
        The number of nonzero elements in this array.

        Returns
        -------
        int
            The number of nonzero elements.

        See Also
        --------
        COO.nnz : Equivalent :obj:`COO` array property.
        numpy.count_nonzero : A similar Numpy function.
        scipy.sparse.dok_matrix.nnz : The Scipy equivalent property.

        Examples
        --------
        >>> values = {
        ...     (1, 2, 3): 4,
        ...     (3, 2, 1): 0,
        ... }
        >>> s = DOK((5, 5, 5), values)
        >>> s.nnz
        1
        """
        return len(self.data)

    def __getitem__(self, key):
        key = normalize_index(key, self.shape)

        if not all(isinstance(i, Integral) for i in key):
            raise NotImplementedError('All indices must be integers'
                                      ' when getting an item.')

        if len(key) != self.ndim:
            raise NotImplementedError('Can only get single elements. '
                                      'Expected key of length %d, got %s'
                                      % (self.ndim, str(key)))

        key = tuple(int(k) for k in key)

        if key in self.data:
            return self.data[key]
        else:
            return self.fill_value

    def __setitem__(self, key, value):
        key = normalize_index(key, self.shape)
        value = np.asanyarray(value)

        value = value.astype(self.dtype)

        key_list = [int(k) if isinstance(k, Integral) else k for k in key]

        self._setitem(key_list, value)

    def _setitem(self, key_list, value):
        value_missing_dims = len([ind for ind in key_list if isinstance(ind, slice)]) - value.ndim

        if value_missing_dims < 0:
            raise ValueError('setting an array element with a sequence.')

        for i, ind in enumerate(key_list):
            if isinstance(ind, slice):
                step = ind.step if ind.step is not None else 1
                if step > 0:
                    start = ind.start if ind.start is not None else 0
                    start = max(start, 0)
                    stop = ind.stop if ind.stop is not None else self.shape[i]
                    stop = min(stop, self.shape[i])
                    if start > stop:
                        start = stop
                else:
                    start = ind.start or self.shape[i] - 1
                    stop = ind.stop if ind.stop is not None else -1
                    start = min(start, self.shape[i] - 1)
                    stop = max(stop, -1)
                    if start < stop:
                        start = stop

                key_list_temp = key_list[:]
                for v_idx, ki in enumerate(range(start, stop, step)):
                    key_list_temp[i] = ki
                    vi = value if value_missing_dims > 0 else \
                        (value[0] if value.shape[0] == 1 else value[v_idx])
                    self._setitem(key_list_temp, vi)

                return
            elif not isinstance(ind, Integral):
                raise IndexError('All indices must be slices or integers'
                                 ' when setting an item.')

        key = tuple(key_list)
        if not equivalent(value, self.fill_value):
            self.data[key] = value[()]
        elif key in self.data:
            del self.data[key]

    def __str__(self):
        return '<DOK: shape={!s}, dtype={!s}, nnz={:d}, fill_value={!s}>'.format(
            self.shape, self.dtype, self.nnz, self.fill_value
        )

    __repr__ = __str__

[docs]    def todense(self):
        """
        Convert this :obj:`DOK` array into a Numpy array.

        Returns
        -------
        numpy.ndarray
            The equivalent dense array.

        See Also
        --------
        COO.todense : Equivalent :obj:`COO` array method.
        scipy.sparse.dok_matrix.todense : Equivalent Scipy method.

        Examples
        --------
        >>> s = DOK((5, 5))
        >>> s[1:3, 1:3] = [[4, 5], [6, 7]]
        >>> s.todense()  # doctest: +SKIP
        array([[0., 0., 0., 0., 0.],
               [0., 4., 5., 0., 0.],
               [0., 6., 7., 0., 0.],
               [0., 0., 0., 0., 0.],
               [0., 0., 0., 0., 0.]])
        """
        result = np.full(self.shape, self.fill_value, self.dtype)

        for c, d in self.data.items():
            result[c] = d

        return result

[docs]    def asformat(self, format):
        """
        Convert this sparse array to a given format.

        Parameters
        ----------
        format : str
            A format string.

        Returns
        -------
        out : SparseArray
            The converted array.

        Raises
        ------
        NotImplementedError
            If the format isn't supported.
        """
        if format == 'dok' or format is DOK:
            return self

        from .coo import COO
        if format == 'coo' or format is COO:
            return COO.from_iter(self.data, shape=self.shape, fill_value=self.fill_value)

        raise NotImplementedError('The given format is not supported.')