# Source code for sparse.dok

from numbers import Integral

import numpy as np

from .slicing import normalize_index
from .utils import equivalent
from .sparse_array import SparseArray
from .compatibility import int, range, zip


class DOK(SparseArray):
    """
    A class for building sparse multidimensional arrays.

    Parameters
    ----------
    shape : tuple[int] (DOK.ndim,)
        The shape of the array. A :obj:`COO` array or a
        :obj:`numpy.ndarray` may be passed instead, in which case it is
        converted and the remaining arguments are ignored.
    data : dict, optional
        The key-value pairs for the data in this array.
    dtype : np.dtype, optional
        The data type of this array. If left empty, it is the common
        result type of the values in ``data``, or ``float64`` when
        ``data`` is empty.
    fill_value : scalar, optional
        The fill value of this array.

    Attributes
    ----------
    dtype : numpy.dtype
        The datatype of this array.
    shape : tuple[int]
        The shape of this array.
    data : dict
        The keys of this dictionary contain all the indices and the values
        contain the nonzero entries.

    Raises
    ------
    ValueError
        If ``data`` is given and is not a dict.

    See Also
    --------
    COO : A read-only sparse array.

    Examples
    --------
    You can create :obj:`DOK` objects from Numpy arrays.

    >>> x = np.eye(5, dtype=np.uint8)
    >>> x[2, 3] = 5
    >>> s = DOK.from_numpy(x)
    >>> s
    <DOK: shape=(5, 5), dtype=uint8, nnz=6, fill_value=0>

    You can also create them from just shapes, and use slicing assignment.

    >>> s2 = DOK((5, 5), dtype=np.int64)
    >>> s2[1:3, 1:3] = [[4, 5], [6, 7]]
    >>> s2
    <DOK: shape=(5, 5), dtype=int64, nnz=4, fill_value=0>

    You can convert :obj:`DOK` arrays to :obj:`COO` arrays, or
    :obj:`numpy.ndarray` objects.

    >>> from sparse import COO
    >>> s3 = COO(s2)
    >>> s3
    <COO: shape=(5, 5), dtype=int64, nnz=4, fill_value=0>
    >>> s2.todense()  # doctest: +NORMALIZE_WHITESPACE
    array([[0, 0, 0, 0, 0],
           [0, 4, 5, 0, 0],
           [0, 6, 7, 0, 0],
           [0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0]])

    >>> s4 = COO.from_numpy(np.eye(4, dtype=np.uint8))
    >>> s4
    <COO: shape=(4, 4), dtype=uint8, nnz=4, fill_value=0>
    >>> s5 = DOK.from_coo(s4)
    >>> s5
    <DOK: shape=(4, 4), dtype=uint8, nnz=4, fill_value=0>

    You can also create :obj:`DOK` arrays from a shape and a dict of
    values. Zeros are automatically ignored.

    >>> values = {
    ...     (1, 2, 3): 4,
    ...     (3, 2, 1): 0,
    ... }
    >>> s6 = DOK((5, 5, 5), values)
    >>> s6
    <DOK: shape=(5, 5, 5), dtype=int64, nnz=1, fill_value=0.0>
    """

    def __init__(self, shape, data=None, dtype=None, fill_value=None):
        # Imported here rather than at module level, as in the other methods
        # (presumably to avoid a circular import with ``coo`` -- confirm).
        from .coo import COO
        self.data = dict()

        # Conversion constructors: ``shape`` is really an array to convert.
        if isinstance(shape, COO):
            ar = DOK.from_coo(shape)
            self._make_shallow_copy_of(ar)
            return

        if isinstance(shape, np.ndarray):
            ar = DOK.from_numpy(shape)
            self._make_shallow_copy_of(ar)
            return

        # ``self.dtype`` is assigned before the base-class initialiser runs.
        self.dtype = np.dtype(dtype)
        if not data:
            data = dict()

        super(DOK, self).__init__(shape, fill_value=fill_value)

        if not isinstance(data, dict):
            raise ValueError('data must be a dict.')

        # Only infer the dtype when the caller did not supply one.  An
        # explicit ``is None`` test is used so that a supplied dtype object
        # is never mistaken for "missing" by a truthiness check.
        if dtype is None:
            if not len(data):
                self.dtype = np.dtype('float64')
            else:
                self.dtype = np.result_type(
                    *(np.asarray(v).dtype for v in data.values())
                )

        # Go through ``__setitem__`` so values are cast to ``self.dtype`` and
        # entries equal to the fill value are dropped.
        for c, d in data.items():
            self[c] = d

    def _make_shallow_copy_of(self, other):
        """
        Make this array share the contents of ``other``.

        Parameters
        ----------
        other : DOK
            The array whose ``dtype``, ``data`` dict (shared, not copied),
            ``shape`` and ``fill_value`` are adopted.
        """
        self.dtype = other.dtype
        self.data = other.data
        # Forward the fill value as well; otherwise a non-default fill value
        # carried over by ``from_coo`` would be reset to the default.
        super(DOK, self).__init__(other.shape, fill_value=other.fill_value)

    @classmethod
    def from_coo(cls, x):
        """
        Get a :obj:`DOK` array from a :obj:`COO` array.

        Parameters
        ----------
        x : COO
            The array to convert.

        Returns
        -------
        DOK
            The equivalent :obj:`DOK` array.

        Examples
        --------
        >>> from sparse import COO
        >>> s = COO.from_numpy(np.eye(4))
        >>> s2 = DOK.from_coo(s)
        >>> s2
        <DOK: shape=(4, 4), dtype=float64, nnz=4, fill_value=0.0>
        """
        ar = cls(x.shape, dtype=x.dtype, fill_value=x.fill_value)

        # Each row of ``x.coords.T`` is paired with one entry of ``x.data``.
        for c, d in zip(x.coords.T, x.data):
            ar.data[tuple(c)] = d

        return ar

    def to_coo(self):
        """
        Convert this :obj:`DOK` array to a :obj:`COO` array.

        Returns
        -------
        COO
            The equivalent :obj:`COO` array.

        Examples
        --------
        >>> s = DOK((5, 5))
        >>> s[1:3, 1:3] = [[4, 5], [6, 7]]
        >>> s
        <DOK: shape=(5, 5), dtype=float64, nnz=4, fill_value=0.0>
        >>> s2 = s.to_coo()
        >>> s2
        <COO: shape=(5, 5), dtype=float64, nnz=4, fill_value=0.0>
        """
        from .coo import COO
        return COO(self)

    @classmethod
    def from_numpy(cls, x):
        """
        Get a :obj:`DOK` array from a Numpy array.

        Parameters
        ----------
        x : np.ndarray
            The array to convert.

        Returns
        -------
        DOK
            The equivalent :obj:`DOK` array.

        Examples
        --------
        >>> s = DOK.from_numpy(np.eye(4))
        >>> s
        <DOK: shape=(4, 4), dtype=float64, nnz=4, fill_value=0.0>
        """
        ar = cls(x.shape, dtype=x.dtype)

        coords = np.nonzero(x)
        data = x[coords]

        # Each zipped entry is (value, idx_0, idx_1, ...).
        for entry in zip(data, *coords):
            ar.data[entry[1:]] = entry[0]

        return ar

    @property
    def nnz(self):
        """
        The number of nonzero elements in this array.

        Returns
        -------
        int
            The number of nonzero elements.

        See Also
        --------
        COO.nnz : Equivalent :obj:`COO` array property.
        numpy.count_nonzero : A similar Numpy function.
        scipy.sparse.dok_matrix.nnz : The Scipy equivalent property.

        Examples
        --------
        >>> values = {
        ...     (1, 2, 3): 4,
        ...     (3, 2, 1): 0,
        ... }
        >>> s = DOK((5, 5, 5), values)
        >>> s.nnz
        1
        """
        return len(self.data)

    def __getitem__(self, key):
        """
        Get a single element; returns the fill value for unset positions.

        Raises
        ------
        NotImplementedError
            If any index is not an integer, or if the key does not address
            exactly one element.
        """
        key = normalize_index(key, self.shape)

        if not all(isinstance(i, Integral) for i in key):
            raise NotImplementedError('All indices must be integers'
                                      ' when getting an item.')

        if len(key) != self.ndim:
            raise NotImplementedError('Can only get single elements. '
                                      'Expected key of length %d, got %s'
                                      % (self.ndim, str(key)))

        key = tuple(int(k) for k in key)

        return self.data.get(key, self.fill_value)

    def __setitem__(self, key, value):
        """
        Assign ``value`` at ``key``; ``key`` may mix integers and slices.

        The value is cast to ``self.dtype`` before being stored.
        """
        key = normalize_index(key, self.shape)
        value = np.asanyarray(value)

        value = value.astype(self.dtype)

        key_list = [int(k) if isinstance(k, Integral) else k for k in key]

        self._setitem(key_list, value)

    def _setitem(self, key_list, value):
        """
        Recursively expand the slices in ``key_list`` and store ``value``.

        Parameters
        ----------
        key_list : list
            One entry per dimension, each an integer or a slice.
        value : numpy.ndarray
            The value(s) to assign, already cast to ``self.dtype``.

        Raises
        ------
        ValueError
            If ``value`` has more dimensions than there are slices.
        IndexError
            If an index is neither a slice nor an integer.
        """
        # Number of sliced dimensions that ``value`` does not provide; over
        # those dimensions the same value is repeated for every index.
        value_missing_dims = \
            len([ind for ind in key_list if isinstance(ind, slice)]) - value.ndim

        if value_missing_dims < 0:
            raise ValueError('setting an array element with a sequence.')

        for i, ind in enumerate(key_list):
            if isinstance(ind, slice):
                step = ind.step if ind.step is not None else 1
                if step > 0:
                    start = ind.start if ind.start is not None else 0
                    start = max(start, 0)
                    stop = ind.stop if ind.stop is not None else self.shape[i]
                    stop = min(stop, self.shape[i])
                    if start > stop:
                        start = stop
                else:
                    # Explicit ``None`` check: a start of 0 is a valid index
                    # and must not be replaced by the last index (which is
                    # what ``ind.start or ...`` would do).
                    start = ind.start if ind.start is not None \
                        else self.shape[i] - 1
                    stop = ind.stop if ind.stop is not None else -1
                    start = min(start, self.shape[i] - 1)
                    stop = max(stop, -1)
                    if start < stop:
                        start = stop

                # Replace this slice by each concrete index in turn and
                # recurse; any later slices are expanded by the recursion.
                key_list_temp = key_list[:]
                for v_idx, ki in enumerate(range(start, stop, step)):
                    key_list_temp[i] = ki
                    # Pass ``value`` through unchanged when it lacks this
                    # dimension; otherwise take its v_idx-th entry, or its
                    # only entry when the leading length is 1.
                    vi = value if value_missing_dims > 0 else \
                        (value[0] if value.shape[0] == 1 else value[v_idx])
                    self._setitem(key_list_temp, vi)

                return
            elif not isinstance(ind, Integral):
                raise IndexError('All indices must be slices or integers'
                                 ' when setting an item.')

        # Every index is an integer here: store or clear a single element.
        key = tuple(key_list)
        if not equivalent(value, self.fill_value):
            # ``value[()]` extracts the scalar from the 0-d array.
            self.data[key] = value[()]
        elif key in self.data:
            # Assigning the fill value removes the explicit entry.
            del self.data[key]

    def __str__(self):
        return '<DOK: shape={!s}, dtype={!s}, nnz={:d}, fill_value={!s}>'.format(
            self.shape, self.dtype, self.nnz, self.fill_value
        )

    __repr__ = __str__

    def todense(self):
        """
        Convert this :obj:`DOK` array into a Numpy array.

        Returns
        -------
        numpy.ndarray
            The equivalent dense array.

        See Also
        --------
        COO.todense : Equivalent :obj:`COO` array method.
        scipy.sparse.dok_matrix.todense : Equivalent Scipy method.

        Examples
        --------
        >>> s = DOK((5, 5))
        >>> s[1:3, 1:3] = [[4, 5], [6, 7]]
        >>> s.todense()  # doctest: +SKIP
        array([[0., 0., 0., 0., 0.],
               [0., 4., 5., 0., 0.],
               [0., 6., 7., 0., 0.],
               [0., 0., 0., 0., 0.],
               [0., 0., 0., 0., 0.]])
        """
        # Start from an array of fill values, then write the stored entries.
        result = np.full(self.shape, self.fill_value, self.dtype)

        for c, d in self.data.items():
            result[c] = d

        return result

    def asformat(self, format):
        """
        Convert this sparse array to a given format.

        Parameters
        ----------
        format : str
            A format string. The classes :obj:`DOK` and :obj:`COO`
            themselves are accepted as well.

        Returns
        -------
        out : SparseArray
            The converted array.

        Raises
        ------
        NotImplementedError
            If the format isn't supported.
        """
        if format == 'dok' or format is DOK:
            return self

        from .coo import COO
        if format == 'coo' or format is COO:
            return COO.from_iter(self.data, shape=self.shape,
                                 fill_value=self.fill_value)

        raise NotImplementedError('The given format is not supported.')