from __future__ import absolute_import, division, print_function
from collections import Iterable, defaultdict, deque
from functools import reduce, partial
import numbers
import operator
import numpy as np
import scipy.sparse
from .slicing import normalize_index
from .utils import _zero_of_dtype
# zip_longest with Python 2/3 compat
from six.moves import range, zip_longest
try: # Windows compatibility
int = long
except NameError:
pass
[docs]class COO(object):
"""
A sparse multidimensional array.
This is stored in COO format. It depends on NumPy and Scipy.sparse for
computation, but supports arrays of arbitrary dimension.
Parameters
----------
coords : numpy.ndarray (COO.ndim, COO.nnz)
An array holding the index locations of every value
Should have shape (number of dimensions, number of non-zeros)
data : numpy.ndarray (COO.nnz,)
An array of Values
shape : tuple[int] (COO.ndim,), optional
The shape of the array
has_duplicates : bool, optional
A value indicating whether the supplied value for :code:`coords` has
duplicates. Note that setting this to `False` when :code:`coords` does have
duplicates may result in undefined behaviour. See :obj:`COO.sum_duplicates`
sorted : bool, optional
A value indicating whether the values in `coords` are sorted. Note
that setting this to `False` when :code:`coords` isn't sorted may
result in undefined behaviour. See :obj:`COO.sort_indices`.
cache : bool, optional
Whether to enable cacheing for various operations. See
:obj:`COO.enable_caching`
Attributes
----------
coords : numpy.ndarray (ndim, nnz)
An array holding the coordinates of every nonzero element.
data : numpy.ndarray (nnz,)
An array holding the values corresponding to :obj:`COO.coords`.
shape : tuple[int] (ndim,)
The dimensions of this array.
See Also
--------
DOK : A mostly write-only sparse array.
Examples
--------
You can create :obj:`COO` objects from Numpy arrays.
>>> x = np.eye(4, dtype=np.uint8)
>>> x[2, 3] = 5
>>> s = COO.from_numpy(x)
>>> s
<COO: shape=(4, 4), dtype=uint8, nnz=5, sorted=True, duplicates=False>
>>> s.data # doctest: +NORMALIZE_WHITESPACE
array([1, 1, 1, 5, 1], dtype=uint8)
>>> s.coords # doctest: +NORMALIZE_WHITESPACE
array([[0, 1, 2, 2, 3],
[0, 1, 2, 3, 3]], dtype=uint8)
:obj:`COO` objects support basic arithmetic and binary operations.
>>> x2 = np.eye(4, dtype=np.uint8)
>>> x2[3, 2] = 5
>>> s2 = COO.from_numpy(x2)
>>> (s + s2).todense() # doctest: +NORMALIZE_WHITESPACE
array([[2, 0, 0, 0],
[0, 2, 0, 0],
[0, 0, 2, 5],
[0, 0, 5, 2]], dtype=uint8)
>>> (s * s2).todense() # doctest: +NORMALIZE_WHITESPACE
array([[1, 0, 0, 0],
[0, 1, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1]], dtype=uint8)
Binary operations support broadcasting.
>>> x3 = np.zeros((4, 1), dtype=np.uint8)
>>> x3[2, 0] = 1
>>> s3 = COO.from_numpy(x3)
>>> (s * s3).todense() # doctest: +NORMALIZE_WHITESPACE
array([[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 1, 5],
[0, 0, 0, 0]], dtype=uint8)
:obj:`COO` objects also support dot products and reductions.
>>> s.dot(s.T).sum(axis=0).todense() # doctest: +NORMALIZE_WHITESPACE
array([ 1, 1, 31, 6], dtype=uint64)
You can use Numpy :code:`ufunc` operations on :obj:`COO` arrays as well.
>>> np.sum(s, axis=1).todense() # doctest: +NORMALIZE_WHITESPACE
array([1, 1, 6, 1], dtype=uint64)
>>> np.round(np.sqrt(s, dtype=np.float64), decimals=1).todense() # doctest: +SKIP
array([[ 1. , 0. , 0. , 0. ],
[ 0. , 1. , 0. , 0. ],
[ 0. , 0. , 1. , 2.2],
[ 0. , 0. , 0. , 1. ]])
Operations that will result in a dense array will raise a :obj:`ValueError`,
such as the following.
>>> np.exp(s)
Traceback (most recent call last):
...
ValueError: Performing this operation would produce a dense result: <ufunc 'exp'>
You can also create :obj:`COO` arrays from coordinates and data.
>>> coords = [[0, 0, 0, 1, 1],
... [0, 1, 2, 0, 3],
... [0, 3, 2, 0, 1]]
>>> data = [1, 2, 3, 4, 5]
>>> s4 = COO(coords, data, shape=(3, 4, 5))
>>> s4
<COO: shape=(3, 4, 5), dtype=int64, nnz=5, sorted=False, duplicates=True>
Following scipy.sparse conventions you can also pass these as a tuple with
rows and columns
>>> rows = [0, 1, 2, 3, 4]
>>> cols = [0, 0, 0, 1, 1]
>>> data = [10, 20, 30, 40, 50]
>>> z = COO((data, (rows, cols)))
>>> z.todense() # doctest: +NORMALIZE_WHITESPACE
array([[10, 0],
[20, 0],
[30, 0],
[ 0, 40],
[ 0, 50]])
You can also pass a dictionary or iterable of index/value pairs. Repeated
indices imply summation:
>>> d = {(0, 0, 0): 1, (1, 2, 3): 2, (1, 1, 0): 3}
>>> COO(d)
<COO: shape=(2, 3, 4), dtype=int64, nnz=3, sorted=False, duplicates=False>
>>> L = [((0, 0), 1),
... ((1, 1), 2),
... ((0, 0), 3)]
>>> COO(L).todense() # doctest: +NORMALIZE_WHITESPACE
array([[4, 0],
[0, 2]])
You can convert :obj:`DOK` arrays to :obj:`COO` arrays.
>>> from sparse import DOK
>>> s5 = DOK((5, 5), dtype=np.int64)
>>> s5[1:3, 1:3] = [[4, 5], [6, 7]]
>>> s5
<DOK: shape=(5, 5), dtype=int64, nnz=4>
>>> s6 = COO(s5)
>>> s6
<COO: shape=(5, 5), dtype=int64, nnz=4, sorted=False, duplicates=False>
>>> s6.todense() # doctest: +NORMALIZE_WHITESPACE
array([[0, 0, 0, 0, 0],
[0, 4, 5, 0, 0],
[0, 6, 7, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]])
"""
__array_priority__ = 12
def __init__(self, coords, data=None, shape=None, has_duplicates=True,
sorted=False, cache=False):
self._cache = None
if cache:
self.enable_caching()
if data is None:
from .dok import DOK
if isinstance(coords, COO):
self.coords = coords.coords
self.data = coords.data
self.has_duplicates = coords.has_duplicates
self.sorted = coords.sorted
self.shape = coords.shape
return
if isinstance(coords, DOK):
shape = coords.shape
coords = coords.data
# {(i, j, k): x, (i, j, k): y, ...}
if isinstance(coords, dict):
coords = list(coords.items())
has_duplicates = False
if isinstance(coords, np.ndarray):
result = COO.from_numpy(coords)
self.coords = result.coords
self.data = result.data
self.has_duplicates = result.has_duplicates
self.sorted = result.sorted
self.shape = result.shape
return
if isinstance(coords, scipy.sparse.spmatrix):
result = COO.from_scipy_sparse(coords)
self.coords = result.coords
self.data = result.data
self.has_duplicates = result.has_duplicates
self.sorted = result.sorted
self.shape = result.shape
return
# []
if not coords:
data = []
coords = []
# [((i, j, k), value), (i, j, k), value), ...]
elif isinstance(coords[0][0], Iterable):
if coords:
assert len(coords[0]) == 2
data = [x[1] for x in coords]
coords = [x[0] for x in coords]
coords = np.asarray(coords).T
# (data, (row, col, slab, ...))
else:
data = coords[0]
coords = np.stack(coords[1], axis=0)
self.data = np.asarray(data)
self.coords = np.asarray(coords)
if self.coords.ndim == 1:
self.coords = self.coords[None, :]
if shape and not self.coords.size:
self.coords = np.zeros((len(shape), 0), dtype=np.uint64)
if shape is None:
if self.coords.nbytes:
shape = tuple((self.coords.max(axis=1) + 1).tolist())
else:
shape = ()
if isinstance(shape, numbers.Integral):
shape = (int(shape),)
self.shape = tuple(shape)
if self.shape:
dtype = np.min_scalar_type(max(self.shape))
else:
dtype = np.int_
self.coords = self.coords.astype(dtype)
assert not self.shape or len(data) == self.coords.shape[1]
self.has_duplicates = has_duplicates
self.sorted = sorted
[docs] def enable_caching(self):
""" Enable caching of reshape, transpose, and tocsr/csc operations
This enables efficient iterative workflows that make heavy use of
csr/csc operations, such as tensordot. This maintains a cache of
recent results of reshape and transpose so that operations like
tensordot (which uses both internally) store efficiently stored
representations for repeated use. This can significantly cut down on
computational costs in common numeric algorithms.
However, this also assumes that neither this object, nor the downstream
objects will have their data mutated.
Examples
--------
>>> s.enable_caching() # doctest: +SKIP
>>> csr1 = s.transpose((2, 0, 1)).reshape((100, 120)).tocsr() # doctest: +SKIP
>>> csr2 = s.transpose((2, 0, 1)).reshape((100, 120)).tocsr() # doctest: +SKIP
>>> csr1 is csr2 # doctest: +SKIP
True
"""
self._cache = defaultdict(lambda: deque(maxlen=3))
return self
[docs] @classmethod
def from_numpy(cls, x):
"""
Convert the given :obj:`numpy.ndarray` to a :obj:`COO` object.
Parameters
----------
x : np.ndarray
The dense array to convert.
Returns
-------
COO
The converted COO array.
Examples
--------
>>> x = np.eye(5)
>>> s = COO.from_numpy(x)
>>> s
<COO: shape=(5, 5), dtype=float64, nnz=5, sorted=True, duplicates=False>
"""
x = np.asanyarray(x)
if x.shape:
coords = np.where(x)
data = x[coords]
coords = np.vstack(coords)
else:
coords = np.empty((0, 1), dtype=np.uint8)
data = np.array(x, ndmin=1)
return cls(coords, data, shape=x.shape, has_duplicates=False,
sorted=True)
[docs] def todense(self):
"""
Convert this :obj:`COO` array to a dense :obj:`numpy.ndarray`. Note that
this may take a large amount of memory if the :obj:`COO` object's :code:`shape`
is large.
Returns
-------
numpy.ndarray
The converted dense array.
See Also
--------
DOK.todense : Equivalent :obj:`DOK` array method.
scipy.sparse.coo_matrix.todense : Equivalent Scipy method.
Examples
--------
>>> x = np.random.randint(100, size=(7, 3))
>>> s = COO.from_numpy(x)
>>> x2 = s.todense()
>>> np.array_equal(x, x2)
True
"""
self.sum_duplicates()
x = np.zeros(shape=self.shape, dtype=self.dtype)
coords = tuple([self.coords[i, :] for i in range(self.ndim)])
data = self.data
if coords != ():
x[coords] = data
else:
if len(data) != 0:
x[coords] = data
return x
[docs] @classmethod
def from_scipy_sparse(cls, x):
"""
Construct a :obj:`COO` array from a :obj:`scipy.sparse.spmatrix`
Parameters
----------
x : scipy.sparse.spmatrix
The sparse matrix to construct the array from.
Returns
-------
COO
The converted :obj:`COO` object.
Examples
--------
>>> x = scipy.sparse.rand(6, 3, density=0.2)
>>> s = COO.from_scipy_sparse(x)
>>> np.array_equal(x.todense(), s.todense())
True
"""
x = scipy.sparse.coo_matrix(x)
coords = np.empty((2, x.nnz), dtype=x.row.dtype)
coords[0, :] = x.row
coords[1, :] = x.col
return COO(coords, x.data, shape=x.shape,
has_duplicates=not x.has_canonical_format,
sorted=x.has_canonical_format)
@property
def dtype(self):
"""
The datatype of this array.
Returns
-------
numpy.dtype
The datatype of this array.
See Also
--------
numpy.ndarray.dtype : Numpy equivalent property.
scipy.sparse.coo_matrix.dtype : Scipy equivalent property.
Examples
--------
>>> x = (200 * np.random.rand(5, 4)).astype(np.int32)
>>> s = COO.from_numpy(x)
>>> s.dtype
dtype('int32')
>>> x.dtype == s.dtype
True
"""
return self.data.dtype
@property
def ndim(self):
"""
The number of dimensions of this array.
Returns
-------
int
The number of dimensions of this array.
See Also
--------
DOK.ndim : Equivalent property for :obj:`DOK` arrays.
numpy.ndarray.ndim : Numpy equivalent property.
Examples
--------
>>> x = np.random.rand(1, 2, 3, 1, 2)
>>> s = COO.from_numpy(x)
>>> s.ndim
5
>>> s.ndim == x.ndim
True
"""
return len(self.shape)
@property
def nnz(self):
"""
The number of nonzero elements in this array. Note that any duplicates in
:code:`coords` are counted multiple times. To avoid this, call :obj:`COO.sum_duplicates`.
Returns
-------
int
The number of nonzero elements in this array.
See Also
--------
DOK.nnz : Equivalent :obj:`DOK` array property.
numpy.count_nonzero : A similar Numpy function.
scipy.sparse.coo_matrix.nnz : The Scipy equivalent property.
Examples
--------
>>> x = np.array([0, 0, 1, 0, 1, 2, 0, 1, 2, 3, 0, 0])
>>> np.count_nonzero(x)
6
>>> s = COO.from_numpy(x)
>>> s.nnz
6
>>> np.count_nonzero(x) == s.nnz
True
"""
return self.coords.shape[1]
@property
def nbytes(self):
"""
The number of bytes taken up by this object. Note that for small arrays,
this may undercount the number of bytes due to the large constant overhead.
Returns
-------
int
The approximate bytes of memory taken by this object.
See Also
--------
numpy.ndarray.nbytes : The equivalent Numpy property.
Examples
--------
>>> data = np.arange(6, dtype=np.uint8)
>>> coords = np.random.randint(1000, size=(3, 6), dtype=np.uint16)
>>> s = COO(coords, data, shape=(1000, 1000, 1000))
>>> s.nbytes
42
"""
return self.data.nbytes + self.coords.nbytes
def __len__(self):
"""
Get "length" of array, which is by definition the size of the first
dimension.
Returns
-------
int
The size of the first dimension.
See Also
--------
numpy.ndarray.__len__ : Numpy equivalent property.
Examples
--------
>>> x = np.zeros((10, 10))
>>> s = COO.from_numpy(x)
>>> len(s)
10
"""
return self.shape[0]
@property
def size(self):
"""
The number of all elements (including zeros) in this array.
Returns
-------
int
The number of elements.
See Also
--------
numpy.ndarray.size : Numpy equivalent property.
Examples
--------
>>> x = np.zeros((10, 10))
>>> s = COO.from_numpy(x)
>>> s.size
100
"""
return np.prod(self.shape)
@property
def density(self):
"""
The ratio of nonzero to all elements in this array.
Returns
-------
float
The ratio of nonzero to all elements.
See Also
--------
COO.size : Number of elements.
COO.nnz : Number of nonzero elements.
Examples
--------
>>> x = np.zeros((8, 8))
>>> x[0, :] = 1
>>> s = COO.from_numpy(x)
>>> s.density
0.125
"""
return self.nnz / self.size
def __sizeof__(self):
return self.nbytes
def __getitem__(self, index):
if not isinstance(index, tuple):
if isinstance(index, str):
data = self.data[index]
idx = np.where(data)
coords = list(self.coords[:, idx[0]])
coords.extend(idx[1:])
return COO(coords, data[idx].flatten(),
shape=self.shape + self.data.dtype[index].shape,
has_duplicates=self.has_duplicates,
sorted=self.sorted)
else:
index = (index,)
last_ellipsis = len(index) > 0 and index[-1] is Ellipsis
index = normalize_index(index, self.shape)
if len(index) != 0 and all(not isinstance(ind, Iterable) and ind == slice(None) for ind in index):
return self
mask = np.ones(self.nnz, dtype=np.bool)
for i, ind in enumerate([i for i in index if i is not None]):
if not isinstance(ind, Iterable) and ind == slice(None):
continue
mask &= _mask(self.coords[i], ind, self.shape[i])
n = mask.sum()
coords = []
shape = []
i = 0
for ind in index:
if isinstance(ind, numbers.Integral):
i += 1
continue
elif isinstance(ind, slice):
step = ind.step if ind.step is not None else 1
if step > 0:
start = ind.start if ind.start is not None else 0
start = max(start, 0)
stop = ind.stop if ind.stop is not None else self.shape[i]
stop = min(stop, self.shape[i])
if start > stop:
start = stop
shape.append((stop - start + step - 1) // step)
else:
start = ind.start or self.shape[i] - 1
stop = ind.stop if ind.stop is not None else -1
start = min(start, self.shape[i] - 1)
stop = max(stop, -1)
if start < stop:
start = stop
shape.append((start - stop - step - 1) // (-step))
dt = np.min_scalar_type(min(-(dim - 1) if dim != 0 else -1 for dim in shape))
coords.append((self.coords[i, mask].astype(dt) - start) // step)
i += 1
elif isinstance(ind, Iterable):
old = self.coords[i][mask]
new = np.empty(shape=old.shape, dtype=old.dtype)
for j, item in enumerate(ind):
new[old == item] = j
coords.append(new)
shape.append(len(ind))
i += 1
elif ind is None:
coords.append(np.zeros(n))
shape.append(1)
for j in range(i, self.ndim):
coords.append(self.coords[j][mask])
shape.append(self.shape[j])
if coords:
coords = np.stack(coords, axis=0)
else:
if last_ellipsis:
coords = np.empty((0, np.sum(mask)), dtype=np.uint8)
else:
if np.sum(mask) != 0:
return self.data[mask][0]
else:
return _zero_of_dtype(self.dtype)[()]
shape = tuple(shape)
data = self.data[mask]
return COO(coords, data, shape=shape,
has_duplicates=self.has_duplicates,
sorted=self.sorted)
def __str__(self):
return "<COO: shape=%s, dtype=%s, nnz=%d, sorted=%s, duplicates=%s>" % (
self.shape, self.dtype, self.nnz, self.sorted,
self.has_duplicates)
__repr__ = __str__
@staticmethod
def _reduce(method, *args, **kwargs):
assert len(args) == 1
self = args[0]
if isinstance(self, scipy.sparse.spmatrix):
self = COO.from_scipy_sparse(self)
return self.reduce(method, **kwargs)
[docs] def reduce(self, method, axis=None, keepdims=False, **kwargs):
"""
Performs a reduction operation on this array.
Parameters
----------
method : numpy.ufunc
The method to use for performing the reduction.
axis : Union[int, Iterable[int]], optional
The axes along which to perform the reduction. Uses all axes by default.
keepdims : bool, optional
Whether or not to keep the dimensions of the original array.
kwargs : dict
Any extra arguments to pass to the reduction operation.
Returns
-------
COO
The result of the reduction operation.
Raises
------
ValueError
If reducing an all-zero axis would produce a nonzero result.
Notes
-----
This function internally calls :obj:`COO.sum_duplicates` to bring the array into
canonical form.
See Also
--------
numpy.ufunc.reduce : A similar Numpy method.
Examples
--------
You can use the :obj:`COO.reduce` method to apply a reduction operation to
any Numpy :code:`ufunc`.
>>> x = np.ones((5, 5), dtype=np.int)
>>> s = COO.from_numpy(x)
>>> s2 = s.reduce(np.add, axis=1)
>>> s2.todense() # doctest: +NORMALIZE_WHITESPACE
array([5, 5, 5, 5, 5])
You can also use the :code:`keepdims` argument to keep the dimensions after the
reduction.
>>> s3 = s.reduce(np.add, axis=1, keepdims=True)
>>> s3.shape
(5, 1)
You can also pass in any keyword argument that :obj:`numpy.ufunc.reduce` supports.
For example, :code:`dtype`. Note that :code:`out` isn't supported.
>>> s4 = s.reduce(np.add, axis=1, dtype=np.float16)
>>> s4.dtype
dtype('float16')
By default, this reduces the array down to one number, reducing along all axes.
>>> s.reduce(np.add)
25
"""
zero_reduce_result = method.reduce([_zero_of_dtype(self.dtype)], **kwargs)
if zero_reduce_result != _zero_of_dtype(np.dtype(zero_reduce_result)):
raise ValueError("Performing this reduction operation would produce "
"a dense result: %s" % str(method))
# Needed for more esoteric reductions like product.
self.sum_duplicates()
if axis is None:
axis = tuple(range(self.ndim))
if not isinstance(axis, tuple):
axis = (axis,)
if set(axis) == set(range(self.ndim)):
result = method.reduce(self.data, **kwargs)
if self.nnz != self.size:
result = method(result, _zero_of_dtype(self.dtype)[()], **kwargs)
else:
axis = tuple(axis)
neg_axis = tuple(ax for ax in range(self.ndim) if ax not in axis)
a = self.transpose(neg_axis + axis)
a = a.reshape((np.prod([self.shape[d] for d in neg_axis]),
np.prod([self.shape[d] for d in axis])))
a.sort_indices()
result, inv_idx, counts = _grouped_reduce(a.data, a.coords[0], method, **kwargs)
missing_counts = counts != a.shape[1]
result[missing_counts] = method(result[missing_counts],
_zero_of_dtype(self.dtype), **kwargs)
coords = a.coords[0:1, inv_idx]
a = COO(coords, result, shape=(a.shape[0],),
has_duplicates=False, sorted=True)
a = a.reshape([self.shape[d] for d in neg_axis])
result = a
if keepdims:
result = _keepdims(self, result, axis)
return result
[docs] def sum(self, axis=None, keepdims=False, dtype=None, out=None):
"""
Performs a sum operation along the given axes. Uses all axes by default.
Parameters
----------
axis : Union[int, Iterable[int]], optional
The axes along which to sum. Uses all axes by default.
keepdims : bool, optional
Whether or not to keep the dimensions of the original array.
dtype: numpy.dtype
The data type of the output array.
Returns
-------
COO
The reduced output sparse array.
See Also
--------
:obj:`numpy.sum` : Equivalent numpy function.
scipy.sparse.coo_matrix.sum : Equivalent Scipy function.
Notes
-----
* This function internally calls :obj:`COO.sum_duplicates` to bring the array into
canonical form.
* The :code:`out` parameter is provided just for compatibility with Numpy and
isn't actually supported.
Examples
--------
You can use :obj:`COO.sum` to sum an array across any dimension.
>>> x = np.ones((5, 5), dtype=np.int)
>>> s = COO.from_numpy(x)
>>> s2 = s.sum(axis=1)
>>> s2.todense() # doctest: +NORMALIZE_WHITESPACE
array([5, 5, 5, 5, 5])
You can also use the :code:`keepdims` argument to keep the dimensions after the
sum.
>>> s3 = s.sum(axis=1, keepdims=True)
>>> s3.shape
(5, 1)
You can pass in an output datatype, if needed.
>>> s4 = s.sum(axis=1, dtype=np.float16)
>>> s4.dtype
dtype('float16')
By default, this reduces the array down to one number, summing along all axes.
>>> s.sum()
25
"""
assert out is None
return self.reduce(np.add, axis=axis, keepdims=keepdims, dtype=dtype)
[docs] def max(self, axis=None, keepdims=False, out=None):
"""
Maximize along the given axes. Uses all axes by default.
Parameters
----------
axis : Union[int, Iterable[int]], optional
The axes along which to maximize. Uses all axes by default.
keepdims : bool, optional
Whether or not to keep the dimensions of the original array.
dtype: numpy.dtype
The data type of the output array.
Returns
-------
COO
The reduced output sparse array.
See Also
--------
:obj:`numpy.max` : Equivalent numpy function.
scipy.sparse.coo_matrix.max : Equivalent Scipy function.
Notes
-----
* This function internally calls :obj:`COO.sum_duplicates` to bring the array into
canonical form.
* The :code:`out` parameter is provided just for compatibility with Numpy and
isn't actually supported.
Examples
--------
You can use :obj:`COO.max` to maximize an array across any dimension.
>>> x = np.add.outer(np.arange(5), np.arange(5))
>>> x # doctest: +NORMALIZE_WHITESPACE
array([[0, 1, 2, 3, 4],
[1, 2, 3, 4, 5],
[2, 3, 4, 5, 6],
[3, 4, 5, 6, 7],
[4, 5, 6, 7, 8]])
>>> s = COO.from_numpy(x)
>>> s2 = s.max(axis=1)
>>> s2.todense() # doctest: +NORMALIZE_WHITESPACE
array([4, 5, 6, 7, 8])
You can also use the :code:`keepdims` argument to keep the dimensions after the
maximization.
>>> s3 = s.max(axis=1, keepdims=True)
>>> s3.shape
(5, 1)
By default, this reduces the array down to one number, maximizing along all axes.
>>> s.max()
8
"""
assert out is None
return self.reduce(np.maximum, axis=axis, keepdims=keepdims)
[docs] def min(self, axis=None, keepdims=False, out=None):
"""
Minimize along the given axes. Uses all axes by default.
Parameters
----------
axis : Union[int, Iterable[int]], optional
The axes along which to minimize. Uses all axes by default.
keepdims : bool, optional
Whether or not to keep the dimensions of the original array.
dtype: numpy.dtype
The data type of the output array.
Returns
-------
COO
The reduced output sparse array.
See Also
--------
:obj:`numpy.min` : Equivalent numpy function.
scipy.sparse.coo_matrix.min : Equivalent Scipy function.
Notes
-----
* This function internally calls :obj:`COO.sum_duplicates` to bring the array into
canonical form.
* The :code:`out` parameter is provided just for compatibility with Numpy and
isn't actually supported.
Examples
--------
You can use :obj:`COO.min` to minimize an array across any dimension.
>>> x = np.add.outer(np.arange(5), np.arange(5))
>>> x # doctest: +NORMALIZE_WHITESPACE
array([[0, 1, 2, 3, 4],
[1, 2, 3, 4, 5],
[2, 3, 4, 5, 6],
[3, 4, 5, 6, 7],
[4, 5, 6, 7, 8]])
>>> s = COO.from_numpy(x)
>>> s2 = s.min(axis=1)
>>> s2.todense() # doctest: +NORMALIZE_WHITESPACE
array([0, 1, 2, 3, 4])
You can also use the :code:`keepdims` argument to keep the dimensions after the
minimization.
>>> s3 = s.min(axis=1, keepdims=True)
>>> s3.shape
(5, 1)
By default, this reduces the array down to one number, minimizing along all axes.
>>> s.min()
0
"""
assert out is None
return self.reduce(np.minimum, axis=axis, keepdims=keepdims)
[docs] def prod(self, axis=None, keepdims=False, dtype=None, out=None):
"""
Performs a product operation along the given axes. Uses all axes by default.
Parameters
----------
axis : Union[int, Iterable[int]], optional
The axes along which to multiply. Uses all axes by default.
keepdims : bool, optional
Whether or not to keep the dimensions of the original array.
dtype: numpy.dtype
The data type of the output array.
Returns
-------
COO
The reduced output sparse array.
See Also
--------
:obj:`numpy.prod` : Equivalent numpy function.
Notes
-----
* This function internally calls :obj:`COO.sum_duplicates` to bring the array into
canonical form.
* The :code:`out` parameter is provided just for compatibility with Numpy and
isn't actually supported.
Examples
--------
You can use :obj:`COO.prod` to multiply an array across any dimension.
>>> x = np.add.outer(np.arange(5), np.arange(5))
>>> x # doctest: +NORMALIZE_WHITESPACE
array([[0, 1, 2, 3, 4],
[1, 2, 3, 4, 5],
[2, 3, 4, 5, 6],
[3, 4, 5, 6, 7],
[4, 5, 6, 7, 8]])
>>> s = COO.from_numpy(x)
>>> s2 = s.prod(axis=1)
>>> s2.todense() # doctest: +NORMALIZE_WHITESPACE
array([ 0, 120, 720, 2520, 6720])
You can also use the :code:`keepdims` argument to keep the dimensions after the
reduction.
>>> s3 = s.prod(axis=1, keepdims=True)
>>> s3.shape
(5, 1)
You can pass in an output datatype, if needed.
>>> s4 = s.prod(axis=1, dtype=np.float16)
>>> s4.dtype
dtype('float16')
By default, this reduces the array down to one number, multiplying along all axes.
>>> s.prod()
0
"""
assert out is None
return self.reduce(np.multiply, axis=axis, keepdims=keepdims, dtype=dtype)
[docs] def transpose(self, axes=None):
"""
Returns a new array which has the order of the axes switched.
Parameters
----------
axes : Iterable[int], optional
The new order of the axes compared to the previous one. Reverses the axes
by default.
Returns
-------
COO
The new array with the axes in the desired order.
See Also
--------
:obj:`COO.T` : A quick property to reverse the order of the axes.
numpy.ndarray.transpose : Numpy equivalent function.
Examples
--------
We can change the order of the dimensions of any :obj:`COO` array with this
function.
>>> x = np.add.outer(np.arange(5), np.arange(5)[::-1])
>>> x # doctest: +NORMALIZE_WHITESPACE
array([[4, 3, 2, 1, 0],
[5, 4, 3, 2, 1],
[6, 5, 4, 3, 2],
[7, 6, 5, 4, 3],
[8, 7, 6, 5, 4]])
>>> s = COO.from_numpy(x)
>>> s.transpose((1, 0)).todense() # doctest: +NORMALIZE_WHITESPACE
array([[4, 5, 6, 7, 8],
[3, 4, 5, 6, 7],
[2, 3, 4, 5, 6],
[1, 2, 3, 4, 5],
[0, 1, 2, 3, 4]])
Note that by default, this reverses the order of the axes rather than switching
the last and second-to-last axes as required by some linear algebra operations.
>>> x = np.random.rand(2, 3, 4)
>>> s = COO.from_numpy(x)
>>> s.transpose().shape
(4, 3, 2)
"""
if axes is None:
axes = list(reversed(range(self.ndim)))
# Normalize all axe indices to posivite values
axes = np.array(axes)
axes[axes < 0] += self.ndim
if np.any(axes >= self.ndim) or np.any(axes < 0):
raise ValueError("invalid axis for this array")
if len(np.unique(axes)) < len(axes):
raise ValueError("repeated axis in transpose")
if not len(axes) == self.ndim:
raise ValueError("axes don't match array")
# Normalize all axe indices to posivite values
try:
axes = np.arange(self.ndim)[list(axes)]
except IndexError:
raise ValueError("invalid axis for this array")
if len(np.unique(axes)) < len(axes):
raise ValueError("repeated axis in transpose")
if not len(axes) == self.ndim:
raise ValueError("axes don't match array")
axes = tuple(axes)
if axes == tuple(range(self.ndim)):
return self
if self._cache is not None:
for ax, value in self._cache['transpose']:
if ax == axes:
return value
shape = tuple(self.shape[ax] for ax in axes)
result = COO(self.coords[axes, :], self.data, shape,
has_duplicates=self.has_duplicates,
cache=self._cache is not None)
if self._cache is not None:
self._cache['transpose'].append((axes, result))
return result
@property
def T(self):
"""
Returns a new array which has the order of the axes reversed.
Returns
-------
COO
The new array with the axes in the desired order.
See Also
--------
:obj:`COO.transpose` : A method where you can specify the order of the axes.
numpy.ndarray.T : Numpy equivalent property.
Examples
--------
We can change the order of the dimensions of any :obj:`COO` array with this
function.
>>> x = np.add.outer(np.arange(5), np.arange(5)[::-1])
>>> x # doctest: +NORMALIZE_WHITESPACE
array([[4, 3, 2, 1, 0],
[5, 4, 3, 2, 1],
[6, 5, 4, 3, 2],
[7, 6, 5, 4, 3],
[8, 7, 6, 5, 4]])
>>> s = COO.from_numpy(x)
>>> s.T.todense() # doctest: +NORMALIZE_WHITESPACE
array([[4, 5, 6, 7, 8],
[3, 4, 5, 6, 7],
[2, 3, 4, 5, 6],
[1, 2, 3, 4, 5],
[0, 1, 2, 3, 4]])
Note that by default, this reverses the order of the axes rather than switching
the last and second-to-last axes as required by some linear algebra operations.
>>> x = np.random.rand(2, 3, 4)
>>> s = COO.from_numpy(x)
>>> s.T.shape
(4, 3, 2)
"""
return self.transpose(tuple(range(self.ndim))[::-1])
[docs] def dot(self, other):
"""
Performs the equivalent of :code:`x.dot(y)` for :obj:`COO`.
Parameters
----------
other : Union[COO, numpy.ndarray, scipy.sparse.spmatrix]
The second operand of the dot product operation.
Returns
-------
{COO, numpy.ndarray}
The result of the dot product. If the result turns out to be dense,
then a dense array is returned, otherwise, a sparse array.
See Also
--------
dot : Equivalent function for two arguments.
:obj:`numpy.dot` : Numpy equivalent function.
scipy.sparse.coo_matrix.dot : Scipy equivalent function.
Examples
--------
>>> x = np.arange(4).reshape((2, 2))
>>> s = COO.from_numpy(x)
>>> s.dot(s) # doctest: +SKIP
array([[ 2, 3],
[ 6, 11]], dtype=int64)
"""
return dot(self, other)
def __matmul__(self, other):
try:
return dot(self, other)
except NotImplementedError:
return NotImplemented
def __rmatmul__(self, other):
try:
return dot(other, self)
except NotImplementedError:
return NotImplemented
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
if method == '__call__':
return COO._elemwise(ufunc, *inputs, **kwargs)
elif method == 'reduce':
return COO._reduce(ufunc, *inputs, **kwargs)
else:
return NotImplemented
def __array__(self, dtype=None, **kwargs):
x = self.todense()
if dtype and x.dtype != dtype:
x = x.astype(dtype)
return x
[docs] def linear_loc(self, signed=False):
"""
The nonzero coordinates of a flattened version of this array. Note that
the coordinates may be out of order.
Parameters
----------
signed : bool, optional
Whether to use a signed datatype for the output array. :code:`False`
by default.
Returns
-------
numpy.ndarray
The flattened coordinates.
See Also
--------
:obj:`numpy.flatnonzero` : Equivalent Numpy function.
Examples
--------
>>> x = np.eye(5)
>>> s = COO.from_numpy(x)
>>> s.linear_loc() # doctest: +NORMALIZE_WHITESPACE
array([ 0, 6, 12, 18, 24], dtype=uint8)
>>> np.array_equal(np.flatnonzero(x), s.linear_loc())
True
"""
return _linear_loc(self.coords, self.shape, signed)
[docs] def reshape(self, shape):
"""
Returns a new :obj:`COO` array that is a reshaped version of this array.
Parameters
----------
shape : tuple[int]
The desired shape of the output array.
Returns
-------
COO
The reshaped output array.
See Also
--------
numpy.ndarray.reshape : The equivalent Numpy function.
Examples
--------
>>> s = COO.from_numpy(np.arange(25))
>>> s2 = s.reshape((5, 5))
>>> s2.todense() # doctest: +NORMALIZE_WHITESPACE
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19],
[20, 21, 22, 23, 24]])
"""
if self.shape == shape:
return self
if any(d == -1 for d in shape):
extra = int(self.size /
np.prod([d for d in shape if d != -1]))
shape = tuple([d if d != -1 else extra for d in shape])
if self.shape == shape:
return self
if self._cache is not None:
for sh, value in self._cache['reshape']:
if sh == shape:
return value
# TODO: this self.size enforces a 2**64 limit to array size
linear_loc = self.linear_loc()
max_shape = max(shape) if len(shape) != 0 else 1
coords = np.empty((len(shape), self.nnz), dtype=np.min_scalar_type(max_shape - 1))
strides = 1
for i, d in enumerate(shape[::-1]):
coords[-(i + 1), :] = (linear_loc // strides) % d
strides *= d
result = COO(coords, self.data, shape,
has_duplicates=self.has_duplicates,
sorted=self.sorted, cache=self._cache is not None)
if self._cache is not None:
self._cache['reshape'].append((shape, result))
return result
[docs] def to_scipy_sparse(self):
"""
Converts this :obj:`COO` object into a :obj:`scipy.sparse.coo_matrix`.
Returns
-------
:obj:`scipy.sparse.coo_matrix`
The converted Scipy sparse matrix.
Raises
------
ValueError
If the array is not two-dimensional.
See Also
--------
COO.tocsr : Convert to a :obj:`scipy.sparse.csr_matrix`.
COO.tocsc : Convert to a :obj:`scipy.sparse.csc_matrix`.
"""
if self.ndim != 2:
raise ValueError("Can only convert a 2-dimensional array to a Scipy sparse matrix.")
result = scipy.sparse.coo_matrix((self.data,
(self.coords[0],
self.coords[1])),
shape=self.shape)
result.has_canonical_format = (not self.has_duplicates and self.sorted)
return result
def _tocsr(self):
if self.ndim != 2:
raise ValueError('This array must be two-dimensional for this conversion '
'to work.')
# Pass 1: sum duplicates
self.sum_duplicates()
# Pass 2: sort indices
self.sort_indices()
row, col = self.coords
# Pass 3: count nonzeros in each row
indptr = np.zeros(self.shape[0] + 1, dtype=np.int64)
np.cumsum(np.bincount(row, minlength=self.shape[0]), out=indptr[1:])
return scipy.sparse.csr_matrix((self.data, col, indptr), shape=self.shape)
[docs] def tocsr(self):
"""
Converts this array to a :obj:`scipy.sparse.csr_matrix`.
Returns
-------
scipy.sparse.csr_matrix
The result of the conversion.
Raises
------
ValueError
If the array is not two-dimensional.
See Also
--------
COO.tocsc : Convert to a :obj:`scipy.sparse.csc_matrix`.
COO.to_scipy_sparse : Convert to a :obj:`scipy.sparse.coo_matrix`.
scipy.sparse.coo_matrix.tocsr : Equivalent Scipy function.
"""
if self._cache is not None:
try:
return self._csr
except AttributeError:
pass
try:
self._csr = self._csc.tocsr()
return self._csr
except AttributeError:
pass
self._csr = csr = self._tocsr()
else:
csr = self._tocsr()
return csr
[docs] def tocsc(self):
"""
Converts this array to a :obj:`scipy.sparse.csc_matrix`.
Returns
-------
scipy.sparse.csc_matrix
The result of the conversion.
Raises
------
ValueError
If the array is not two-dimensional.
See Also
--------
COO.tocsr : Convert to a :obj:`scipy.sparse.csr_matrix`.
COO.to_scipy_sparse : Convert to a :obj:`scipy.sparse.coo_matrix`.
scipy.sparse.coo_matrix.tocsc : Equivalent Scipy function.
"""
if self._cache is not None:
try:
return self._csc
except AttributeError:
pass
try:
self._csc = self._csr.tocsc()
return self._csc
except AttributeError:
pass
self._csc = csc = self.tocsr().tocsc()
else:
csc = self.tocsr().tocsc()
return csc
[docs] def sort_indices(self):
"""
Sorts the :obj:`COO.coords` attribute. Also sorts the data in
:obj:`COO.data` to match.
Examples
--------
>>> coords = np.array([[1, 2, 0]], dtype=np.uint8)
>>> data = np.array([4, 1, 3], dtype=np.uint8)
>>> s = COO(coords, data)
>>> s.sort_indices()
>>> s.coords # doctest: +NORMALIZE_WHITESPACE
array([[0, 1, 2]], dtype=uint8)
>>> s.data # doctest: +NORMALIZE_WHITESPACE
array([3, 4, 1], dtype=uint8)
"""
if self.sorted:
return
linear = self.linear_loc(signed=True)
if (np.diff(linear) > 0).all(): # already sorted
self.sorted = True
return
order = np.argsort(linear)
self.coords = self.coords[:, order]
self.data = self.data[order]
self.sorted = True
[docs] def sum_duplicates(self):
"""
Sums data corresponding to duplicates in :obj:`COO.coords`.
See Also
--------
scipy.sparse.coo_matrix.sum_duplicates : Equivalent Scipy function.
Examples
--------
>>> coords = np.array([[0, 1, 1, 2]], dtype=np.uint8)
>>> data = np.array([6, 5, 2, 2], dtype=np.uint8)
>>> s = COO(coords, data)
>>> s.sum_duplicates()
>>> s.coords # doctest: +NORMALIZE_WHITESPACE
array([[0, 1, 2]], dtype=uint8)
>>> s.data # doctest: +NORMALIZE_WHITESPACE
array([6, 7, 2], dtype=uint8)
"""
# Inspired by scipy/sparse/coo.py::sum_duplicates
# See https://github.com/scipy/scipy/blob/master/LICENSE.txt
if not self.has_duplicates and self.sorted:
return
if not self.coords.size:
return
self.sort_indices()
linear = self.linear_loc()
unique_mask = np.diff(linear) != 0
if unique_mask.sum() == len(unique_mask): # already unique
self.has_duplicates = False
return
unique_mask = np.append(True, unique_mask)
coords = self.coords[:, unique_mask]
(unique_inds,) = np.nonzero(unique_mask)
data = np.add.reduceat(self.data, unique_inds, dtype=self.data.dtype)
self.data = data
self.coords = coords
self.has_duplicates = False
def __add__(self, other):
return self.elemwise(operator.add, other)
def __radd__(self, other):
return self.elemwise(_reverse_self_other(operator.add), other)
def __neg__(self):
return self.elemwise(operator.neg)
def __sub__(self, other):
return self.elemwise(operator.sub, other)
def __rsub__(self, other):
return self.elemwise(_reverse_self_other(operator.sub), other)
def __mul__(self, other):
return self.elemwise(operator.mul, other)
def __rmul__(self, other):
return self.elemwise(_reverse_self_other(operator.mul), other)
def __truediv__(self, other):
return self.elemwise(operator.truediv, other)
def __rtruediv__(self, other):
return self.elemwise(_reverse_self_other(operator.truediv), other)
def __floordiv__(self, other):
return self.elemwise(operator.floordiv, other)
def __rfloordiv__(self, other):
return self.elemwise(_reverse_self_other(operator.floordiv), other)
__div__ = __truediv__
__rdiv__ = __rtruediv__
def __pow__(self, other):
return self.elemwise(operator.pow, other)
def __rpow__(self, other):
return self.elemwise(_reverse_self_other(operator.pow), other)
def __mod__(self, other):
return self.elemwise(operator.mod, other)
def __rmod__(self, other):
return self.elemwise(_reverse_self_other(operator.mod), other)
def __and__(self, other):
return self.elemwise(operator.and_, other)
def __rand__(self, other):
return self.elemwise(_reverse_self_other(operator.and_), other)
def __xor__(self, other):
return self.elemwise(operator.xor, other)
def __rxor__(self, other):
return self.elemwise(_reverse_self_other(operator.xor), other)
def __or__(self, other):
return self.elemwise(operator.or_, other)
def __ror__(self, other):
return self.elemwise(_reverse_self_other(operator.or_), other)
def __invert__(self):
return self.elemwise(operator.invert)
def __gt__(self, other):
return self.elemwise(operator.gt, other)
def __ge__(self, other):
return self.elemwise(operator.ge, other)
def __lt__(self, other):
return self.elemwise(operator.lt, other)
def __le__(self, other):
return self.elemwise(operator.le, other)
def __eq__(self, other):
return self.elemwise(operator.eq, other)
def __ne__(self, other):
return self.elemwise(operator.ne, other)
def __lshift__(self, other):
return self.elemwise(operator.lshift, other)
def __rlshift__(self, other):
return self.elemwise(_reverse_self_other(operator.lshift), other)
def __rshift__(self, other):
return self.elemwise(operator.rshift, other)
def __rrshift__(self, other):
return self.elemwise(_reverse_self_other(operator.rshift), other)
@staticmethod
def _elemwise(func, *args, **kwargs):
if len(args) == 0:
return func()
self = args[0]
if isinstance(self, scipy.sparse.spmatrix):
self = COO.from_numpy(self)
elif np.isscalar(self) or (isinstance(self, np.ndarray)
and self.ndim == 0):
func = partial(func, self)
other = args[1]
if isinstance(other, scipy.sparse.spmatrix):
other = COO.from_scipy_sparse(other)
return _elemwise_unary(func, other, *args[2:], **kwargs)
if len(args) == 1:
return _elemwise_unary(func, self, *args[1:], **kwargs)
else:
other = args[1]
if isinstance(other, scipy.sparse.spmatrix):
other = COO.from_scipy_sparse(other)
if isinstance(other, COO) or isinstance(other, np.ndarray):
return _elemwise_binary(func, self, other, *args[2:], **kwargs)
else:
return _elemwise_unary(func, self, *args[1:], **kwargs)
[docs] def elemwise(self, func, *args, **kwargs):
"""
Apply a function to one or two arguments.
Parameters
----------
func : Callable
The function to apply to one or two arguments.
args : tuple, optional
The extra arguments to pass to the function. If :code:`args[0]` is a COO object,
a scipy.sparse.spmatrix or a scalar; the function will be treated as a binary
function. Otherwise, it will be treated as a unary function.
kwargs : dict, optional
The kwargs to pass to the function.
Returns
-------
COO
The result of applying the function.
Raises
------
ValueError
If the operation would result in a dense matrix.
See Also
--------
:obj:`numpy.ufunc` : A similar Numpy construct. Note that any :code:`ufunc` can be used
as the :code:`func` input to this function.
"""
return COO._elemwise(func, self, *args, **kwargs)
[docs] def broadcast_to(self, shape):
"""
Performs the equivalent of :obj:`numpy.broadcast_to` for :obj:`COO`. Note that
this function returns a new array instead of a view.
Parameters
----------
shape : tuple[int]
The shape to broadcast the data to.
Returns
-------
COO
The broadcasted sparse array.
Raises
------
ValueError
If the operand cannot be broadcast to the given shape.
See also
--------
:obj:`numpy.broadcast_to` : NumPy equivalent function
"""
result_shape = _get_broadcast_shape(self.shape, shape, is_result=True)
params = _get_broadcast_parameters(self.shape, result_shape)
coords, data = _get_expanded_coords_data(self.coords, self.data, params, result_shape)
return COO(coords, data, shape=result_shape, has_duplicates=self.has_duplicates,
sorted=self.sorted)
def __abs__(self):
"""
Calculate the absolute value element-wise.
See also
--------
:obj:`numpy.absolute` : NumPy equivalent ufunc.
"""
return self.elemwise(abs)
abs = __abs__
[docs] def exp(self, out=None):
"""
Calculate the exponential of all elements in the array.
See also
--------
:obj:`numpy.exp` : NumPy equivalent ufunc.
:obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
arguments.
Notes
-----
The :code:`out` parameter is provided just for compatibility with Numpy and isn't
actually supported.
"""
assert out is None
return self.elemwise(np.exp)
[docs] def expm1(self, out=None):
"""
Calculate :code:`exp(x) - 1` for all elements in the array.
See also
--------
scipy.sparse.coo_matrix.expm1 : SciPy sparse equivalent function
:obj:`numpy.expm1` : NumPy equivalent ufunc.
:obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
arguments.
Notes
-----
The :code:`out` parameter is provided just for compatibility with Numpy and isn't
actually supported.
"""
assert out is None
return self.elemwise(np.expm1)
[docs] def log1p(self, out=None):
"""
Return the natural logarithm of one plus the input array, element-wise.
Calculates :code:`log(1 + x)`.
See also
--------
scipy.sparse.coo_matrix.log1p : SciPy sparse equivalent function
:obj:`numpy.log1p` : NumPy equivalent ufunc.
:obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
arguments.
Notes
-----
The :code:`out` parameter is provided just for compatibility with Numpy and isn't
actually supported.
"""
assert out is None
return self.elemwise(np.log1p)
[docs] def sin(self, out=None):
"""
Trigonometric sine, element-wise.
See also
--------
scipy.sparse.coo_matrix.sin : SciPy sparse equivalent function
:obj:`numpy.sin` : NumPy equivalent ufunc.
:obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
arguments.
Notes
-----
The :code:`out` parameter is provided just for compatibility with Numpy and isn't
actually supported.
"""
assert out is None
return self.elemwise(np.sin)
[docs] def sinh(self, out=None):
"""
Hyperbolic sine, element-wise.
See also
--------
scipy.sparse.coo_matrix.sinh : SciPy sparse equivalent function
:obj:`numpy.sinh` : NumPy equivalent ufunc.
:obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
arguments.
Notes
-----
The :code:`out` parameter is provided just for compatibility with Numpy and isn't
actually supported.
"""
assert out is None
return self.elemwise(np.sinh)
[docs] def tan(self, out=None):
"""
Compute tangent element-wise.
See also
--------
scipy.sparse.coo_matrix.tan : SciPy sparse equivalent function
:obj:`numpy.tan` : NumPy equivalent ufunc.
:obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
arguments.
"""
assert out is None
return self.elemwise(np.tan)
[docs] def tanh(self, out=None):
"""
Compute hyperbolic tangent element-wise.
See also
--------
scipy.sparse.coo_matrix.tanh : SciPy sparse equivalent function
:obj:`numpy.tanh` : NumPy equivalent ufunc.
:obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
arguments.
Notes
-----
The :code:`out` parameter is provided just for compatibility with Numpy and isn't
actually supported.
"""
assert out is None
return self.elemwise(np.tanh)
[docs] def sqrt(self, out=None):
"""
Return the positive square-root of an array, element-wise.
See also
--------
scipy.sparse.coo_matrix.sqrt : SciPy sparse equivalent function
:obj:`numpy.sqrt` : NumPy equivalent ufunc.
:obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
arguments.
Notes
-----
The :code:`out` parameter is provided just for compatibility with Numpy and isn't
actually supported.
"""
assert out is None
return self.elemwise(np.sqrt)
[docs] def ceil(self, out=None):
"""
Return the ceiling of the input, element-wise.
See also
--------
scipy.sparse.coo_matrix.ceil : SciPy sparse equivalent function
:obj:`numpy.ceil` : NumPy equivalent ufunc.
:obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
arguments.
Notes
-----
The :code:`out` parameter is provided just for compatibility with Numpy and isn't
actually supported.
"""
assert out is None
return self.elemwise(np.ceil)
[docs] def floor(self, out=None):
"""
Return the floor of the input, element-wise.
See also
--------
scipy.sparse.coo_matrix.floor : SciPy sparse equivalent function
:obj:`numpy.floor` : NumPy equivalent ufunc.
:obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
arguments.
Notes
-----
The :code:`out` parameter is provided just for compatibility with Numpy and isn't
actually supported.
"""
assert out is None
return self.elemwise(np.floor)
[docs] def round(self, decimals=0, out=None):
"""
Evenly round to the given number of decimals.
See also
--------
:obj:`numpy.round` : NumPy equivalent ufunc.
:obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
arguments.
Notes
-----
The :code:`out` parameter is provided just for compatibility with Numpy and isn't
actually supported.
"""
assert out is None
return self.elemwise(np.round, decimals)
[docs] def rint(self, out=None):
"""
Round elements of the array to the nearest integer.
See also
--------
scipy.sparse.coo_matrix.rint : SciPy sparse equivalent function
:obj:`numpy.rint` : NumPy equivalent ufunc.
:obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
arguments.
Notes
-----
The :code:`out` parameter is provided just for compatibility with Numpy and isn't
actually supported.
"""
assert out is None
return self.elemwise(np.rint)
[docs] def conj(self, out=None):
"""
Return the complex conjugate, element-wise.
See also
--------
conjugate : Equivalent function
scipy.sparse.coo_matrix.conj : SciPy sparse equivalent function
:obj:`numpy.conj` : NumPy equivalent ufunc.
:obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
arguments.
Notes
-----
The :code:`out` parameter is provided just for compatibility with Numpy and isn't
actually supported.
"""
assert out is None
return self.elemwise(np.conj)
[docs] def conjugate(self, out=None):
"""
Return the complex conjugate, element-wise.
See also
--------
conj : Equivalent function
scipy.sparse.coo_matrix.conjugate : SciPy sparse equivalent function
:obj:`numpy.conj` : NumPy equivalent ufunc.
:obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
arguments.
Notes
-----
The :code:`out` parameter is provided just for compatibility with Numpy and isn't
actually supported.
"""
assert out is None
return self.elemwise(np.conjugate)
[docs] def astype(self, dtype, out=None):
"""
Copy of the array, cast to a specified type.
See also
--------
scipy.sparse.coo_matrix.astype : SciPy sparse equivalent function
numpy.ndarray.astype : NumPy equivalent ufunc.
:obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
arguments.
Notes
-----
The :code:`out` parameter is provided just for compatibility with Numpy and isn't
actually supported.
"""
assert out is None
return self.elemwise(np.ndarray.astype, dtype)
[docs] def maybe_densify(self, max_size=1000, min_density=0.25):
"""
Converts this :obj:`COO` array to a :obj:`numpy.ndarray` if not too
costly.
Parameters
----------
max_size : int
Maximum number of elements in output
min_density : float
Minimum density of output
Returns
-------
numpy.ndarray
The dense array.
Raises
-------
ValueError
If the returned array would be too large.
Examples
--------
Convert a small sparse array to a dense array.
>>> s = COO.from_numpy(np.random.rand(2, 3, 4))
>>> x = s.maybe_densify()
>>> np.allclose(x, s.todense())
True
You can also specify the minimum allowed density or the maximum number
of output elements. If both conditions are unmet, this method will throw
an error.
>>> x = np.zeros((5, 5), dtype=np.uint8)
>>> x[2, 2] = 1
>>> s = COO.from_numpy(x)
>>> s.maybe_densify(max_size=5, min_density=0.25)
Traceback (most recent call last):
...
ValueError: Operation would require converting large sparse array to dense
"""
if self.size <= max_size or self.density >= min_density:
return self.todense()
else:
raise ValueError("Operation would require converting "
"large sparse array to dense")
[docs]def tensordot(a, b, axes=2):
"""
Perform the equivalent of :obj:`numpy.tensordot`.
Parameters
----------
a, b : Union[COO, np.ndarray, scipy.sparse.spmatrix]
The arrays to perform the :code:`tensordot` operation on.
axes : tuple[Union[int, tuple[int], Union[int, tuple[int]], optional
The axes to match when performing the sum.
Returns
-------
Union[COO, numpy.ndarray]
The result of the operation.
See Also
--------
numpy.tensordot : NumPy equivalent function
"""
# Much of this is stolen from numpy/core/numeric.py::tensordot
# Please see license at https://github.com/numpy/numpy/blob/master/LICENSE.txt
try:
iter(axes)
except TypeError:
axes_a = list(range(-axes, 0))
axes_b = list(range(0, axes))
else:
axes_a, axes_b = axes
try:
na = len(axes_a)
axes_a = list(axes_a)
except TypeError:
axes_a = [axes_a]
na = 1
try:
nb = len(axes_b)
axes_b = list(axes_b)
except TypeError:
axes_b = [axes_b]
nb = 1
# a, b = asarray(a), asarray(b) # <--- modified
as_ = a.shape
nda = a.ndim
bs = b.shape
ndb = b.ndim
equal = True
if na != nb:
equal = False
else:
for k in range(na):
if as_[axes_a[k]] != bs[axes_b[k]]:
equal = False
break
if axes_a[k] < 0:
axes_a[k] += nda
if axes_b[k] < 0:
axes_b[k] += ndb
if not equal:
raise ValueError("shape-mismatch for sum")
# Move the axes to sum over to the end of "a"
# and to the front of "b"
notin = [k for k in range(nda) if k not in axes_a]
newaxes_a = notin + axes_a
N2 = 1
for axis in axes_a:
N2 *= as_[axis]
newshape_a = (-1, N2)
olda = [as_[axis] for axis in notin]
notin = [k for k in range(ndb) if k not in axes_b]
newaxes_b = axes_b + notin
N2 = 1
for axis in axes_b:
N2 *= bs[axis]
newshape_b = (N2, -1)
oldb = [bs[axis] for axis in notin]
at = a.transpose(newaxes_a).reshape(newshape_a)
bt = b.transpose(newaxes_b).reshape(newshape_b)
res = _dot(at, bt)
if isinstance(res, scipy.sparse.spmatrix):
if res.nnz > reduce(operator.mul, res.shape) / 2:
res = res.todense()
else:
res = COO.from_scipy_sparse(res) # <--- modified
res.has_duplicates = False
if isinstance(res, np.matrix):
res = np.asarray(res)
return res.reshape(olda + oldb)
[docs]def dot(a, b):
"""
Perform the equivalent of :obj:`numpy.dot` on two arrays.
Parameters
----------
a, b : Union[COO, np.ndarray, scipy.sparse.spmatrix]
The arrays to perform the :code:`dot` operation on.
Returns
-------
Union[COO, numpy.ndarray]
The result of the operation.
See Also
--------
numpy.dot : NumPy equivalent function.
COO.dot : Equivalent function for COO objects.
"""
if not hasattr(a, 'ndim') or not hasattr(b, 'ndim'):
raise NotImplementedError(
"Cannot perform dot product on types %s, %s" %
(type(a), type(b)))
return tensordot(a, b, axes=((a.ndim - 1,), (b.ndim - 2,)))
def _dot(a, b):
if isinstance(a, COO):
a.sum_duplicates()
if isinstance(b, COO):
b.sum_duplicates()
if isinstance(b, COO) and not isinstance(a, COO):
return _dot(b.T, a.T).T
aa = a.tocsr()
if isinstance(b, (COO, scipy.sparse.spmatrix)):
b = b.tocsc()
return aa.dot(b)
def _keepdims(original, new, axis):
shape = list(original.shape)
for ax in axis:
shape[ax] = 1
return new.reshape(shape)
def _mask(coords, idx, shape):
if isinstance(idx, numbers.Integral):
return coords == idx
elif isinstance(idx, slice):
step = idx.step if idx.step is not None else 1
if step > 0:
start = idx.start if idx.start is not None else 0
stop = idx.stop if idx.stop is not None else shape
return (coords >= start) & (coords < stop) & \
(coords % step == start % step)
else:
start = idx.start if idx.start is not None else (shape - 1)
stop = idx.stop if idx.stop is not None else -1
return (coords <= start) & (coords > stop) & \
(coords % step == start % step)
elif isinstance(idx, Iterable):
mask = np.zeros(len(coords), dtype=np.bool)
for item in idx:
mask |= _mask(coords, item, shape)
return mask
[docs]def concatenate(arrays, axis=0):
"""
Concatenate the input arrays along the given dimension.
Parameters
----------
arrays : Iterable[Union[COO, numpy.ndarray, scipy.sparse.spmatrix]]
The input arrays to concatenate.
axis : int, optional
The axis along which to concatenate the input arrays. The default is zero.
Returns
-------
COO
The output concatenated array.
See Also
--------
numpy.concatenate : NumPy equivalent function
"""
arrays = [x if isinstance(x, COO) else COO(x) for x in arrays]
if axis < 0:
axis = axis + arrays[0].ndim
assert all(x.shape[ax] == arrays[0].shape[ax]
for x in arrays
for ax in set(range(arrays[0].ndim)) - {axis})
nnz = 0
dim = sum(x.shape[axis] for x in arrays)
shape = list(arrays[0].shape)
shape[axis] = dim
coords_dtype = np.min_scalar_type(max(shape) - 1) if len(shape) != 0 else np.uint8
data = np.concatenate([x.data for x in arrays])
coords = np.concatenate([x.coords for x in arrays], axis=1).astype(coords_dtype)
dim = 0
for x in arrays:
if dim:
coords[axis, nnz:x.nnz + nnz] += dim
dim += x.shape[axis]
nnz += x.nnz
has_duplicates = any(x.has_duplicates for x in arrays)
return COO(coords, data, shape=shape, has_duplicates=has_duplicates,
sorted=(axis == 0) and all(a.sorted for a in arrays))
[docs]def stack(arrays, axis=0):
"""
Stack the input arrays along the given dimension.
Parameters
----------
arrays : Iterable[Union[COO, numpy.ndarray, scipy.sparse.spmatrix]]
The input arrays to stack.
axis : int, optional
The axis along which to stack the input arrays.
Returns
-------
COO
The output stacked array.
See Also
--------
numpy.stack : NumPy equivalent function
"""
assert len(set(x.shape for x in arrays)) == 1
arrays = [x if isinstance(x, COO) else COO(x) for x in arrays]
if axis < 0:
axis = axis + arrays[0].ndim + 1
data = np.concatenate([x.data for x in arrays])
coords = np.concatenate([x.coords for x in arrays], axis=1)
shape = list(arrays[0].shape)
shape.insert(axis, len(arrays))
coords_dtype = np.min_scalar_type(max(shape) - 1) if len(shape) != 0 else np.uint8
nnz = 0
dim = 0
new = np.empty(shape=(coords.shape[1],), dtype=coords_dtype)
for x in arrays:
new[nnz:x.nnz + nnz] = dim
dim += 1
nnz += x.nnz
has_duplicates = any(x.has_duplicates for x in arrays)
coords = [coords[i].astype(coords_dtype) for i in range(coords.shape[0])]
coords.insert(axis, new)
coords = np.stack(coords, axis=0)
return COO(coords, data, shape=shape, has_duplicates=has_duplicates,
sorted=(axis == 0) and all(a.sorted for a in arrays))
[docs]def triu(x, k=0):
"""
Returns an array with all elements below the k-th diagonal set to zero.
Parameters
----------
x : COO
The input array.
k : int, optional
The diagonal below which elements are set to zero. The default is
zero, which corresponds to the main diagonal.
Returns
-------
COO
The output upper-triangular matrix.
See Also
--------
numpy.triu : NumPy equivalent function
"""
if not x.ndim >= 2:
raise NotImplementedError('sparse.triu is not implemented for scalars or 1-D arrays.')
mask = x.coords[-2] + k <= x.coords[-1]
coords = x.coords[:, mask]
data = x.data[mask]
return COO(coords, data, x.shape, x.has_duplicates, x.sorted)
[docs]def tril(x, k=0):
"""
Returns an array with all elements above the k-th diagonal set to zero.
Parameters
----------
x : COO
The input array.
k : int, optional
The diagonal above which elements are set to zero. The default is
zero, which corresponds to the main diagonal.
Returns
-------
COO
The output lower-triangular matrix.
See Also
--------
numpy.tril : NumPy equivalent function
"""
if not x.ndim >= 2:
raise NotImplementedError('sparse.tril is not implemented for scalars or 1-D arrays.')
mask = x.coords[-2] + k >= x.coords[-1]
coords = x.coords[:, mask]
data = x.data[mask]
return COO(coords, data, x.shape, x.has_duplicates, x.sorted)
# (c) Paul Panzer
# Taken from https://stackoverflow.com/a/47833496/774273
# License: https://creativecommons.org/licenses/by-sa/3.0/
def _match_arrays(a, b):
"""
Finds all indexes into a and b such that a[i] = b[j]. The outputs are sorted
in lexographical order.
Parameters
----------
a, b : np.ndarray
The input 1-D arrays to match. If matching of multiple fields is
needed, use np.recarrays. These two arrays must be sorted.
Returns
-------
a_idx, b_idx : np.ndarray
The output indices of every possible pair of matching elements.
"""
if len(a) == 0 or len(b) == 0:
return np.array([], dtype=np.uint8), np.array([], dtype=np.uint8)
asw = np.r_[0, 1 + np.flatnonzero(a[:-1] != a[1:]), len(a)]
bsw = np.r_[0, 1 + np.flatnonzero(b[:-1] != b[1:]), len(b)]
al, bl = np.diff(asw), np.diff(bsw)
na = len(al)
asw, bsw = asw, bsw
abunq = np.r_[a[asw[:-1]], b[bsw[:-1]]]
m = np.argsort(abunq, kind='mergesort')
mv = abunq[m]
midx = np.flatnonzero(mv[:-1] == mv[1:])
ai, bi = m[midx], m[midx + 1] - na
aic = np.r_[0, np.cumsum(al[ai])]
a_idx = np.ones((aic[-1],), dtype=np.int_)
a_idx[aic[:-1]] = asw[ai]
a_idx[aic[1:-1]] -= asw[ai[:-1]] + al[ai[:-1]] - 1
a_idx = np.repeat(np.cumsum(a_idx), np.repeat(bl[bi], al[ai]))
bi = np.repeat(bi, al[ai])
bic = np.r_[0, np.cumsum(bl[bi])]
b_idx = np.ones((bic[-1],), dtype=np.int_)
b_idx[bic[:-1]] = bsw[bi]
b_idx[bic[1:-1]] -= bsw[bi[:-1]] + bl[bi[:-1]] - 1
b_idx = np.cumsum(b_idx)
return a_idx, b_idx
def _grouped_reduce(x, groups, method, **kwargs):
"""
Performs a :code:`ufunc` grouped reduce.
Parameters
----------
x : np.ndarray
The data to reduce.
groups : np.ndarray
The groups the data belongs to. The groups must be
contiguous.
method : np.ufunc
The :code:`ufunc` to use to perform the reduction.
kwargs : dict
The kwargs to pass to the :code:`ufunc`'s :code:`reduceat`
function.
Returns
-------
result : np.ndarray
The result of the grouped reduce operation.
inv_idx : np.ndarray
The index of the first element where each group is found.
counts : np.ndarray
The number of elements in each group.
"""
# Partial credit to @shoyer
# Ref: https://gist.github.com/shoyer/f538ac78ae904c936844
flag = np.concatenate(([True] if len(x) != 0 else [], groups[1:] != groups[:-1]))
inv_idx = np.flatnonzero(flag)
result = method.reduceat(x, inv_idx, **kwargs)
counts = np.diff(np.concatenate((inv_idx, [len(x)])))
return result, inv_idx, counts
def _elemwise_binary(func, self, other, *args, **kwargs):
check = kwargs.pop('check', True)
self_zero = _zero_of_dtype(self.dtype)
other_zero = _zero_of_dtype(other.dtype)
func_zero = _zero_of_dtype(func(self_zero, other_zero, *args, **kwargs).dtype)
if check and func(self_zero, other_zero, *args, **kwargs) != func_zero:
raise ValueError("Performing this operation would produce "
"a dense result: %s" % str(func))
if not isinstance(self, COO):
if not check or np.array_equiv(func(self, other_zero, *args, **kwargs), func_zero):
return _elemwise_binary_self_dense(func, self, other, *args, **kwargs)
else:
raise ValueError("Performing this operation would produce "
"a dense result: %s" % str(func))
if not isinstance(other, COO):
if not check or np.array_equiv(func(self_zero, other, *args, **kwargs), func_zero):
temp_func = _reverse_self_other(func)
return _elemwise_binary_self_dense(temp_func, other, self, *args, **kwargs)
else:
raise ValueError("Performing this operation would produce "
"a dense result: %s" % str(func))
self_shape, other_shape = self.shape, other.shape
result_shape = _get_broadcast_shape(self_shape, other_shape)
self_params = _get_broadcast_parameters(self.shape, result_shape)
other_params = _get_broadcast_parameters(other.shape, result_shape)
combined_params = [p1 and p2 for p1, p2 in zip(self_params, other_params)]
self_reduce_params = combined_params[-self.ndim:]
other_reduce_params = combined_params[-other.ndim:]
self.sum_duplicates() # TODO: document side-effect or make copy
other.sum_duplicates() # TODO: document side-effect or make copy
self_coords = self.coords
self_data = self.data
self_reduced_coords, self_reduced_shape = \
_get_reduced_coords(self_coords, self_shape,
self_reduce_params)
self_reduced_linear = _linear_loc(self_reduced_coords, self_reduced_shape)
i = np.argsort(self_reduced_linear)
self_reduced_linear = self_reduced_linear[i]
self_coords = self_coords[:, i]
self_data = self_data[i]
# Store coords
other_coords = other.coords
other_data = other.data
other_reduced_coords, other_reduced_shape = \
_get_reduced_coords(other_coords, other_shape,
other_reduce_params)
other_reduced_linear = _linear_loc(other_reduced_coords, other_reduced_shape)
i = np.argsort(other_reduced_linear)
other_reduced_linear = other_reduced_linear[i]
other_coords = other_coords[:, i]
other_data = other_data[i]
# Find matches between self.coords and other.coords
matched_self, matched_other = _match_arrays(self_reduced_linear,
other_reduced_linear)
# Start with an empty list. This may reduce computation in many cases.
data_list = []
coords_list = []
# Add the matched part.
matched_coords = _get_matching_coords(self_coords[:, matched_self],
other_coords[:, matched_other],
self_shape, other_shape)
data_list.append(func(self_data[matched_self],
other_data[matched_other],
*args, **kwargs))
coords_list.append(matched_coords)
self_func = func(self_data, other_zero, *args, **kwargs)
# Add unmatched parts as necessary.
if (self_func != func_zero).any():
self_unmatched_coords, self_unmatched_func = \
_get_unmatched_coords_data(self_coords, self_func, self_shape,
result_shape, matched_self,
matched_coords)
data_list.extend(self_unmatched_func)
coords_list.extend(self_unmatched_coords)
other_func = func(self_zero, other_data, *args, **kwargs)
if (other_func != func_zero).any():
other_unmatched_coords, other_unmatched_func = \
_get_unmatched_coords_data(other_coords, other_func, other_shape,
result_shape, matched_other,
matched_coords)
coords_list.extend(other_unmatched_coords)
data_list.extend(other_unmatched_func)
# Concatenate matches and mismatches
data = np.concatenate(data_list) if len(data_list) else np.empty((0,), dtype=self.dtype)
coords = np.concatenate(coords_list, axis=1) if len(coords_list) else \
np.empty((0, len(result_shape)), dtype=self.coords.dtype)
nonzero = data != func_zero
data = data[nonzero]
coords = coords[:, nonzero]
return COO(coords, data, shape=result_shape, has_duplicates=False)
def _elemwise_binary_self_dense(func, self, other, *args, **kwargs):
assert isinstance(self, np.ndarray)
assert isinstance(other, COO)
result_shape = _get_broadcast_shape(self.shape, other.shape)
if result_shape != other.shape:
other = other.broadcast_to(result_shape)
self = np.broadcast_to(self, result_shape)
self_coords = tuple([other.coords[i, :] for i in range(other.ndim)])
self_data = self[self_coords]
func_data = func(self_data, other.data, *args, **kwargs)
mask = func_data != 0
func_data = func_data[mask]
func_coords = other.coords[:, mask]
return COO(func_coords, func_data, shape=result_shape,
has_duplicates=other.has_duplicates,
sorted=other.sorted)
def _reverse_self_other(func):
def wrapper(*args, **kwargs):
return func(args[1], args[0], *args[2:], **kwargs)
return wrapper
def _get_unmatched_coords_data(coords, data, shape, result_shape, matched_idx,
matched_coords):
"""
Get the unmatched coordinates and data - both those that are unmatched with
any point of the other data as well as those which are added because of
broadcasting.
Parameters
----------
coords : np.ndarray
The coordinates to get the unmatched coordinates from.
data : np.ndarray
The data corresponding to these coordinates.
shape : tuple[int]
The shape corresponding to these coordinates.
result_shape : tuple[int]
The result broadcasting shape.
matched_idx : np.ndarray
The indices into the coords array where it matches with the other array.
matched_coords : np.ndarray
The overall coordinates that match from both arrays.
Returns
-------
coords_list : list[np.ndarray]
The list of unmatched/broadcasting coordinates.
data_list : list[np.ndarray]
The data corresponding to the coordinates.
"""
params = _get_broadcast_parameters(shape, result_shape)
matched = np.zeros(len(data), dtype=np.bool)
matched[matched_idx] = True
unmatched = ~matched
data_zero = _zero_of_dtype(data.dtype)
nonzero = data != data_zero
unmatched &= nonzero
matched &= nonzero
coords_list = []
data_list = []
unmatched_coords, unmatched_data = \
_get_expanded_coords_data(coords[:, unmatched],
data[unmatched],
params,
result_shape)
coords_list.append(unmatched_coords)
data_list.append(unmatched_data)
if shape != result_shape:
broadcast_coords, broadcast_data = \
_get_broadcast_coords_data(coords[:, matched],
matched_coords,
data[matched],
params,
result_shape)
coords_list.append(broadcast_coords)
data_list.append(broadcast_data)
return coords_list, data_list
def _get_broadcast_shape(shape1, shape2, is_result=False):
"""
Get the overall broadcasted shape.
Parameters
----------
shape1, shape2 : tuple[int]
The input shapes to broadcast together.
is_result : bool
Whether or not shape2 is also the result shape.
Returns
-------
result_shape : tuple[int]
The overall shape of the result.
Raises
------
ValueError
If the two shapes cannot be broadcast together.
"""
# https://stackoverflow.com/a/47244284/774273
if not all((l1 == l2) or (l1 == 1) or ((l2 == 1) and not is_result) for l1, l2 in
zip(shape1[::-1], shape2[::-1])):
raise ValueError('operands could not be broadcast together with shapes %s, %s' %
(shape1, shape2))
result_shape = tuple(max(l1, l2) for l1, l2 in
zip_longest(shape1[::-1], shape2[::-1], fillvalue=1))[::-1]
return result_shape
def _get_broadcast_parameters(shape, broadcast_shape):
"""
Get the broadcast parameters.
Parameters
----------
shape : tuple[int]
The input shape.
broadcast_shape
The shape to broadcast to.
Returns
-------
params : list
A list containing None if the dimension isn't in the original array, False if
it needs to be broadcast, and True if it doesn't.
"""
params = [None if l1 is None else l1 == l2 for l1, l2
in zip_longest(shape[::-1], broadcast_shape[::-1], fillvalue=None)][::-1]
return params
def _get_reduced_coords(coords, shape, params):
"""
Gets only those dimensions of the coordinates that don't need to be broadcast.
Parameters
----------
coords : np.ndarray
The coordinates to reduce.
params : list
The params from which to check which dimensions to get.
Returns
-------
reduced_coords : np.ndarray
The reduced coordinates.
"""
reduced_params = [bool(param) for param in params]
reduced_shape = tuple(l for l, p in zip(shape, params) if p)
return coords[reduced_params], reduced_shape
def _get_expanded_coords_data(coords, data, params, broadcast_shape):
"""
Expand coordinates/data to broadcast_shape. Does most of the heavy lifting for broadcast_to.
Produces sorted output for sorted inputs.
Parameters
----------
coords : np.ndarray
The coordinates to expand.
data : np.ndarray
The data corresponding to the coordinates.
params : list
The broadcast parameters.
broadcast_shape : tuple[int]
The shape to broadcast to.
Returns
-------
expanded_coords : np.ndarray
List of 1-D arrays. Each item in the list has one dimension of coordinates.
expanded_data : np.ndarray
The data corresponding to expanded_coords.
"""
first_dim = -1
expand_shapes = []
for d, p, l in zip(range(len(broadcast_shape)), params, broadcast_shape):
if p and first_dim == -1:
expand_shapes.append(coords.shape[1])
first_dim = d
if not p:
expand_shapes.append(l)
all_idx = _cartesian_product(*(np.arange(d, dtype=np.min_scalar_type(d - 1)) for d in expand_shapes))
dt = np.result_type(*(np.min_scalar_type(l - 1) for l in broadcast_shape))
false_dim = 0
dim = 0
expanded_coords = np.empty((len(broadcast_shape), all_idx.shape[1]), dtype=dt)
expanded_data = data[all_idx[first_dim]]
for d, p, l in zip(range(len(broadcast_shape)), params, broadcast_shape):
if p:
expanded_coords[d] = coords[dim, all_idx[first_dim]]
else:
expanded_coords[d] = all_idx[false_dim + (d > first_dim)]
false_dim += 1
if p is not None:
dim += 1
return np.asarray(expanded_coords), np.asarray(expanded_data)
# (c) senderle
# Taken from https://stackoverflow.com/a/11146645/774273
# License: https://creativecommons.org/licenses/by-sa/3.0/
def _cartesian_product(*arrays):
"""
Get the cartesian product of a number of arrays.
Parameters
----------
arrays : Tuple[np.ndarray]
The arrays to get a cartesian product of. Always sorted with respect
to the original array.
Returns
-------
out : np.ndarray
The overall cartesian product of all the input arrays.
"""
broadcastable = np.ix_(*arrays)
broadcasted = np.broadcast_arrays(*broadcastable)
rows, cols = np.prod(broadcasted[0].shape), len(broadcasted)
dtype = np.result_type(*arrays)
out = np.empty(rows * cols, dtype=dtype)
start, end = 0, rows
for a in broadcasted:
out[start:end] = a.reshape(-1)
start, end = end, end + rows
return out.reshape(cols, rows)
def _elemwise_unary(func, self, *args, **kwargs):
check = kwargs.pop('check', True)
data_zero = _zero_of_dtype(self.dtype)
func_zero = _zero_of_dtype(func(data_zero, *args, **kwargs).dtype)
if check and func(data_zero, *args, **kwargs) != func_zero:
raise ValueError("Performing this operation would produce "
"a dense result: %s" % str(func))
data_func = func(self.data, *args, **kwargs)
nonzero = data_func != func_zero
return COO(self.coords[:, nonzero], data_func[nonzero],
shape=self.shape,
has_duplicates=self.has_duplicates,
sorted=self.sorted)
def _get_matching_coords(coords1, coords2, shape1, shape2):
"""
Takes in the matching coordinates in both dimensions (only those dimensions that
don't need to be broadcast in both arrays and returns the coordinates that will
overlap in the output array, i.e., the coordinates for which both broadcast arrays
will be nonzero.
Parameters
----------
coords1, coords2 : np.ndarray
shape1, shape2 : tuple[int]
Returns
-------
matching_coords : np.ndarray
The coordinates of the output array for which both inputs will be nonzero.
"""
result_shape = _get_broadcast_shape(shape1, shape2)
params1 = _get_broadcast_parameters(shape1, result_shape)
params2 = _get_broadcast_parameters(shape2, result_shape)
matching_coords = []
dim1 = 0
dim2 = 0
for p1, p2 in zip(params1, params2):
if p1:
matching_coords.append(coords1[dim1])
else:
matching_coords.append(coords2[dim2])
if p1 is not None:
dim1 += 1
if p2 is not None:
dim2 += 1
return np.asarray(matching_coords)
def _get_broadcast_coords_data(coords, matched_coords, data, params, broadcast_shape):
"""
Get data that matched in the reduced coordinates but still had a partial overlap because of
the broadcast, i.e., it didn't match in one of the other dimensions.
Parameters
----------
coords : np.ndarray
The list of coordinates of the required array. Must be sorted.
matched_coords : np.ndarray
The list of coordinates that match. Must be sorted.
data : np.ndarray
The data corresponding to coords.
params : list
The broadcast parameters.
broadcast_shape : tuple[int]
The shape to get the broadcast coordinates.
Returns
-------
broadcast_coords : np.ndarray
The broadcasted coordinates. Is sorted.
broadcasted_data : np.ndarray
The data corresponding to those coordinates.
"""
full_coords, full_data = _get_expanded_coords_data(coords, data, params, broadcast_shape)
linear_full_coords = _linear_loc(full_coords, broadcast_shape)
linear_matched_coords = _linear_loc(matched_coords, broadcast_shape)
overlapping_coords, _ = _match_arrays(linear_full_coords, linear_matched_coords)
mask = np.ones(full_coords.shape[1], dtype=np.bool)
mask[overlapping_coords] = False
return full_coords[:, mask], full_data[mask]
def _linear_loc(coords, shape, signed=False):
n = reduce(operator.mul, shape, 1)
if signed:
n = -n
dtype = np.min_scalar_type(n)
out = np.zeros(coords.shape[1], dtype=dtype)
tmp = np.zeros(coords.shape[1], dtype=dtype)
strides = 1
for i, d in enumerate(shape[::-1]):
# out += self.coords[-(i + 1), :].astype(dtype) * strides
np.multiply(coords[-(i + 1), :], strides, out=tmp, dtype=dtype)
np.add(tmp, out, out=out)
strides *= d
return out