Skip to content

random

Generate a random sparse multidimensional array

Parameters:

Name Type Description Default
shape Tuple[int]

Shape of the array

required
density float

Density of the generated array; default is 0.01. Mutually exclusive with nnz.

None
nnz int

Number of nonzero elements in the generated array. Mutually exclusive with density.

None
random_state Union[numpy.random.Generator, int]

Random number generator or random seed. If not given, the singleton numpy.random will be used. This random state will be used for sampling the sparsity structure, but not necessarily for sampling the values of the structurally nonzero entries of the matrix.

None
data_rvs Callable

Data generation callback. Must accept one single parameter: number of nnz elements, and return one single NumPy array of exactly that length.

None
format str

The format to return the output array in.

'coo'
fill_value scalar

The fill value of the output array.

None

Returns:

Type Description
SparseArray

The generated random matrix.

See Also

scipy.sparse.rand : Equivalent Scipy function.
numpy.random.rand : Similar Numpy function.

Examples:

>>> from scipy import stats
>>> rng = np.random.default_rng(42)
>>> rvs = lambda x: stats.poisson(25, loc=10).rvs(x, random_state=rng)
>>> s = sparse.random((2, 3, 4), density=0.25, random_state=rng, data_rvs=rvs)
>>> s.todense()
array([[[39,  0,  0,  0],
        [28, 33,  0, 37],
        [ 0,  0,  0,  0]],

       [[ 0,  0,  0,  0],
        [ 0,  0, 34,  0],
        [ 0,  0,  0, 36]]])
Source code in sparse/numba_backend/_utils.py
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
def random(
    shape,
    density=None,
    nnz=None,
    random_state=None,
    data_rvs=None,
    format="coo",
    fill_value=None,
    idx_dtype=None,
    **kwargs,
):
    """Generate a random sparse multidimensional array

    Parameters
    ----------
    shape : Tuple[int]
        Shape of the array
    density : float, optional
        Density of the generated array; default is 0.01.
        Mutually exclusive with `nnz`.
    nnz : int, optional
        Number of nonzero elements in the generated array.
        Mutually exclusive with `density`.
    random_state : Union[numpy.random.Generator, int], optional
        Random number generator or random seed. An integer is used to seed
        a new ``numpy.random.default_rng``. If not given, the module-level
        ``default_rng`` is used. This random state will be used
        for sampling the sparsity structure, but not necessarily for sampling
        the values of the structurally nonzero entries of the matrix.
    data_rvs : Callable, optional
        Data generation callback. Must accept one single parameter: number of
        `nnz` elements, and return one single NumPy array of exactly
        that length. If not given, ``random_state.random`` is used.
    format : str, optional
        The format to return the output array in.
    fill_value : scalar, optional
        The fill value of the output array.
    idx_dtype : dtype, optional
        If given, the coordinates of the generated array are cast to this
        dtype before the format conversion.
    **kwargs
        Additional keyword arguments forwarded to ``asformat``.

    Returns
    -------
    SparseArray
        The generated random matrix.

    Raises
    ------
    ValueError
        If both `density` and `nnz` are given, if `density` is outside
        ``[0, 1]``, if `nnz` is negative or larger than the total number of
        elements, or if `idx_dtype` is rejected by ``can_store`` for the
        largest dimension of `shape`.

    See Also
    --------
    - [`scipy.sparse.rand`][] : Equivalent Scipy function.
    - [`numpy.random.rand`][] : Similar Numpy function.

    Examples
    --------
    >>> from scipy import stats
    >>> rng = np.random.default_rng(42)
    >>> rvs = lambda x: stats.poisson(25, loc=10).rvs(x, random_state=rng)
    >>> s = sparse.random((2, 3, 4), density=0.25, random_state=rng, data_rvs=rvs)
    >>> s.todense()
    array([[[39,  0,  0,  0],
            [28, 33,  0, 37],
            [ 0,  0,  0,  0]],
    <BLANKLINE>
           [[ 0,  0,  0,  0],
            [ 0,  0, 34,  0],
            [ 0,  0,  0, 36]]])
    """
    # Copied, in large part, from scipy.sparse.random
    # See https://github.com/scipy/scipy/blob/main/LICENSE.txt
    from ._coo import COO

    if density is not None and nnz is not None:
        raise ValueError("'density' and 'nnz' are mutually exclusive")

    if density is None:
        density = 0.01
    if not (0 <= density <= 1):
        raise ValueError(f"density {density} is not in the unit interval")

    # Total number of positions in the (flattened) array.
    elements = np.prod(shape, dtype=np.intp)

    if nnz is None:
        nnz = int(elements * density)
    if not (0 <= nnz <= elements):
        raise ValueError(f"cannot generate {nnz} nonzero elements for an array with {elements} total elements")

    if random_state is None:
        random_state = default_rng
    elif isinstance(random_state, Integral):
        random_state = np.random.default_rng(random_state)
    if data_rvs is None:
        data_rvs = random_state.random

    # Pick `nnz` flat indices out of `elements`.
    # NOTE(review): `algA` / `algD` are assumed to sample distinct indices and
    # `reverse` is assumed to return the complement of the indices it is
    # given -- confirm against their definitions.
    if nnz == elements or density >= 1:
        # Fully dense: every position is selected.
        ind = np.arange(elements)
    elif nnz < 2:
        ind = random_state.choice(elements, nnz)
    elif elements - nnz < 2:
        # Exactly one position is left out: draw it and take the rest.
        ind = reverse(random_state.choice(elements, elements - nnz), elements)
    elif nnz > elements / 2:
        # Faster to find non-sampled indices and remove them for dens > .5
        nnztemp = elements - nnz
        # Using algorithm A for dens > .1
        if elements > 10 * nnztemp:
            ind = reverse(
                algD(nnztemp, elements, random_state),
                elements,
            )
        else:
            ind = reverse(
                algA(nnztemp, elements, random_state),
                elements,
            )
    else:
        ind = algD(nnz, elements, random_state) if elements > 10 * nnz else algA(nnz, elements, random_state)
    data = data_rvs(nnz)

    # Build a 1-D array over the flat index space, then reshape to `shape`.
    ar = COO(
        ind[None, :],
        data,
        shape=elements,
        fill_value=fill_value,
    ).reshape(shape)

    # Compare against None explicitly so the check does not depend on the
    # truthiness of whatever dtype specifier was passed.
    if idx_dtype is not None:
        # `default=0` keeps a zero-dimensional shape `()` from failing inside
        # `max` with an unrelated "empty sequence" error.
        if can_store(idx_dtype, max(shape, default=0)):
            ar.coords = ar.coords.astype(idx_dtype)
        else:
            raise ValueError(f"cannot cast array with shape {shape} to dtype {idx_dtype}.")

    return ar.asformat(format, **kwargs)