# Source code for pypianoroll.metrics

"""Objective metrics for piano rolls.

Functions
---------

- drum_in_pattern_rate
- empty_beat_rate
- in_scale_rate
- n_pitch_classes_used
- n_pitches_used
- pitch_range
- pitch_range_tuple
- polyphonic_rate
- qualified_note_rate
- tonal_distance

"""
from math import nan
from typing import Sequence, Tuple

import numpy as np
from numpy import ndarray

# Public API of this module: the names exported by a star-import. The
# underscore-prefixed helpers defined below are intentionally left out.
__all__ = [
    "drum_in_pattern_rate",
    "empty_beat_rate",
    "in_scale_rate",
    "n_pitch_classes_used",
    "n_pitches_used",
    "pitch_range",
    "pitch_range_tuple",
    "polyphonic_rate",
    "qualified_note_rate",
    "tonal_distance",
]


def _to_chroma(pianoroll: ndarray) -> ndarray:
    """Return the unnormalized chroma features."""
    reshaped = pianoroll[:, :120].reshape(-1, 12, 10)
    reshaped[..., :8] += pianoroll[:, 120:].reshape(-1, 1, 8)
    return np.sum(reshaped, -1)


def empty_beat_rate(pianoroll: ndarray, resolution: int) -> float:
    r"""Return the ratio of empty beats.

    The empty-beat rate is defined as the ratio of the number of empty
    beats (where no note is played) to the total number of beats. Return
    NaN if song length is zero.

    .. math:: empty\_beat\_rate = \frac{\#(empty\_beats)}{\#(beats)}

    Parameters
    ----------
    pianoroll : ndarray
        Piano roll to evaluate, indexed as ``[time, pitch]``.
    resolution : int
        Time steps per beat.

    Returns
    -------
    float
        Empty-beat rate.

    Raises
    ------
    ValueError
        Raised by the reshape if the number of time steps is not a
        multiple of `resolution`.

    """
    # One row per beat, holding every (time step, pitch) cell of that beat.
    beats = pianoroll.reshape(-1, resolution * pianoroll.shape[1])
    n_beats = len(beats)
    if n_beats < 1:
        return nan
    # A beat is empty when none of its cells is active, so the empty
    # beats are the ones that are *not* flagged by ``any``.
    n_empty_beats = n_beats - np.count_nonzero(beats.any(1))
    return n_empty_beats / n_beats


def n_pitches_used(pianoroll: ndarray) -> int:
    """Return the number of unique pitches used.

    Parameters
    ----------
    pianoroll : ndarray
        Piano roll to evaluate, indexed as ``[time, pitch]``.

    Returns
    -------
    int
        Number of unique pitches used, i.e., the number of pitch columns
        that are active in at least one time step.

    See Also
    --------
    :func:`pypianoroll.n_pitch_classes_used`: Compute the number of
      unique pitch classes used.

    """
    # Collapse the time axis first: a pitch counts once no matter how
    # often or how long it sounds.
    return np.count_nonzero(np.any(pianoroll, 0))


def n_pitch_classes_used(pianoroll: ndarray) -> int:
    """Return the number of unique pitch classes used.

    The pitch class of a pitch is taken to be its column index modulo
    12, so pitches that are whole octaves apart share a pitch class.

    Parameters
    ----------
    pianoroll : ndarray
        Piano roll to evaluate, indexed as ``[time, pitch]``.

    Returns
    -------
    int
        Number of unique pitch classes used (between 0 and 12).

    See Also
    --------
    :func:`pypianoroll.n_pitches_used`: Compute the number of unique
      pitches used.

    """
    # Indices of the pitch columns that are active at least once.
    active_pitches = np.flatnonzero(np.any(pianoroll, 0))
    # Fold them onto their pitch classes and count the distinct ones.
    return int(np.unique(active_pitches % 12).size)


def pitch_range_tuple(pianoroll) -> Tuple[float, float]:
    """Return the pitch range as a tuple `(lowest, highest)`.

    Parameters
    ----------
    pianoroll : ndarray
        Piano roll to evaluate, indexed as ``[time, pitch]``.

    Returns
    -------
    int or nan
        Lowest active pitch. NaN if no pitch is active.
    int or nan
        Highest active pitch. NaN if no pitch is active.

    See Also
    --------
    :func:`pypianoroll.pitch_range`: Compute the pitch range.

    """
    # Indices of the pitch columns that are active at least once; the
    # result of ``nonzero`` is sorted in ascending order.
    active_pitches = pianoroll.any(0).nonzero()[0]
    if not active_pitches.size:
        return nan, nan
    return active_pitches[0], active_pitches[-1]


def pitch_range(pianoroll) -> float:
    """Return the pitch range.

    Parameters
    ----------
    pianoroll : ndarray
        Piano roll to evaluate.

    Returns
    -------
    int or nan
        Pitch range (in semitones), i.e., difference between the
        highest and the lowest active pitches. NaN if no pitch is
        active.

    See Also
    --------
    :func:`pypianoroll.pitch_range_tuple`: Return the pitch range as a
      tuple.

    """
    lowest, highest = pitch_range_tuple(pianoroll)
    # Both bounds are NaN for a silent piano roll, and NaN - NaN is NaN.
    return highest - lowest


def qualified_note_rate(pianoroll: ndarray, threshold: float = 2) -> float:
    r"""Return the ratio of the number of the qualified notes.

    The qualified note rate is defined as the ratio of the number of
    qualified notes (notes lasting at least `threshold` time steps) to
    the total number of notes. A note is a maximal run of consecutive
    nonzero time steps on a single pitch. Return NaN if no note is
    found.

    .. math::
        qualified\_note\_rate = \frac{
            \#(notes\_no\_shorter\_than\_the\_threshold)
        }{
            \#(notes)
        }

    Parameters
    ----------
    pianoroll : ndarray
        Piano roll to evaluate, indexed as ``[time, pitch]``.
    threshold : int, default: 2
        Minimum note length (in time steps) to count into the
        numerator.

    Returns
    -------
    float
        Qualified note rate.

    References
    ----------
    1. Hao-Wen Dong, Wen-Yi Hsiao, Li-Chia Yang, and Yi-Hsuan Yang,
       "MuseGAN: Multi-track sequential generative adversarial networks
       for symbolic music generation and accompaniment," in Proceedings
       of the 32nd AAAI Conference on Artificial Intelligence (AAAI),
       2018.

    """
    # Binarize into a *signed* dtype: differences of unsigned or boolean
    # data cannot represent the -1 of a note offset, and raw velocities
    # would turn every velocity change into a spurious onset.
    active = (pianoroll != 0).astype(np.int8)
    # Silent padding at both ends guarantees that every note has both an
    # onset and an offset inside the array.
    padded = np.pad(active, ((1, 1), (0, 0)), "constant")
    # Transpose before flattening so that each pitch occupies one
    # contiguous stretch: the k-th onset then pairs with the k-th offset
    # of the same pitch and their index difference is the note length.
    diff = np.diff(padded, axis=0).T.reshape(-1)
    onsets = np.flatnonzero(diff > 0)
    if onsets.size < 1:
        return nan
    offsets = np.flatnonzero(diff < 0)
    n_qualified_notes = np.count_nonzero(offsets - onsets >= threshold)
    return n_qualified_notes / len(onsets)


def polyphonic_rate(pianoroll: ndarray, threshold: float = 2) -> float:
    r"""Return the ratio of time steps where multiple pitches are on.

    The polyphony rate is defined as the ratio of the number of time
    steps where multiple pitches (at least `threshold` of them) are on
    to the total number of time steps. Return NaN if song length is
    zero. This function only sees a single piano roll array and has no
    notion of tracks, so drum tracks must be excluded by the caller.
    This metric is used in [1], where it is called *polyphonicity*.

    .. math::
        polyphony\_rate = \frac{
            \#(time\_steps\_where\_multiple\_pitches\_are\_on)
        }{
            \#(time\_steps)
        }

    Parameters
    ----------
    pianoroll : ndarray
        Piano roll to evaluate, indexed as ``[time, pitch]``.
    threshold : int, default: 2
        Minimum number of simultaneously active pitches for a time step
        to count into the numerator.

    Returns
    -------
    float
        Polyphony rate.

    References
    ----------
    1. Hao-Wen Dong, Wen-Yi Hsiao, Li-Chia Yang, and Yi-Hsuan Yang,
       "MuseGAN: Multi-track sequential generative adversarial networks
       for symbolic music generation and accompaniment," in Proceedings
       of the 32nd AAAI Conference on Artificial Intelligence (AAAI),
       2018.

    """
    n_steps = len(pianoroll)
    if n_steps < 1:
        return nan
    # Number of active pitches at every time step.
    pitches_per_step = np.count_nonzero(pianoroll, 1)
    # The comparison is inclusive so that, with the default threshold of
    # 2, a time step with exactly two pitches already counts as
    # polyphonic.
    n_poly = np.count_nonzero(pitches_per_step >= threshold)
    return n_poly / n_steps


def drum_in_pattern_rate(
    pianoroll: ndarray, resolution: int, tolerance: float = 0.1
) -> float:
    r"""Return the ratio of drum notes in a certain drum pattern.

    The drum-in-pattern rate is defined as the ratio of the number of
    drum notes that lie in a certain drum pattern to the total number
    of drum notes. Every active (time step, pitch) cell counts as one
    note. A cell is weighted by the pattern value at its position
    within the beat: 1 on the pattern positions, `tolerance` on the
    positions next to them and 0 elsewhere. The piano roll passed in is
    expected to be a drum track. Return NaN if no drum note is found.
    This metric is used in [1].

    .. math::
        drum\_in\_pattern\_rate = \frac{
            \#(drum\_notes\_in\_pattern)}{\#(drum\_notes)}

    Parameters
    ----------
    pianoroll : ndarray
        Piano roll to evaluate, indexed as ``[time, pitch]``.
    resolution : int
        Time steps per beat. Must be one of 4, 6, 8, 9, 12, 16, 18 or
        24.
    tolerance : float, default: 0.1
        Weight given to notes that fall right next to a pattern
        position.

    Returns
    -------
    float
        Drum-in-pattern rate.

    Raises
    ------
    ValueError
        If `resolution` is not one of the supported values.

    References
    ----------
    1. Hao-Wen Dong, Wen-Yi Hsiao, Li-Chia Yang, and Yi-Hsuan Yang,
       "MuseGAN: Multi-track sequential generative adversarial networks
       for symbolic music generation and accompaniment," in Proceedings
       of the 32nd AAAI Conference on Artificial Intelligence (AAAI),
       2018.

    """
    if resolution not in (4, 6, 8, 9, 12, 16, 18, 24):
        raise ValueError(
            "Unsupported beat resolution. Expect 4, 6, 8 ,9, 12, 16, 18 or 24."
        )

    def _drum_pattern_mask(res, tol):
        """Return a one-beat drum pattern mask with the given tolerance."""
        if res in (24, 18):
            unit = [1.0, tol, 0.0, 0.0, 0.0, tol]
        elif res in (12, 9, 6):
            unit = [1.0, tol, tol]
        elif res == 16:
            unit = [1.0, tol, 0.0, tol]
        else:  # res is 8 or 4
            unit = [1.0, tol]
        # Repeat the unit until it spans exactly one beat.
        return np.tile(unit, res // len(unit))

    n_notes = np.count_nonzero(pianoroll)
    if n_notes < 1:
        return nan

    beat_mask = _drum_pattern_mask(resolution, tolerance)
    # The mask covers a single beat; look it up by position within the
    # beat so that piano rolls of any length get one weight per time
    # step.
    step_mask = beat_mask[np.arange(len(pianoroll)) % resolution]
    n_in_pattern = np.sum(step_mask * np.count_nonzero(pianoroll, 1))
    return n_in_pattern / n_notes


def _get_scale(root: int, mode: str) -> ndarray:
    """Return the scale mask for a specific root."""
    if mode == "major":
        a_scale_mask = np.array([0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1], bool)
    else:
        a_scale_mask = np.array([1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1], bool)
    return np.roll(a_scale_mask, root)


def in_scale_rate(
    pianoroll: ndarray, root: int = 3, mode: str = "major"
) -> float:
    r"""Return the ratio of pitches in a certain musical scale.

    The pitch-in-scale rate is defined as the ratio of the number of
    notes in a certain scale to the total number of notes. Every active
    (time step, pitch) cell counts as one note, and the pitch class of
    a note is its column index modulo 12. This function only sees a
    single piano roll array and has no notion of tracks, so drum tracks
    must be excluded by the caller. Return NaN if no note is found.
    This metric is used in [1].

    .. math::
        pitch\_in\_scale\_rate = \frac{\#(notes\_in\_scale)}{\#(notes)}

    Parameters
    ----------
    pianoroll : ndarray
        Piano roll to evaluate, indexed as ``[time, pitch]``.
    root : int, default: 3
        Root of the scale, given as the number of positions by which
        the base scale mask is rotated. Assuming column 0 is a C (as in
        MIDI pitch numbering), the base masks are A major and A minor,
        so the default of 3 selects C.
    mode : str, {'major', 'minor'}, default: 'major'
        Mode of the scale.

    Returns
    -------
    float
        Pitch-in-scale rate.

    See Also
    --------
    :func:`muspy.scale_consistency`: Compute the largest pitch-in-class
      rate.

    References
    ----------
    1. Hao-Wen Dong, Wen-Yi Hsiao, Li-Chia Yang, and Yi-Hsuan Yang,
       "MuseGAN: Multi-track sequential generative adversarial networks
       for symbolic music generation and accompaniment," in Proceedings
       of the 32nd AAAI Conference on Artificial Intelligence (AAAI),
       2018.

    """
    n_notes = np.count_nonzero(pianoroll)
    if n_notes < 1:
        return nan
    scale_mask = _get_scale(root, mode)
    # Count active cells (not their values) per pitch column so that the
    # numerator and the denominator use the same notion of a note.
    notes_per_pitch = np.count_nonzero(pianoroll, 0)
    # For every pitch column, look up whether its pitch class is part of
    # the scale.
    pitch_in_scale = scale_mask[np.arange(pianoroll.shape[1]) % 12]
    n_in_scale = np.sum(notes_per_pitch[pitch_in_scale])
    return n_in_scale / n_notes


def _get_tonal_matrix(r1, r2, r3) -> ndarray:  # pylint: disable=invalid-name
    """Return a tonal matrix for computing the tonal distance."""
    tonal_matrix = np.empty((6, 12))
    tonal_matrix[0] = r1 * np.sin(np.arange(12) * (7.0 / 6.0) * np.pi)
    tonal_matrix[1] = r1 * np.cos(np.arange(12) * (7.0 / 6.0) * np.pi)
    tonal_matrix[2] = r2 * np.sin(np.arange(12) * (3.0 / 2.0) * np.pi)
    tonal_matrix[3] = r2 * np.cos(np.arange(12) * (3.0 / 2.0) * np.pi)
    tonal_matrix[4] = r3 * np.sin(np.arange(12) * (2.0 / 3.0) * np.pi)
    tonal_matrix[5] = r3 * np.cos(np.arange(12) * (2.0 / 3.0) * np.pi)
    return tonal_matrix


def _to_tonal_space(
    pianoroll: ndarray, resolution: int, tonal_matrix: ndarray
) -> ndarray:
    """Return the tensor in tonal space (chroma normalized per beat).

    Parameters
    ----------
    pianoroll : ndarray
        Piano roll to map. Its number of time steps must be a multiple
        of `resolution`.
    resolution : int
        Time steps per beat.
    tonal_matrix : ndarray
        Matrix of shape ``(6, 12)`` mapping a 12-dimensional chroma
        vector to the 6-dimensional tonal space.

    Returns
    -------
    ndarray
        Array of shape ``(n_beats, 6)`` holding one tonal-space point
        per beat. Beats without any note are mapped to the origin.

    """
    # Accumulate the chroma of all time steps of a beat: (n_beats, 12).
    beat_chroma = _to_chroma(pianoroll).reshape((-1, resolution, 12)).sum(1)
    # Normalize every beat to unit sum. Silent beats have a zero sum;
    # they keep an all-zero chroma instead of producing NaN.
    totals = beat_chroma.sum(1, keepdims=True)
    normalized = np.divide(
        beat_chroma,
        totals,
        out=np.zeros(beat_chroma.shape),
        where=totals > 0,
    )
    # Contract over the 12 pitch classes: (n_beats, 12) x (12, 6).
    return np.matmul(normalized, tonal_matrix.T)


def tonal_distance(
    pianoroll_1: ndarray,
    pianoroll_2: ndarray,
    resolution: int,
    radii: Sequence[float] = (1.0, 1.0, 0.5),
) -> float:
    """Return the tonal distance [1] between the two input piano rolls.

    Both piano rolls are mapped into tonal space and the distance is
    the Euclidean norm of the element-wise difference between the two
    mapped arrays.

    Parameters
    ----------
    pianoroll_1 : ndarray
        First piano roll to evaluate.
    pianoroll_2 : ndarray
        Second piano roll to evaluate. Must have the same number of
        time steps as `pianoroll_1`.
    resolution : int
        Time steps per beat.
    radii : tuple of float, default: (1.0, 1.0, 0.5)
        Radii of the three tonal circles (see Equation 3 in [1]).

    Returns
    -------
    float
        Tonal distance between the two piano rolls.

    Raises
    ------
    AssertionError
        If the two piano rolls differ in length. The check is an
        ``assert`` statement and is therefore skipped when Python runs
        with optimizations enabled (``-O``).

    References
    ----------
    1. Christopher Harte, Mark Sandler, and Martin Gasser, "Detecting
       harmonic change in musical audio," in Proceedings of the 1st ACM
       workshop on Audio and music computing multimedia, 2006.

    """
    assert len(pianoroll_1) == len(
        pianoroll_2
    ), "Input piano rolls must have the same length."

    r1, r2, r3 = radii  # pylint: disable=invalid-name
    tonal_matrix = _get_tonal_matrix(r1, r2, r3)
    mapped_1 = _to_tonal_space(pianoroll_1, resolution, tonal_matrix)
    mapped_2 = _to_tonal_space(pianoroll_2, resolution, tonal_matrix)
    return np.linalg.norm(mapped_1 - mapped_2)