Source code for pypianoroll.metrics

"""Objective metrics for piano rolls.

Functions
---------

- drum_in_pattern_rate
- empty_beat_rate
- in_scale_rate
- n_pitch_classes_used
- n_pitches_used
- pitch_range
- pitch_range_tuple
- polyphonic_rate
- qualified_note_rate
- tonal_distance

"""
from math import nan
from typing import Sequence, Tuple

import numpy as np
from numpy import ndarray

# Public API of this module. The names listed here mirror the function list
# in the module docstring above; private helpers (leading underscore) are
# intentionally left out.
__all__ = [
    "drum_in_pattern_rate",
    "empty_beat_rate",
    "in_scale_rate",
    "n_pitch_classes_used",
    "n_pitches_used",
    "pitch_range",
    "pitch_range_tuple",
    "polyphonic_rate",
    "qualified_note_rate",
    "tonal_distance",
]


def _to_chroma(pianoroll: ndarray) -> ndarray:
    """Return the unnormalized chroma features.

    Every pitch ``p`` is folded onto pitch class ``p % 12`` and the values
    of all pitches that share a pitch class are summed for each time step.

    Parameters
    ----------
    pianoroll : ndarray, shape=(n_time_steps, n_pitches)
        Piano roll to convert. The input array is not modified.

    Returns
    -------
    ndarray, shape=(n_time_steps, 12)
        Unnormalized chroma features. Boolean input yields, for each time
        step, the number of active pitches per pitch class.

    """
    n_steps, n_pitches = pianoroll.shape
    # Zero-pad the pitch axis up to a whole number of octaves so that it can
    # be viewed as (octave, pitch class); e.g. 128 pitches become 11 x 12.
    n_missing = -n_pitches % 12
    padded = np.pad(pianoroll, ((0, 0), (0, n_missing)), "constant")
    n_octaves = (n_pitches + n_missing) // 12
    # The octave count is passed explicitly (instead of -1) so that an empty
    # piano roll with zero time steps can still be reshaped.
    return padded.reshape(n_steps, n_octaves, 12).sum(1)


def empty_beat_rate(pianoroll: ndarray, resolution: int) -> float:
    r"""Return the ratio of empty beats.

    The empty-beat rate is defined as the ratio of the number of empty
    beats (where no note is played) to the total number of beats. Return
    NaN if song length is zero.

    .. math::
        empty\_beat\_rate = \frac{\#(empty\_beats)}{\#(beats)}

    Parameters
    ----------
    pianoroll : ndarray, shape=(n_time_steps, n_pitches)
        Piano roll to evaluate.
    resolution : int
        Time steps per beat.

    Returns
    -------
    float
        Empty-beat rate.

    Raises
    ------
    ValueError
        If the number of time steps is not a multiple of `resolution`
        (raised by the underlying NumPy reshape).

    """
    # One row per beat: all (time step, pitch) cells of a beat side by side.
    beats = pianoroll.reshape(-1, resolution * pianoroll.shape[1])
    n_beats = len(beats)
    if n_beats < 1:
        return nan
    # `beats.any(1)` flags the beats that contain a note, so the empty
    # beats are the remaining ones.
    n_empty_beats = n_beats - np.count_nonzero(beats.any(1))
    return n_empty_beats / n_beats
def n_pitches_used(pianoroll: ndarray) -> int:
    """Return the number of unique pitches used.

    Parameters
    ----------
    pianoroll : ndarray
        Piano roll to evaluate.

    Returns
    -------
    int
        Number of unique pitches used, i.e., the number of pitch columns
        that are nonzero at one or more time steps.

    See Also
    --------
    :func:`pypianoroll.n_pitch_classes_used`: Compute the number of unique
        pitch classes used.

    """
    # Collapse the time axis: a pitch counts once however often it sounds.
    pitch_is_active = np.any(pianoroll, axis=0)
    return np.count_nonzero(pitch_is_active)
def n_pitch_classes_used(pianoroll: ndarray) -> int:
    """Return the number of unique pitch classes used.

    Parameters
    ----------
    pianoroll : ndarray
        Piano roll to evaluate.

    Returns
    -------
    int
        Number of unique pitch classes used, i.e., the number of chroma
        columns that are nonzero at one or more time steps.

    See Also
    --------
    :func:`pypianoroll.n_pitches_used`: Compute the number of unique
        pitches used.

    """
    chroma = _to_chroma(pianoroll)
    # Collapse the time axis: a pitch class counts once however often it
    # sounds.
    class_is_active = chroma.any(axis=0)
    return np.count_nonzero(class_is_active)
def pitch_range_tuple(pianoroll) -> Tuple[float, float]:
    """Return the pitch range as a tuple `(lowest, highest)`.

    Parameters
    ----------
    pianoroll : ndarray
        Piano roll to evaluate.

    Returns
    -------
    int or nan
        Lowest active pitch (NaN if no pitch is active).
    int or nan
        Highest active pitch (NaN if no pitch is active).

    See Also
    --------
    :func:`pypianoroll.pitch_range`: Compute the pitch range.

    """
    # Indices of the pitches that are active at any time step, ascending.
    active_pitches = np.nonzero(pianoroll.any(0))[0]
    if active_pitches.size:
        return active_pitches[0], active_pitches[-1]
    return nan, nan
def pitch_range(pianoroll) -> float:
    """Return the pitch range.

    Parameters
    ----------
    pianoroll : ndarray
        Piano roll to evaluate.

    Returns
    -------
    int or nan
        Pitch range (in semitones), i.e., difference between the highest
        and the lowest active pitches. The result is NaN when
        `pitch_range_tuple` reports NaN bounds.

    See Also
    --------
    :func:`pypianoroll.pitch_range_tuple`: Return the pitch range as a
        tuple.

    """
    bounds = pitch_range_tuple(pianoroll)
    # bounds is (lowest, highest).
    return bounds[1] - bounds[0]
def qualified_note_rate(pianoroll: ndarray, threshold: float = 2) -> float:
    r"""Return the ratio of the number of the qualified notes.

    The qualified note rate is defined as the ratio of the number of
    qualified notes (notes lasting at least `threshold` time steps) to the
    total number of notes. Return NaN if no note is found.

    A note is a maximal run of consecutive time steps during which a pitch
    is on (value greater than zero); changes of the nonzero value inside
    such a run do not start a new note.

    .. math::
        qualified\_note\_rate = \frac{
            \#(notes\_no\_shorter\_than\_the\_threshold)
        }{
            \#(notes)
        }

    Parameters
    ----------
    pianoroll : ndarray, shape=(n_time_steps, n_pitches)
        Piano roll to evaluate.
    threshold : int
        Minimum note length (in time steps) to count into the numerator.

    Returns
    -------
    float
        Qualified note rate.

    References
    ----------
    1. Hao-Wen Dong, Wen-Yi Hsiao, Li-Chia Yang, and Yi-Hsuan Yang,
       "MuseGAN: Multi-track sequential generative adversarial networks
       for symbolic music generation and accompaniment," in Proceedings
       of the 32nd AAAI Conference on Artificial Intelligence (AAAI),
       2018.

    """
    # Binarize into a *signed* dtype: differencing an unsigned array wraps
    # around (0 - 1 == 255), which would hide every note offset.
    active = (pianoroll > 0).astype(np.int8)
    # A silent step before and after guarantees that every note has both an
    # onset (+1) and an offset (-1) in the difference.
    padded = np.pad(active, ((1, 1), (0, 0)), "constant")
    # Flatten pitch by pitch (hence the transpose) so that onsets and
    # offsets alternate within each pitch; the k-th onset then belongs to
    # the k-th offset and their index difference is the note length.
    changes = np.diff(padded, axis=0).T.reshape(-1)
    onsets = (changes > 0).nonzero()[0]
    n_notes = len(onsets)
    if n_notes < 1:
        return nan
    offsets = (changes < 0).nonzero()[0]
    n_qualified_notes = np.count_nonzero(offsets - onsets >= threshold)
    return n_qualified_notes / n_notes
def polyphonic_rate(pianoroll: ndarray, threshold: float = 2) -> float:
    r"""Return the ratio of time steps where multiple pitches are on.

    The polyphony rate is defined as the ratio of the number of time steps
    where more than `threshold` pitches are on to the total number of time
    steps. Return NaN if song length is zero. This metric is used in [1],
    where it is called *polyphonicity*.

    .. math::
        polyphony\_rate = \frac{
            \#(time\_steps\_where\_multiple\_pitches\_are\_on)
        }{
            \#(time\_steps)
        }

    Parameters
    ----------
    pianoroll : ndarray, shape=(n_time_steps, n_pitches)
        Piano roll to evaluate.
    threshold : int
        A time step counts into the numerator when strictly more than this
        many pitches are on.

    Returns
    -------
    float
        Polyphony rate.

    References
    ----------
    1. Hao-Wen Dong, Wen-Yi Hsiao, Li-Chia Yang, and Yi-Hsuan Yang,
       "MuseGAN: Multi-track sequential generative adversarial networks
       for symbolic music generation and accompaniment," in Proceedings
       of the 32nd AAAI Conference on Artificial Intelligence (AAAI),
       2018.

    """
    n_steps = len(pianoroll)
    if n_steps < 1:
        return nan
    # Number of sounding pitches at each time step.
    pitches_per_step = np.count_nonzero(pianoroll, axis=1)
    n_polyphonic_steps = np.count_nonzero(pitches_per_step > threshold)
    return n_polyphonic_steps / n_steps
def drum_in_pattern_rate(
    pianoroll: ndarray, resolution: int, tolerance: float = 0.1
) -> float:
    r"""Return the ratio of drum notes in a certain drum pattern.

    The drum-in-pattern rate is defined as the ratio of the (weighted)
    number of notes that lie on the drum pattern to the total number of
    notes. The piano roll passed in is treated as a drum track; no track
    filtering happens here. Return NaN if no drum note is found. This
    metric is used in [1].

    .. math::
        drum\_in\_pattern\_rate = \frac{
            \#(drum\_notes\_in\_pattern)}{\#(drum\_notes)}

    Parameters
    ----------
    pianoroll : ndarray, shape=(n_time_steps, n_pitches)
        Piano roll to evaluate. Its length may span any number of beats;
        the one-beat pattern is repeated along the time axis.
    resolution : int
        Time steps per beat. Must be one of 4, 6, 8, 9, 12, 16, 18 or 24.
    tolerance : float, default: 0.1
        Weight given to a note that falls on a time step adjacent to a
        pattern position. Notes exactly on a pattern position weigh 1 and
        all other notes weigh 0.

    Returns
    -------
    float
        Drum-in-pattern rate.

    Raises
    ------
    ValueError
        If `resolution` is not one of the supported values.

    References
    ----------
    1. Hao-Wen Dong, Wen-Yi Hsiao, Li-Chia Yang, and Yi-Hsuan Yang,
       "MuseGAN: Multi-track sequential generative adversarial networks
       for symbolic music generation and accompaniment," in Proceedings
       of the 32nd AAAI Conference on Artificial Intelligence (AAAI),
       2018.

    """
    # resolution -> (weights of one pattern unit, repetitions per beat).
    pattern_units = {
        24: ((1.0, tolerance, 0.0, 0.0, 0.0, tolerance), 4),
        12: ((1.0, tolerance, tolerance), 4),
        6: ((1.0, tolerance, tolerance), 2),
        18: ((1.0, tolerance, 0.0, 0.0, 0.0, tolerance), 3),
        9: ((1.0, tolerance, tolerance), 3),
        16: ((1.0, tolerance, 0.0, tolerance), 4),
        8: ((1.0, tolerance), 4),
        4: ((1.0, tolerance), 2),
    }
    if resolution not in pattern_units:
        raise ValueError(
            "Unsupported beat resolution. Expect 4, 6, 8 ,9, 12, 16, 18 or 24."
        )

    n_notes = np.count_nonzero(pianoroll)
    if n_notes < 1:
        return nan

    unit, n_repeats = pattern_units[resolution]
    beat_mask = np.tile(unit, n_repeats)
    # The mask covers a single beat; look up the weight of every time step
    # by its position inside its beat so that rolls of any length work.
    step_weights = beat_mask[np.arange(len(pianoroll)) % resolution]
    notes_per_step = np.count_nonzero(pianoroll, 1)
    n_in_pattern = np.sum(step_weights * notes_per_step)
    return float(n_in_pattern / n_notes)
def _get_scale(root: int, mode: str) -> ndarray:
    """Return the scale mask for a specific root.

    Parameters
    ----------
    root : int
        Number of positions by which the 12-entry template is rotated
        (with wrap-around) towards higher indices.
    mode : str
        ``"major"`` selects the major template; any other value selects
        the minor template.

    Returns
    -------
    ndarray, dtype=bool, shape=(12,)
        Mask that is True for the pitch classes belonging to the scale.

    Notes
    -----
    Presumably index 0 stands for pitch class C, in which case the
    unrotated templates are A major and A natural minor and ``root=3``
    yields the C scales -- confirm against the callers.

    """
    major_template = (0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1)
    minor_template = (1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1)
    template = major_template if mode == "major" else minor_template
    return np.roll(np.array(template, bool), root)
def in_scale_rate(
    pianoroll: ndarray, root: int = 3, mode: str = "major"
) -> float:
    r"""Return the ratio of pitches in a certain musical scale.

    The pitch-in-scale rate is defined as the ratio of the number of notes
    in a certain scale to the total number of notes, where every nonzero
    (time step, pitch) cell counts as one note regardless of its value.
    Return NaN if no note is found. This metric is used in [1].

    .. math::
        pitch\_in\_scale\_rate = \frac{\#(notes\_in\_scale)}{\#(notes)}

    Parameters
    ----------
    pianoroll : ndarray, shape=(n_time_steps, n_pitches)
        Piano roll to evaluate.
    root : int
        Root of the scale, forwarded to the scale-mask helper as the
        rotation of its template.
    mode : str, {'major', 'minor'}
        Mode of the scale.

    Returns
    -------
    float
        Pitch-in-scale rate.

    See Also
    --------
    :func:`muspy.scale_consistency`: Compute the largest pitch-in-class
        rate.

    References
    ----------
    1. Hao-Wen Dong, Wen-Yi Hsiao, Li-Chia Yang, and Yi-Hsuan Yang,
       "MuseGAN: Multi-track sequential generative adversarial networks
       for symbolic music generation and accompaniment," in Proceedings
       of the 32nd AAAI Conference on Artificial Intelligence (AAAI),
       2018.

    """
    n_notes = np.count_nonzero(pianoroll)
    if n_notes < 1:
        # Nothing to rate; avoid a 0 / 0 division.
        return nan
    # Binarize first so that the numerator counts cells exactly like the
    # denominator does; summing raw values would weight notes by velocity
    # and could push the ratio above 1.
    chroma = _to_chroma(pianoroll != 0)
    scale_mask = _get_scale(root, mode)
    n_in_scale = np.sum(chroma[:, scale_mask])
    return float(n_in_scale / n_notes)
def _get_tonal_matrix(r1, r2, r3) -> ndarray:  # pylint: disable=invalid-name
    """Return a tonal matrix for computing the tonal distance.

    Parameters
    ----------
    r1, r2, r3 : float
        Radii applied to rows 0-1, 2-3 and 4-5, respectively.

    Returns
    -------
    ndarray, shape=(6, 12)
        Matrix whose rows come in (sine, cosine) pairs, one pair per
        radius, evaluated at the twelve pitch-class indices.

    """
    tonal_matrix = np.empty((6, 12))
    tonal_matrix[0] = r1 * np.sin(np.arange(12) * (7.0 / 6.0) * np.pi)
    tonal_matrix[1] = r1 * np.cos(np.arange(12) * (7.0 / 6.0) * np.pi)
    tonal_matrix[2] = r2 * np.sin(np.arange(12) * (3.0 / 2.0) * np.pi)
    tonal_matrix[3] = r2 * np.cos(np.arange(12) * (3.0 / 2.0) * np.pi)
    tonal_matrix[4] = r3 * np.sin(np.arange(12) * (2.0 / 3.0) * np.pi)
    tonal_matrix[5] = r3 * np.cos(np.arange(12) * (2.0 / 3.0) * np.pi)
    return tonal_matrix


def _to_tonal_space(
    pianoroll: ndarray, resolution: int, tonal_matrix: ndarray
) -> ndarray:
    """Return the tensor in tonal space (chroma normalized per beat).

    Parameters
    ----------
    pianoroll : ndarray, shape=(n_time_steps, n_pitches)
        Piano roll to map. The number of time steps must be a multiple of
        `resolution`; otherwise the NumPy reshape raises a ValueError.
    resolution : int
        Time steps per beat.
    tonal_matrix : ndarray, shape=(6, 12)
        Tonal matrix to project the chroma with.

    Returns
    -------
    ndarray, shape=(n_beats, 6)
        One six-dimensional point per beat. Silent beats map to the
        origin.

    """
    # Accumulate the chroma of all time steps of a beat: (n_beats, 12).
    step_chroma = _to_chroma(pianoroll).reshape((-1, resolution, 12))
    beat_chroma = step_chroma.sum(1)
    totals = beat_chroma.sum(1, keepdims=True)
    # Dividing a silent (all-zero) beat by 1 keeps it all-zero and avoids
    # the NaN that 0 / 0 would spread into the final distance.
    beat_chroma = beat_chroma / np.where(totals > 0, totals, 1)
    # (n_beats, 12) @ (12, 6): contract over the pitch-class axis.
    return np.matmul(beat_chroma, tonal_matrix.T)
def tonal_distance(
    pianoroll_1: ndarray,
    pianoroll_2: ndarray,
    resolution: int,
    radii: Sequence[float] = (1.0, 1.0, 0.5),
) -> float:
    """Return the tonal distance [1] between the two input piano rolls.

    Both piano rolls are mapped into tonal space beat by beat and the
    Euclidean norm of the difference of the two mappings is returned.

    Parameters
    ----------
    pianoroll_1 : ndarray
        First piano roll to evaluate.
    pianoroll_2 : ndarray
        Second piano roll to evaluate.
    resolution : int
        Time steps per beat.
    radii : tuple of float
        Radii of the three tonal circles (see Equation 3 in [1]).

    Returns
    -------
    float
        Tonal distance between the two piano rolls.

    Raises
    ------
    ValueError
        If the two piano rolls differ in length, or if `radii` does not
        hold exactly three values.

    References
    ----------
    1. Christopher Harte, Mark Sandler, and Martin Gasser, "Detecting
       harmonic change in musical audio," in Proceedings of the 1st ACM
       workshop on Audio and music computing multimedia, 2006.

    """
    # Raise explicitly rather than assert: assertions are stripped when
    # Python runs with -O, which would let mismatched inputs through.
    if len(pianoroll_1) != len(pianoroll_2):
        raise ValueError("Input piano rolls must have the same length.")

    r1, r2, r3 = radii  # pylint: disable=invalid-name
    tonal_matrix = _get_tonal_matrix(r1, r2, r3)
    mapped_1 = _to_tonal_space(pianoroll_1, resolution, tonal_matrix)
    mapped_2 = _to_tonal_space(pianoroll_2, resolution, tonal_matrix)
    return float(np.linalg.norm(mapped_1 - mapped_2))