Repository: orchidas/Chord-Recognition Branch: master Commit: fa6a6471fef5 Files: 7 Total size: 17.1 KB Directory structure: gitextract_27_45xet/ ├── README.md ├── chromagram.py ├── create_templates.py ├── data/ │ ├── chord_templates.json │ └── test_chords/ │ └── readme.txt ├── hmm.py └── main.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: README.md ================================================ # Chord-Recognition
Run main.py with an input file name from data/test_chords/ with flag -m set to the method you want to use for detection, and -p for plotting the result. The default method is template matching. Example: ``` python3 main.py -i 'Grand Piano - Fazioli - major E middle.wav' -m hmm -p True ``` For help, run `python3 main.py -h`
================================================ FILE: chromagram.py ================================================ """ Algorithm based on the paper 'Automatic Chord Recognition from Audio Using Enhanced Pitch Class Profile' by Kyogu Lee This script computes 12 dimensional chromagram for chord detection @author ORCHISAMA """ from __future__ import division from scipy.signal import hamming from scipy.fftpack import fft import numpy as np import matplotlib.pyplot as plt def nearestPow2(inp): power = np.ceil(np.log2(inp)) return 2**power """Function to calculcate Harmonic Power Spectrum from DFT""" def HPS(dft, M): hps_len = int(np.ceil(np.size(dft) / (2**M))) hps = np.ones(hps_len) for n in range(hps_len): for m in range(M + 1): hps[n] *= np.absolute(dft[(2**m) * n]) return hps """Function to compute CQT using sparse matrix multiplication, Brown and Puckette 1992- fast""" def CQT_fast(x, fs, bins, fmin, fmax, M): threshold = 0.0054 # for Hamming window K = int(bins * np.ceil(np.log2(fmax / fmin))) Q = 1 / (2 ** (1 / bins) - 1) nfft = np.int32(nearestPow2(np.ceil(Q * fs / fmin))) tempKernel = np.zeros(nfft, dtype=np.complex) specKernel = np.zeros(nfft, dtype=np.complex) sparKernel = [] # create sparse Kernel for k in range(K - 1, -1, -1): fk = (2 ** (k / bins)) * fmin N = np.int32(np.round((Q * fs) / fk)) tempKernel[:N] = hamming(N) / N * np.exp(-2 * np.pi * 1j * Q * np.arange(N) / N) specKernel = fft(tempKernel) specKernel[np.where(np.abs(specKernel) <= threshold)] = 0 if k == K - 1: sparKernel = specKernel else: sparKernel = np.vstack((specKernel, sparKernel)) sparKernel = np.transpose(np.conjugate(sparKernel)) / nfft ft = fft(x, nfft) cqt = np.dot(ft, sparKernel) ft = fft(x, nfft * (2**M)) # calculate harmonic power spectrum # harm_pow = HPS(ft,M) # cqt = np.dot(harm_pow, sparKernel) return cqt """Function to compute constant Q Transform, Judith Brown, 1991 - slow""" def CQT_slow(x, fs, bins, fmin, fmax): K = int(bins * np.ceil(np.log2(fmax / fmin))) Q = 1 / (2 ** (1 / bins) - 1) cqt = np.zeros(K, dtype=np.complex) for k in range(K): fk = (2 ** (k / bins)) * fmin N = int(np.round(Q * fs / fk)) arr = -2 * np.pi * 1j * Q * np.arange(N) / N cqt[k] = np.dot(x[:N], np.transpose(hamming(N) * np.exp(arr))) / N return cqt """Function to compute Pitch Class Profile from constant Q transform""" def PCP(cqt, bins, M): CH = np.zeros(bins) for b in range(bins): CH[b] = np.sum(cqt[b + (np.arange(M) * bins)]) return CH def compute_chroma(x, fs): fmin = 96 fmax = 5250 bins = 12 M = 3 nOctave = np.int32(np.ceil(np.log2(fmax / fmin))) CH = np.zeros(bins) # Compute constant Q transform cqt_fast = CQT_fast(x, fs, bins, fmin, fmax, M) # get Pitch Class Profile CH = PCP(np.absolute(cqt_fast), bins, nOctave) return CH ================================================ FILE: create_templates.py ================================================ """ Algorithm based on the paper 'Automatic Chord Recognition from Audio Using Enhanced Pitch Class Profile' by Kyogu Lee This script computes 12 dimensional chromagram for chord detection @author ORCHISAMA DAS """ """Create pitch profile template for 12 major and 12 minor chords and save them in a json file Gmajor template = [1,0,0,0,1,0,0,1,0,0,0,0] - needs to be run just once""" import json template = dict() major = ["G", "G#", "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#"] minor = ["Gm", "G#m", "Am", "A#m", "Bm", "Cm", "C#m", "Dm", "D#m", "Em", "Fm", "F#m"] offset = 0 num_chords = len(major) # initialise lists with zeros for chord in range(num_chords): template[major[chord]] = list() template[minor[chord]] = list() for note in range(num_chords): template[major[chord]].append(0) template[minor[chord]].append(0) for chord in range(num_chords): for note in range(num_chords): if note == 0 or note == 7: template[major[chord]][(note + offset) % num_chords] = 1 template[minor[chord]][(note + offset) % num_chords] = 1 elif note == 4: template[major[chord]][(note + offset) % num_chords] = 1 elif note == 3: template[minor[chord]][(note + offset) % num_chords] = 1 offset += 1 # debugging for key, value in template.items(): print(key, value) # save as JSON file with open("chord_templates.json", "w") as fp: json.dump(template, fp, sort_keys=False) print("Saved succesfully to JSON file") ================================================ FILE: data/chord_templates.json ================================================ {"A#m": [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0], "C#m": [0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0], "A#": [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0], "Dm": [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0], "C#": [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0], "Bm": [0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1], "G#": [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0], "Fm": [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0], "A": [0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0], "C": [1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0], "B": [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], "E": [0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0], "D": [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1], "G": [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0], "F": [0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0], "G#m": [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0], "Em": [1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0], "D#m": [0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1], "Cm": [1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0], "Am": [0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0], "D#": [1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0], "F#": [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1], "Gm": [1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0], "F#m": [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1]} ================================================ FILE: data/test_chords/readme.txt ================================================ piano chords downloaded from http://ibeat.org/piano-chords-free/. Give them credit under the Creative Commons License. ================================================ FILE: hmm.py ================================================ """Automatic chord recogniton with HMM, as suggested by Juan P. Bello in 'A mid level representation for harmonic content in music signals' @author ORCHISAMA DAS, 2016""" from __future__ import division from chromagram import compute_chroma import os import numpy as np """calculates multivariate gaussian matrix from mean and covariance matrices""" def multivariate_gaussian(x, meu, cov): det = np.linalg.det(cov) val = np.exp(-0.5 * np.dot(np.dot((x - meu).T, np.linalg.inv(cov)), (x - meu))) try: val /= np.sqrt(((2 * np.pi) ** 12) * det) except: print("Matrix is not positive, semi-definite") if np.isnan(val): val = np.finfo(float).eps return val """initialize the emission, transition and initialisation matrices for HMM in chord recognition PI - initialisation matrix, #A - transition matrix, #B - observation matrix""" def initialize(chroma, templates, chords, nested_cof): """initialising PI with equal probabilities""" num_chords = len(chords) PI = np.ones(num_chords) / num_chords """initialising A based on nested circle of fifths""" eps = 0.01 A = np.empty((num_chords, num_chords)) for chord in chords: ind = nested_cof.index(chord) t = ind for i in range(num_chords): if t >= num_chords: t = t % num_chords A[ind][t] = (abs(num_chords // 2 - i) + eps) / ( num_chords**2 + num_chords * eps ) t += 1 """initialising based on tonic triads - Mean matrix; Tonic with dominant - 0.8, tonic with mediant 0.6 and mediant-dominant 0.8, non-triad diagonal elements with 0.2 - covariance matrix""" nFrames = np.shape(chroma)[1] B = np.zeros((num_chords, nFrames)) meu_mat = np.zeros((num_chords, num_chords // 2)) cov_mat = np.zeros((num_chords, num_chords // 2, num_chords // 2)) meu_mat = np.array(templates) offset = 0 for i in range(num_chords): if i == num_chords // 2: offset = 0 tonic = offset if i < num_chords // 2: mediant = (tonic + 4) % (num_chords // 2) else: mediant = (tonic + 3) % (num_chords // 2) dominant = (tonic + 7) % (num_chords // 2) # weighted diagonal cov_mat[i, tonic, tonic] = 0.8 cov_mat[i, mediant, mediant] = 0.6 cov_mat[i, dominant, dominant] = 0.8 # off-diagonal - matrix not positive semidefinite, hence determinant is negative # for n in [tonic,mediant,dominant]: # for m in [tonic, mediant, dominant]: # if (n is tonic and m is mediant) or (n is mediant and m is tonic): # cov_mat[i,n,m] = 0.6 # else: # cov_mat[i,n,m] = 0.8 # filling non zero diagonals for j in range(num_chords // 2): if cov_mat[i, j, j] == 0: cov_mat[i, j, j] = 0.2 offset += 1 """observation matrix B is a multivariate Gaussian calculated from mean vector and covariance matrix""" for m in range(nFrames): for n in range(num_chords): B[n, m] = multivariate_gaussian( chroma[:, m], meu_mat[n, :], cov_mat[n, :, :] ) return (PI, A, B) """Viterbi algorithm to find Path with highest probability - dynamic programming""" def viterbi(PI, A, B): (nrow, ncol) = np.shape(B) path = np.zeros((nrow, ncol)) states = np.zeros((nrow, ncol)) path[:, 0] = PI * B[:, 0] for i in range(1, ncol): for j in range(nrow): s = [(path[k, i - 1] * A[k, j] * B[j, i], k) for k in range(nrow)] (prob, state) = max(s) path[j, i] = prob states[j, i - 1] = state return (path, states) ================================================ FILE: main.py ================================================ import numpy as np import os, sys, getopt import matplotlib.pyplot as plt from scipy.io.wavfile import read import json from chromagram import compute_chroma import hmm as hmm def get_templates(chords): """read from JSON file to get chord templates""" with open("data/chord_templates.json", "r") as fp: templates_json = json.load(fp) templates = [] for chord in chords: if chord == "N": continue templates.append(templates_json[chord]) return templates def get_nested_circle_of_fifths(): chords = [ "N", "G", "G#", "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "Gm", "G#m", "Am", "A#m", "Bm", "Cm", "C#m", "Dm", "D#m", "Em", "Fm", "F#m", ] nested_cof = [ "G", "Bm", "D", "F#m", "A", "C#m", "E", "G#m", "B", "D#m", "F#", "A#m", "C#", "Fm", "G#", "Cm", "D#", "Gm", "A#", "Dm", "F", "Am", "C", "Em", ] return chords, nested_cof def find_chords( x: np.ndarray, fs: int, templates: list, chords: list, nested_cof: list = None, method: str = None, plot: bool = False, ): """ Given a mono audio signal x, and its sampling frequency, fs, find chords in it using 'method' Args: x : mono audio signal fs : sampling frequency (Hz) templates: dictionary of chord templates chords: list of chords to search over nested_cof: nested circle of fifth chords method: template matching or HMM plot: if results should be plotted """ # framing audio, window length = 8192, hop size = 1024 and computing PCP nfft = 8192 hop_size = 1024 nFrames = int(np.round(len(x) / (nfft - hop_size))) # zero padding to make signal length long enough to have nFrames x = np.append(x, np.zeros(nfft)) xFrame = np.empty((nfft, nFrames)) start = 0 num_chords = len(templates) chroma = np.empty((num_chords // 2, nFrames)) id_chord = np.zeros(nFrames, dtype="int32") timestamp = np.zeros(nFrames) max_cor = np.zeros(nFrames) # step 1. compute chromagram for n in range(nFrames): xFrame[:, n] = x[start : start + nfft] start = start + nfft - hop_size timestamp[n] = n * (nfft - hop_size) / fs chroma[:, n] = compute_chroma(xFrame[:, n], fs) if method == "match_template": # correlate 12D chroma vector with each of # 24 major and minor chords for n in range(nFrames): cor_vec = np.zeros(num_chords) for ni in range(num_chords): cor_vec[ni] = np.correlate(chroma[:, n], np.array(templates[ni])) max_cor[n] = np.max(cor_vec) id_chord[n] = np.argmax(cor_vec) + 1 # if max_cor[n] < threshold, then no chord is played # might need to change threshold value id_chord[np.where(max_cor < 0.8 * np.max(max_cor))] = 0 final_chords = [chords[cid] for cid in id_chord] elif method == "hmm": # get max probability path from Viterbi algorithm (PI, A, B) = hmm.initialize(chroma, templates, chords, nested_cof) (path, states) = hmm.viterbi(PI, A, B) # normalize path for i in range(nFrames): path[:, i] /= sum(path[:, i]) # choose most likely chord - with max value in 'path' final_chords = [] indices = np.argmax(path, axis=0) final_states = np.zeros(nFrames) # find no chord zone set_zero = np.where(np.max(path, axis=0) < 0.3 * np.max(path))[0] if np.size(set_zero) > 0: indices[set_zero] = -1 # identify chords for i in range(nFrames): if indices[i] == -1: final_chords.append("NC") else: final_states[i] = states[indices[i], i] final_chords.append(chords[int(final_states[i])]) if plot: plt.figure() if method == "match_template": plt.yticks(np.arange(num_chords + 1), chords) plt.plot(timestamp, id_chord, marker="o") else: plt.yticks(np.arange(num_chords), chords) plt.plot(timestamp, np.int32(final_states), marker="o") plt.xlabel("Time in seconds") plt.ylabel("Chords") plt.title("Identified chords") plt.grid(True) plt.show() return timestamp, final_chords def main(argv): input_file = "" method = "" plot = False has_method = False try: opts, args = getopt.getopt(argv, "hi:m:p:", ["ifile=", "method=", "plot="]) except getopt.GetoptError: print("main.py -i