Repository: orchidas/Chord-Recognition Branch: master Commit: fa6a6471fef5 Files: 7 Total size: 17.1 KB Directory structure: gitextract_27_45xet/ ├── README.md ├── chromagram.py ├── create_templates.py ├── data/ │ ├── chord_templates.json │ └── test_chords/ │ └── readme.txt ├── hmm.py └── main.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: README.md ================================================ # Chord-Recognition

Automatic chord recognition in Python

Chords are identified automatically from monophonic/polyphonic audio. The feature extracted is called the Pitch Class Profile, which is obtained by computing the Constant Q Transform. Two methods are used for classification:

Template matching - The pitch class profile is correlated with the templates of 24 major and minor chords, and the chord with the highest correlation is identified. Details are given in the paper Automatic Chord Recognition from Audio Using Enhanced Pitch Class Profile - Kyogu Lee in Proc. of ICMC, 2006.
Hidden Markov Model - The HMM is trained based on music theory according to the paper A Robust Mid-level Representation for Harmonic Content in Music Signals - Juan P. Bello, Proc. of ISMIR, 2005. Viterbi decoding is used to estimate the chord sequence in multi-timbral, polyphonic music.

Usage

Run main.py with an input file name from data/test_chords/ with flag -m set to the method you want to use for detection, and -p for plotting the result. The default method is template matching. Example: ``` python3 main.py -i 'Grand Piano - Fazioli - major E middle.wav' -m hmm -p True ``` For help, run `python3 main.py -h`

================================================ FILE: chromagram.py ================================================ """ Algorithm based on the paper 'Automatic Chord Recognition from Audio Using Enhanced Pitch Class Profile' by Kyogu Lee This script computes 12 dimensional chromagram for chord detection @author ORCHISAMA """ from __future__ import division from scipy.signal import hamming from scipy.fftpack import fft import numpy as np import matplotlib.pyplot as plt def nearestPow2(inp): power = np.ceil(np.log2(inp)) return 2**power """Function to calculcate Harmonic Power Spectrum from DFT""" def HPS(dft, M): hps_len = int(np.ceil(np.size(dft) / (2**M))) hps = np.ones(hps_len) for n in range(hps_len): for m in range(M + 1): hps[n] *= np.absolute(dft[(2**m) * n]) return hps """Function to compute CQT using sparse matrix multiplication, Brown and Puckette 1992- fast""" def CQT_fast(x, fs, bins, fmin, fmax, M): threshold = 0.0054 # for Hamming window K = int(bins * np.ceil(np.log2(fmax / fmin))) Q = 1 / (2 ** (1 / bins) - 1) nfft = np.int32(nearestPow2(np.ceil(Q * fs / fmin))) tempKernel = np.zeros(nfft, dtype=np.complex) specKernel = np.zeros(nfft, dtype=np.complex) sparKernel = [] # create sparse Kernel for k in range(K - 1, -1, -1): fk = (2 ** (k / bins)) * fmin N = np.int32(np.round((Q * fs) / fk)) tempKernel[:N] = hamming(N) / N * np.exp(-2 * np.pi * 1j * Q * np.arange(N) / N) specKernel = fft(tempKernel) specKernel[np.where(np.abs(specKernel) <= threshold)] = 0 if k == K - 1: sparKernel = specKernel else: sparKernel = np.vstack((specKernel, sparKernel)) sparKernel = np.transpose(np.conjugate(sparKernel)) / nfft ft = fft(x, nfft) cqt = np.dot(ft, sparKernel) ft = fft(x, nfft * (2**M)) # calculate harmonic power spectrum # harm_pow = HPS(ft,M) # cqt = np.dot(harm_pow, sparKernel) return cqt """Function to compute constant Q Transform, Judith Brown, 1991 - slow""" def CQT_slow(x, fs, bins, fmin, fmax): K = int(bins * np.ceil(np.log2(fmax / fmin))) Q = 1 / (2 
** (1 / bins) - 1) cqt = np.zeros(K, dtype=np.complex) for k in range(K): fk = (2 ** (k / bins)) * fmin N = int(np.round(Q * fs / fk)) arr = -2 * np.pi * 1j * Q * np.arange(N) / N cqt[k] = np.dot(x[:N], np.transpose(hamming(N) * np.exp(arr))) / N return cqt """Function to compute Pitch Class Profile from constant Q transform""" def PCP(cqt, bins, M): CH = np.zeros(bins) for b in range(bins): CH[b] = np.sum(cqt[b + (np.arange(M) * bins)]) return CH def compute_chroma(x, fs): fmin = 96 fmax = 5250 bins = 12 M = 3 nOctave = np.int32(np.ceil(np.log2(fmax / fmin))) CH = np.zeros(bins) # Compute constant Q transform cqt_fast = CQT_fast(x, fs, bins, fmin, fmax, M) # get Pitch Class Profile CH = PCP(np.absolute(cqt_fast), bins, nOctave) return CH ================================================ FILE: create_templates.py ================================================ """ Algorithm based on the paper 'Automatic Chord Recognition from Audio Using Enhanced Pitch Class Profile' by Kyogu Lee This script computes 12 dimensional chromagram for chord detection @author ORCHISAMA DAS """ """Create pitch profile template for 12 major and 12 minor chords and save them in a json file Gmajor template = [1,0,0,0,1,0,0,1,0,0,0,0] - needs to be run just once""" import json template = dict() major = ["G", "G#", "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#"] minor = ["Gm", "G#m", "Am", "A#m", "Bm", "Cm", "C#m", "Dm", "D#m", "Em", "Fm", "F#m"] offset = 0 num_chords = len(major) # initialise lists with zeros for chord in range(num_chords): template[major[chord]] = list() template[minor[chord]] = list() for note in range(num_chords): template[major[chord]].append(0) template[minor[chord]].append(0) for chord in range(num_chords): for note in range(num_chords): if note == 0 or note == 7: template[major[chord]][(note + offset) % num_chords] = 1 template[minor[chord]][(note + offset) % num_chords] = 1 elif note == 4: template[major[chord]][(note + offset) % num_chords] = 1 elif note == 3: 
template[minor[chord]][(note + offset) % num_chords] = 1 offset += 1 # debugging for key, value in template.items(): print(key, value) # save as JSON file with open("chord_templates.json", "w") as fp: json.dump(template, fp, sort_keys=False) print("Saved succesfully to JSON file") ================================================ FILE: data/chord_templates.json ================================================ {"A#m": [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0], "C#m": [0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0], "A#": [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0], "Dm": [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0], "C#": [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0], "Bm": [0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1], "G#": [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0], "Fm": [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0], "A": [0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0], "C": [1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0], "B": [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], "E": [0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0], "D": [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1], "G": [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0], "F": [0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0], "G#m": [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0], "Em": [1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0], "D#m": [0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1], "Cm": [1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0], "Am": [0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0], "D#": [1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0], "F#": [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1], "Gm": [1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0], "F#m": [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1]} ================================================ FILE: data/test_chords/readme.txt ================================================ piano chords downloaded from http://ibeat.org/piano-chords-free/. Give them credit under the Creative Commons License. ================================================ FILE: hmm.py ================================================ """Automatic chord recogniton with HMM, as suggested by Juan P. 
Bello in 'A mid level representation for harmonic content in music signals' @author ORCHISAMA DAS, 2016""" from __future__ import division from chromagram import compute_chroma import os import numpy as np """calculates multivariate gaussian matrix from mean and covariance matrices""" def multivariate_gaussian(x, meu, cov): det = np.linalg.det(cov) val = np.exp(-0.5 * np.dot(np.dot((x - meu).T, np.linalg.inv(cov)), (x - meu))) try: val /= np.sqrt(((2 * np.pi) ** 12) * det) except: print("Matrix is not positive, semi-definite") if np.isnan(val): val = np.finfo(float).eps return val """initialize the emission, transition and initialisation matrices for HMM in chord recognition PI - initialisation matrix, #A - transition matrix, #B - observation matrix""" def initialize(chroma, templates, chords, nested_cof): """initialising PI with equal probabilities""" num_chords = len(chords) PI = np.ones(num_chords) / num_chords """initialising A based on nested circle of fifths""" eps = 0.01 A = np.empty((num_chords, num_chords)) for chord in chords: ind = nested_cof.index(chord) t = ind for i in range(num_chords): if t >= num_chords: t = t % num_chords A[ind][t] = (abs(num_chords // 2 - i) + eps) / ( num_chords**2 + num_chords * eps ) t += 1 """initialising based on tonic triads - Mean matrix; Tonic with dominant - 0.8, tonic with mediant 0.6 and mediant-dominant 0.8, non-triad diagonal elements with 0.2 - covariance matrix""" nFrames = np.shape(chroma)[1] B = np.zeros((num_chords, nFrames)) meu_mat = np.zeros((num_chords, num_chords // 2)) cov_mat = np.zeros((num_chords, num_chords // 2, num_chords // 2)) meu_mat = np.array(templates) offset = 0 for i in range(num_chords): if i == num_chords // 2: offset = 0 tonic = offset if i < num_chords // 2: mediant = (tonic + 4) % (num_chords // 2) else: mediant = (tonic + 3) % (num_chords // 2) dominant = (tonic + 7) % (num_chords // 2) # weighted diagonal cov_mat[i, tonic, tonic] = 0.8 cov_mat[i, mediant, mediant] = 0.6 cov_mat[i, 
dominant, dominant] = 0.8 # off-diagonal - matrix not positive semidefinite, hence determinant is negative # for n in [tonic,mediant,dominant]: # for m in [tonic, mediant, dominant]: # if (n is tonic and m is mediant) or (n is mediant and m is tonic): # cov_mat[i,n,m] = 0.6 # else: # cov_mat[i,n,m] = 0.8 # filling non zero diagonals for j in range(num_chords // 2): if cov_mat[i, j, j] == 0: cov_mat[i, j, j] = 0.2 offset += 1 """observation matrix B is a multivariate Gaussian calculated from mean vector and covariance matrix""" for m in range(nFrames): for n in range(num_chords): B[n, m] = multivariate_gaussian( chroma[:, m], meu_mat[n, :], cov_mat[n, :, :] ) return (PI, A, B) """Viterbi algorithm to find Path with highest probability - dynamic programming""" def viterbi(PI, A, B): (nrow, ncol) = np.shape(B) path = np.zeros((nrow, ncol)) states = np.zeros((nrow, ncol)) path[:, 0] = PI * B[:, 0] for i in range(1, ncol): for j in range(nrow): s = [(path[k, i - 1] * A[k, j] * B[j, i], k) for k in range(nrow)] (prob, state) = max(s) path[j, i] = prob states[j, i - 1] = state return (path, states) ================================================ FILE: main.py ================================================ import numpy as np import os, sys, getopt import matplotlib.pyplot as plt from scipy.io.wavfile import read import json from chromagram import compute_chroma import hmm as hmm def get_templates(chords): """read from JSON file to get chord templates""" with open("data/chord_templates.json", "r") as fp: templates_json = json.load(fp) templates = [] for chord in chords: if chord == "N": continue templates.append(templates_json[chord]) return templates def get_nested_circle_of_fifths(): chords = [ "N", "G", "G#", "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "Gm", "G#m", "Am", "A#m", "Bm", "Cm", "C#m", "Dm", "D#m", "Em", "Fm", "F#m", ] nested_cof = [ "G", "Bm", "D", "F#m", "A", "C#m", "E", "G#m", "B", "D#m", "F#", "A#m", "C#", "Fm", "G#", "Cm", "D#", "Gm", "A#", 
"Dm", "F", "Am", "C", "Em", ] return chords, nested_cof def find_chords( x: np.ndarray, fs: int, templates: list, chords: list, nested_cof: list = None, method: str = None, plot: bool = False, ): """ Given a mono audio signal x, and its sampling frequency, fs, find chords in it using 'method' Args: x : mono audio signal fs : sampling frequency (Hz) templates: dictionary of chord templates chords: list of chords to search over nested_cof: nested circle of fifth chords method: template matching or HMM plot: if results should be plotted """ # framing audio, window length = 8192, hop size = 1024 and computing PCP nfft = 8192 hop_size = 1024 nFrames = int(np.round(len(x) / (nfft - hop_size))) # zero padding to make signal length long enough to have nFrames x = np.append(x, np.zeros(nfft)) xFrame = np.empty((nfft, nFrames)) start = 0 num_chords = len(templates) chroma = np.empty((num_chords // 2, nFrames)) id_chord = np.zeros(nFrames, dtype="int32") timestamp = np.zeros(nFrames) max_cor = np.zeros(nFrames) # step 1. 
compute chromagram for n in range(nFrames): xFrame[:, n] = x[start : start + nfft] start = start + nfft - hop_size timestamp[n] = n * (nfft - hop_size) / fs chroma[:, n] = compute_chroma(xFrame[:, n], fs) if method == "match_template": # correlate 12D chroma vector with each of # 24 major and minor chords for n in range(nFrames): cor_vec = np.zeros(num_chords) for ni in range(num_chords): cor_vec[ni] = np.correlate(chroma[:, n], np.array(templates[ni])) max_cor[n] = np.max(cor_vec) id_chord[n] = np.argmax(cor_vec) + 1 # if max_cor[n] < threshold, then no chord is played # might need to change threshold value id_chord[np.where(max_cor < 0.8 * np.max(max_cor))] = 0 final_chords = [chords[cid] for cid in id_chord] elif method == "hmm": # get max probability path from Viterbi algorithm (PI, A, B) = hmm.initialize(chroma, templates, chords, nested_cof) (path, states) = hmm.viterbi(PI, A, B) # normalize path for i in range(nFrames): path[:, i] /= sum(path[:, i]) # choose most likely chord - with max value in 'path' final_chords = [] indices = np.argmax(path, axis=0) final_states = np.zeros(nFrames) # find no chord zone set_zero = np.where(np.max(path, axis=0) < 0.3 * np.max(path))[0] if np.size(set_zero) > 0: indices[set_zero] = -1 # identify chords for i in range(nFrames): if indices[i] == -1: final_chords.append("NC") else: final_states[i] = states[indices[i], i] final_chords.append(chords[int(final_states[i])]) if plot: plt.figure() if method == "match_template": plt.yticks(np.arange(num_chords + 1), chords) plt.plot(timestamp, id_chord, marker="o") else: plt.yticks(np.arange(num_chords), chords) plt.plot(timestamp, np.int32(final_states), marker="o") plt.xlabel("Time in seconds") plt.ylabel("Chords") plt.title("Identified chords") plt.grid(True) plt.show() return timestamp, final_chords def main(argv): input_file = "" method = "" plot = False has_method = False try: opts, args = getopt.getopt(argv, "hi:m:p:", ["ifile=", "method=", "plot="]) except getopt.GetoptError: 
print("main.py -i -m ") sys.exit(2) for opt, arg in opts: if opt == "-h": print("main.py -i -m -p ") sys.exit() elif opt in ("-i", "--ifile"): input_file = arg elif opt in ("-m", "--method"): method = arg has_method = True elif opt in ("-p", "--plot"): plot = arg if not has_method: method = "match_template" print("Input file is ", input_file) print("Method is ", method) directory = os.getcwd() + "/data/test_chords/" # read the input file (fs, s) = read(directory + input_file) # convert to mono if file is stereo x = s[:, 0] if len(s.shape) else s # get chords and circle of fifths chords, nested_cof = get_nested_circle_of_fifths() # get chord templates templates = get_templates(chords) # find the chords if method == "match_template": timestamp, final_chords = find_chords( x, fs, templates=templates, chords=chords, method=method, plot=plot ) else: timestamp, final_chords = find_chords( x, fs, templates=templates, chords=chords[1:], nested_cof=nested_cof, method=method, plot=plot, ) # print chords with timestamps print("Time (s)", "Chord") for n in range(len(timestamp)): print("%.3f" % timestamp[n], final_chords[n]) if __name__ == "__main__": main(sys.argv[1:])