[
  {
    "path": "README.md",
    "content": "# Chord-Recognition\n<h2> Automatic chord recognition in Python </h2>\n\nChords are identified automatically from monophonic/polyphonic audio. The feature extracted is called the <i>Pitch Class Profile</i>, which is obtained \nby computing the <i>Constant Q Transform</i>. Two methods are used for classification:\n<ol>\n<li>\nTemplate matching - The pitch class profile is correlated with 24 major and minor chords, and the chord with the highest correlation is identified.\nDetails are given in the paper <i><a href = \"https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.93.4283&rep=rep1&type=pdf\">Automatic Chord Recognition from Audio Using Enhanced Pitch\n  Class Profile</a></i> - Kyogu Lee in Proc. of ICMC, 2006. \n</li>\n<li>\nHidden Markov Model - HMM is trained based on music theory according to the paper <i><A HREF = \"http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.375.2151&rep=rep1&type=pdf\">A Robust Mid-level Representation for Harmonic Content in Music \n  Signals</a></i> - Juan P. Bello, Proc. of ISMIR, 2005. Viterbi decoding is used to estimate the chord sequence in multi-timbral, polyphonic music.\n</li>\n</ol>\n\n<h2> Usage </h2>\n<p> Run main.py with flag -i set to an input file name from data/test_chords/, flag -m set to the method you want to use for detection (`match_template` or `hmm`), and -p for plotting the result. The default method is template matching. Example:\n\n```\npython3 main.py -i 'Grand Piano - Fazioli - major E middle.wav' -m hmm -p True\n```\n\nFor help, run `python3 main.py -h`\n</p>\n"
  },
  {
    "path": "chromagram.py",
    "content": "\"\"\"\nAlgorithm based on the paper 'Automatic Chord Recognition from\nAudio Using Enhanced Pitch Class Profile' by Kyogu Lee\nThis script computes 12 dimensional chromagram for chord detection\n@author ORCHISAMA\n\"\"\"\n\nfrom __future__ import division\nfrom scipy.signal import hamming\nfrom scipy.fftpack import fft\nimport numpy as np\nimport matplotlib.pyplot as plt\n\n\ndef nearestPow2(inp):\n    power = np.ceil(np.log2(inp))\n    return 2**power\n\n\n\"\"\"Function to calculcate Harmonic Power Spectrum from DFT\"\"\"\n\n\ndef HPS(dft, M):\n\n    hps_len = int(np.ceil(np.size(dft) / (2**M)))\n    hps = np.ones(hps_len)\n    for n in range(hps_len):\n        for m in range(M + 1):\n            hps[n] *= np.absolute(dft[(2**m) * n])\n    return hps\n\n\n\"\"\"Function to compute CQT using sparse matrix multiplication, Brown and Puckette 1992- fast\"\"\"\n\n\ndef CQT_fast(x, fs, bins, fmin, fmax, M):\n\n    threshold = 0.0054  # for Hamming window\n    K = int(bins * np.ceil(np.log2(fmax / fmin)))\n    Q = 1 / (2 ** (1 / bins) - 1)\n    nfft = np.int32(nearestPow2(np.ceil(Q * fs / fmin)))\n    tempKernel = np.zeros(nfft, dtype=np.complex)\n    specKernel = np.zeros(nfft, dtype=np.complex)\n    sparKernel = []\n\n    # create sparse Kernel\n    for k in range(K - 1, -1, -1):\n        fk = (2 ** (k / bins)) * fmin\n        N = np.int32(np.round((Q * fs) / fk))\n        tempKernel[:N] = hamming(N) / N * np.exp(-2 * np.pi * 1j * Q * np.arange(N) / N)\n        specKernel = fft(tempKernel)\n        specKernel[np.where(np.abs(specKernel) <= threshold)] = 0\n        if k == K - 1:\n            sparKernel = specKernel\n        else:\n            sparKernel = np.vstack((specKernel, sparKernel))\n\n    sparKernel = np.transpose(np.conjugate(sparKernel)) / nfft\n    ft = fft(x, nfft)\n    cqt = np.dot(ft, sparKernel)\n    ft = fft(x, nfft * (2**M))\n    # calculate harmonic power spectrum\n    # harm_pow = HPS(ft,M)\n    # cqt = np.dot(harm_pow, 
sparKernel)\n    return cqt\n\n\n\"\"\"Function to compute constant Q Transform, Judith Brown, 1991 - slow\"\"\"\n\n\ndef CQT_slow(x, fs, bins, fmin, fmax):\n\n    K = int(bins * np.ceil(np.log2(fmax / fmin)))\n    Q = 1 / (2 ** (1 / bins) - 1)\n    cqt = np.zeros(K, dtype=np.complex)\n\n    for k in range(K):\n        fk = (2 ** (k / bins)) * fmin\n        N = int(np.round(Q * fs / fk))\n        arr = -2 * np.pi * 1j * Q * np.arange(N) / N\n        cqt[k] = np.dot(x[:N], np.transpose(hamming(N) * np.exp(arr))) / N\n    return cqt\n\n\n\"\"\"Function to compute Pitch Class Profile from constant Q transform\"\"\"\n\n\ndef PCP(cqt, bins, M):\n    CH = np.zeros(bins)\n    for b in range(bins):\n        CH[b] = np.sum(cqt[b + (np.arange(M) * bins)])\n    return CH\n\n\ndef compute_chroma(x, fs):\n\n    fmin = 96\n    fmax = 5250\n    bins = 12\n    M = 3\n    nOctave = np.int32(np.ceil(np.log2(fmax / fmin)))\n    CH = np.zeros(bins)\n    # Compute constant Q transform\n    cqt_fast = CQT_fast(x, fs, bins, fmin, fmax, M)\n    # get Pitch Class Profile\n    CH = PCP(np.absolute(cqt_fast), bins, nOctave)\n    return CH\n"
  },
  {
    "path": "create_templates.py",
    "content": "\"\"\"\nAlgorithm based on the paper 'Automatic Chord Recognition from\nAudio Using Enhanced Pitch Class Profile' by Kyogu Lee\nThis script computes 12 dimensional chromagram for chord detection\n@author ORCHISAMA DAS\n\"\"\"\n\n\"\"\"Create pitch profile template for 12 major and 12 minor chords and save them in a json file\nGmajor template = [1,0,0,0,1,0,0,1,0,0,0,0] - needs to be run just once\"\"\"\n\nimport json\n\ntemplate = dict()\nmajor = [\"G\", \"G#\", \"A\", \"A#\", \"B\", \"C\", \"C#\", \"D\", \"D#\", \"E\", \"F\", \"F#\"]\nminor = [\"Gm\", \"G#m\", \"Am\", \"A#m\", \"Bm\", \"Cm\", \"C#m\", \"Dm\", \"D#m\", \"Em\", \"Fm\", \"F#m\"]\noffset = 0\nnum_chords = len(major)\n\n# initialise lists with zeros\nfor chord in range(num_chords):\n    template[major[chord]] = list()\n    template[minor[chord]] = list()\n    for note in range(num_chords):\n        template[major[chord]].append(0)\n        template[minor[chord]].append(0)\n\nfor chord in range(num_chords):\n    for note in range(num_chords):\n        if note == 0 or note == 7:\n            template[major[chord]][(note + offset) % num_chords] = 1\n            template[minor[chord]][(note + offset) % num_chords] = 1\n        elif note == 4:\n            template[major[chord]][(note + offset) % num_chords] = 1\n        elif note == 3:\n            template[minor[chord]][(note + offset) % num_chords] = 1\n    offset += 1\n\n# debugging\nfor key, value in template.items():\n    print(key, value)\n\n# save as JSON file\nwith open(\"chord_templates.json\", \"w\") as fp:\n    json.dump(template, fp, sort_keys=False)\n    print(\"Saved succesfully to JSON file\")\n"
  },
  {
    "path": "data/chord_templates.json",
    "content": "{\"A#m\": [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0], \"C#m\": [0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0], \"A#\": [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0], \"Dm\": [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0], \"C#\": [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0], \"Bm\": [0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1], \"G#\": [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0], \"Fm\": [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0], \"A\": [0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0], \"C\": [1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0], \"B\": [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1], \"E\": [0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0], \"D\": [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1], \"G\": [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0], \"F\": [0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0], \"G#m\": [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0], \"Em\": [1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0], \"D#m\": [0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1], \"Cm\": [1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0], \"Am\": [0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0], \"D#\": [1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0], \"F#\": [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1], \"Gm\": [1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0], \"F#m\": [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1]}"
  },
  {
    "path": "data/test_chords/readme.txt",
    "content": "Piano chords downloaded from http://ibeat.org/piano-chords-free/. Please credit the source as required by the Creative Commons License."
  },
  {
    "path": "hmm.py",
    "content": "\"\"\"Automatic chord recogniton with HMM, as suggested by Juan P. Bello in\n'A mid level representation for harmonic content in music signals'\n@author ORCHISAMA DAS, 2016\"\"\"\n\nfrom __future__ import division\nfrom chromagram import compute_chroma\nimport os\nimport numpy as np\n\n\n\"\"\"calculates multivariate gaussian matrix from mean and covariance matrices\"\"\"\n\n\ndef multivariate_gaussian(x, meu, cov):\n\n    det = np.linalg.det(cov)\n    val = np.exp(-0.5 * np.dot(np.dot((x - meu).T, np.linalg.inv(cov)), (x - meu)))\n    try:\n        val /= np.sqrt(((2 * np.pi) ** 12) * det)\n    except:\n        print(\"Matrix is not positive, semi-definite\")\n    if np.isnan(val):\n        val = np.finfo(float).eps\n    return val\n\n\n\"\"\"initialize the emission, transition and initialisation matrices for HMM in chord recognition\nPI - initialisation matrix, #A - transition matrix, #B - observation matrix\"\"\"\n\n\ndef initialize(chroma, templates, chords, nested_cof):\n\n    \"\"\"initialising PI with equal probabilities\"\"\"\n    num_chords = len(chords)\n    PI = np.ones(num_chords) / num_chords\n\n    \"\"\"initialising A based on nested circle of fifths\"\"\"\n    eps = 0.01\n    A = np.empty((num_chords, num_chords))\n    for chord in chords:\n        ind = nested_cof.index(chord)\n        t = ind\n        for i in range(num_chords):\n            if t >= num_chords:\n                t = t % num_chords\n            A[ind][t] = (abs(num_chords // 2 - i) + eps) / (\n                num_chords**2 + num_chords * eps\n            )\n            t += 1\n\n    \"\"\"initialising based on tonic triads - Mean matrix; Tonic with dominant - 0.8,\n    tonic with mediant 0.6 and mediant-dominant 0.8, non-triad diagonal elements \n    with 0.2 - covariance matrix\"\"\"\n\n    nFrames = np.shape(chroma)[1]\n    B = np.zeros((num_chords, nFrames))\n    meu_mat = np.zeros((num_chords, num_chords // 2))\n    cov_mat = np.zeros((num_chords, num_chords // 
2, num_chords // 2))\n    meu_mat = np.array(templates)\n    offset = 0\n\n    for i in range(num_chords):\n        if i == num_chords // 2:\n            offset = 0\n        tonic = offset\n        if i < num_chords // 2:\n            mediant = (tonic + 4) % (num_chords // 2)\n        else:\n            mediant = (tonic + 3) % (num_chords // 2)\n        dominant = (tonic + 7) % (num_chords // 2)\n\n        # weighted diagonal\n        cov_mat[i, tonic, tonic] = 0.8\n        cov_mat[i, mediant, mediant] = 0.6\n        cov_mat[i, dominant, dominant] = 0.8\n\n        # off-diagonal - matrix not positive semidefinite, hence determinant is negative\n        # for n in [tonic,mediant,dominant]:\n        #   for m in [tonic, mediant, dominant]:\n        #       if (n is tonic and m is mediant) or (n is mediant and m is tonic):\n        #           cov_mat[i,n,m] = 0.6\n        #       else:\n        #           cov_mat[i,n,m] = 0.8\n\n        # filling non zero diagonals\n        for j in range(num_chords // 2):\n            if cov_mat[i, j, j] == 0:\n                cov_mat[i, j, j] = 0.2\n        offset += 1\n\n    \"\"\"observation matrix B is a multivariate Gaussian calculated from mean vector and \n    covariance matrix\"\"\"\n\n    for m in range(nFrames):\n        for n in range(num_chords):\n            B[n, m] = multivariate_gaussian(\n                chroma[:, m], meu_mat[n, :], cov_mat[n, :, :]\n            )\n\n    return (PI, A, B)\n\n\n\"\"\"Viterbi algorithm to find Path with highest probability - dynamic programming\"\"\"\n\n\ndef viterbi(PI, A, B):\n    (nrow, ncol) = np.shape(B)\n    path = np.zeros((nrow, ncol))\n    states = np.zeros((nrow, ncol))\n    path[:, 0] = PI * B[:, 0]\n\n    for i in range(1, ncol):\n        for j in range(nrow):\n            s = [(path[k, i - 1] * A[k, j] * B[j, i], k) for k in range(nrow)]\n            (prob, state) = max(s)\n            path[j, i] = prob\n            states[j, i - 1] = state\n\n    return (path, states)\n"
  },
  {
    "path": "main.py",
    "content": "import numpy as np\nimport os, sys, getopt\nimport matplotlib.pyplot as plt\nfrom scipy.io.wavfile import read\nimport json\nfrom chromagram import compute_chroma\nimport hmm as hmm\n\n\ndef get_templates(chords):\n    \"\"\"read from JSON file to get chord templates\"\"\"\n    with open(\"data/chord_templates.json\", \"r\") as fp:\n        templates_json = json.load(fp)\n    templates = []\n\n    for chord in chords:\n        if chord == \"N\":\n            continue\n        templates.append(templates_json[chord])\n\n    return templates\n\n\ndef get_nested_circle_of_fifths():\n    chords = [\n        \"N\",\n        \"G\",\n        \"G#\",\n        \"A\",\n        \"A#\",\n        \"B\",\n        \"C\",\n        \"C#\",\n        \"D\",\n        \"D#\",\n        \"E\",\n        \"F\",\n        \"F#\",\n        \"Gm\",\n        \"G#m\",\n        \"Am\",\n        \"A#m\",\n        \"Bm\",\n        \"Cm\",\n        \"C#m\",\n        \"Dm\",\n        \"D#m\",\n        \"Em\",\n        \"Fm\",\n        \"F#m\",\n    ]\n    nested_cof = [\n        \"G\",\n        \"Bm\",\n        \"D\",\n        \"F#m\",\n        \"A\",\n        \"C#m\",\n        \"E\",\n        \"G#m\",\n        \"B\",\n        \"D#m\",\n        \"F#\",\n        \"A#m\",\n        \"C#\",\n        \"Fm\",\n        \"G#\",\n        \"Cm\",\n        \"D#\",\n        \"Gm\",\n        \"A#\",\n        \"Dm\",\n        \"F\",\n        \"Am\",\n        \"C\",\n        \"Em\",\n    ]\n    return chords, nested_cof\n\n\ndef find_chords(\n    x: np.ndarray,\n    fs: int,\n    templates: list,\n    chords: list,\n    nested_cof: list = None,\n    method: str = None,\n    plot: bool = False,\n):\n    \"\"\"\n    Given a mono audio signal x, and its sampling frequency, fs,\n    find chords in it using 'method'\n    Args:\n        x : mono audio signal\n        fs : sampling frequency (Hz)\n        templates: dictionary of chord templates\n        chords: list of chords to search over\n        
nested_cof: nested circle of fifth chords\n        method: template matching or HMM\n        plot: if results should be plotted\n    \"\"\"\n\n    # framing audio, window length = 8192, hop size = 1024 and computing PCP\n    nfft = 8192\n    hop_size = 1024\n    nFrames = int(np.round(len(x) / (nfft - hop_size)))\n    # zero padding to make signal length long enough to have nFrames\n    x = np.append(x, np.zeros(nfft))\n    xFrame = np.empty((nfft, nFrames))\n    start = 0\n    num_chords = len(templates)\n    chroma = np.empty((num_chords // 2, nFrames))\n    id_chord = np.zeros(nFrames, dtype=\"int32\")\n    timestamp = np.zeros(nFrames)\n    max_cor = np.zeros(nFrames)\n\n    # step 1. compute chromagram\n    for n in range(nFrames):\n        xFrame[:, n] = x[start : start + nfft]\n        start = start + nfft - hop_size\n        timestamp[n] = n * (nfft - hop_size) / fs\n        chroma[:, n] = compute_chroma(xFrame[:, n], fs)\n\n    if method == \"match_template\":\n        # correlate 12D chroma vector with each of\n        # 24 major and minor chords\n        for n in range(nFrames):\n            cor_vec = np.zeros(num_chords)\n            for ni in range(num_chords):\n                cor_vec[ni] = np.correlate(chroma[:, n], np.array(templates[ni]))\n            max_cor[n] = np.max(cor_vec)\n            id_chord[n] = np.argmax(cor_vec) + 1\n\n        # if max_cor[n] < threshold, then no chord is played\n        # might need to change threshold value\n        id_chord[np.where(max_cor < 0.8 * np.max(max_cor))] = 0\n        final_chords = [chords[cid] for cid in id_chord]\n\n    elif method == \"hmm\":\n        # get max probability path from Viterbi algorithm\n        (PI, A, B) = hmm.initialize(chroma, templates, chords, nested_cof)\n        (path, states) = hmm.viterbi(PI, A, B)\n\n        # normalize path\n        for i in range(nFrames):\n            path[:, i] /= sum(path[:, i])\n\n        # choose most likely chord - with max value in 'path'\n        
final_chords = []\n        indices = np.argmax(path, axis=0)\n        final_states = np.zeros(nFrames)\n\n        # find no chord zone\n        set_zero = np.where(np.max(path, axis=0) < 0.3 * np.max(path))[0]\n        if np.size(set_zero) > 0:\n            indices[set_zero] = -1\n\n        # identify chords\n        for i in range(nFrames):\n            if indices[i] == -1:\n                final_chords.append(\"NC\")\n            else:\n                final_states[i] = states[indices[i], i]\n                final_chords.append(chords[int(final_states[i])])\n\n    if plot:\n        plt.figure()\n        if method == \"match_template\":\n            plt.yticks(np.arange(num_chords + 1), chords)\n            plt.plot(timestamp, id_chord, marker=\"o\")\n\n        else:\n            plt.yticks(np.arange(num_chords), chords)\n            plt.plot(timestamp, np.int32(final_states), marker=\"o\")\n\n        plt.xlabel(\"Time in seconds\")\n        plt.ylabel(\"Chords\")\n        plt.title(\"Identified chords\")\n        plt.grid(True)\n        plt.show()\n\n    return timestamp, final_chords\n\n\ndef main(argv):\n    input_file = \"\"\n    method = \"\"\n    plot = False\n    has_method = False\n    try:\n        opts, args = getopt.getopt(argv, \"hi:m:p:\", [\"ifile=\", \"method=\", \"plot=\"])\n    except getopt.GetoptError:\n        print(\"main.py -i <inputfile> -m <method>\")\n        sys.exit(2)\n    for opt, arg in opts:\n        if opt == \"-h\":\n            print(\"main.py -i <input_file> -m <method> -p <plot>\")\n            sys.exit()\n        elif opt in (\"-i\", \"--ifile\"):\n            input_file = arg\n        elif opt in (\"-m\", \"--method\"):\n            method = arg\n            has_method = True\n        elif opt in (\"-p\", \"--plot\"):\n            plot = arg\n    if not has_method:\n        method = \"match_template\"\n\n    print(\"Input file is \", input_file)\n    print(\"Method is \", method)\n    directory = os.getcwd() + 
\"/data/test_chords/\"\n    # read the input file\n    (fs, s) = read(directory + input_file)\n    # convert to mono if file is stereo\n    x = s[:, 0] if len(s.shape) else s\n\n    # get chords and circle of fifths\n    chords, nested_cof = get_nested_circle_of_fifths()\n    # get chord templates\n    templates = get_templates(chords)\n\n    # find the chords\n    if method == \"match_template\":\n        timestamp, final_chords = find_chords(\n            x, fs, templates=templates, chords=chords, method=method, plot=plot\n        )\n    else:\n        timestamp, final_chords = find_chords(\n            x,\n            fs,\n            templates=templates,\n            chords=chords[1:],\n            nested_cof=nested_cof,\n            method=method,\n            plot=plot,\n        )\n\n    # print chords with timestamps\n    print(\"Time (s)\", \"Chord\")\n    for n in range(len(timestamp)):\n        print(\"%.3f\" % timestamp[n], final_chords[n])\n\n\nif __name__ == \"__main__\":\n    main(sys.argv[1:])\n"
  }
]