205 lines
4.4 KiB
205 lines
4.4 KiB
#lang scribble/manual
@(require (for-label racket))
Key tracker
@section{categories}
Classes/BeatTrack, Classes/Loudness, Classes/MFCC, Classes/Onsets, Classes/Pitch
A (12TET major/minor) key tracker based on a pitch class profile of energy across FFT bins and matching this to templates for major and minor scales in all transpositions. It assumes a 440 Hz concert A reference. Output is 0-11 C major to B major, 12-23 C minor to B minor.
[fft] Audio input to track. This must have been pre-analysed by a 4096 size FFT. No other FFT sizes are valid except as noted below.
// Allocate the FFT buffer; the standard hop is half the FFT size (2048 samples for a 4096 buffer).
b = Buffer.alloc(s, numFrames: 4096, numChannels: 1); // sampling rates 44100 and 48000
//b = Buffer.alloc(s, numFrames: 8192, numChannels: 1); // sampling rates 88200 and 96000
[keydecay] Number of seconds for the influence of a window on the final key decision to decay by 40dB (to 0.01 of its original value).
[chromaleak] Each frame, the chroma values are set to the previous value multiplied by chromaleak. 0.0 will start each frame afresh with no memory.
// Example 1: track the key of a sound file.
// The following files are test materials on my machine; you will substitute your own filenames here.
// Evaluate only ONE of the Buffer.read lines below: each one reassigns d.

// A major
d = Buffer.read(s,"/Volumes/data/stevebeattrack/samples/100.wav");

// F major; hard to track!
d = Buffer.read(s,"/Volumes/data/stevebeattrack/samples/115.wav");

// straightforward since no transients; training set from MIREX2006
// 01 = A major
// 57 = b minor
// 78 = e minor
// 08 = Bb major
d = Buffer.read(s, "/Users/nickcollins/Desktop/ML/training_wav/78.wav");

// FFT buffer; KeyTrack requires a 4096-point FFT at these sampling rates.
b = Buffer.alloc(s, 4096, 1); // for sampling rates 44100 and 48000

(
{
	var in, fft, key;

	// Loop the sound file at its native rate.
	in = PlayBuf.ar(1, d, BufRateScale.kr(d), 1, 0, 1);

	fft = FFT(b, in);

	// Arguments after the FFT chain are 2.0 and 0.5, as in the original example.
	key = KeyTrack.kr(fft, 2.0, 0.5);

	// Post the tracked key (0-11 major, 12-23 minor) so the result is observable.
	key.poll;

	// Monitor the analysed signal.
	Out.ar(0, Pan2.ar(in));
}.play;
)
// Example 2: alternating major and minor chords as a test.
// Uses the FFT buffer b (4096 frames); allocate it first if it does not exist yet:
// b = Buffer.alloc(s, 4096, 1);
(
{
	var in, fft, key;

	// Triad on middle C; MouseX switches the third between minor (3) and major (4).
	in = Mix(SinOsc.ar((60 + [0, MouseX.kr(3, 4).round(1), 7]).midicps, 0, 0.1));

	// major dom 7 and minor 7; major keys preferred here
	//in = Mix(SinOsc.ar((60 + (MouseY.kr(0, 11).round(1.0)) + [0, MouseX.kr(3, 4).round(1), 7, 10]).midicps, 0, 0.1));

	fft = FFT(b, in);

	key = KeyTrack.kr(fft);

	// Post the tracked key (0-11 major, 12-23 minor) so the result is observable.
	key.poll;

	// Monitor the test chord.
	Out.ar(0, Pan2.ar(in));
}.play;
)
// Example 3: nice to hear what KeyTrack thinks.
// The input plays on the left; a triad built on the tracked key plays on the right.
d = Buffer.read(s, "/Users/nickcollins/Desktop/ML/training_wav/78.wav");

b = Buffer.alloc(s, 4096, 1); // for sampling rates 44100 and 48000

(
{
	var in, fft, key, chord, rootnote, sympath;

	in = PlayBuf.ar(1, d, BufRateScale.kr(d), 1, 0, 1);

	fft = FFT(b, in);

	key = KeyTrack.kr(fft, 2.0, 0.5);

	key = Median.kr(101, key); // Remove outlier wibbles

	// Keys 0-11 are major, 12-23 minor: choose the matching triad and fold the root into one octave.
	chord = if(key<12, #[0, 4, 7], #[0, 3, 7]);
	rootnote = if(key<12, key, key-12) + 60;

	// Sine triad on the estimated tonic, averaged down to a single channel.
	sympath = SinOsc.ar((rootnote + chord).midicps, 0, 0.4).mean;

	Out.ar(0,Pan2.ar(in, -0.5) + Pan2.ar(sympath, 0.5));
}.play;
)
// Research Notes:
// See the MIREX2006 audio key tracking competition and Emilia Gomez's PhD thesis, Tonal Description of Music Audio Signals
// The following code was used to create the datasets for the UGen, and would be the basis of extensions

// Need one set of bin data for 44100 and one for 48000

// KeyTrack calculations, need to make arrays of FFT bins and weights for each chromatic tone.
// greater resolution, 4096 FFT, avoid lower octaves, too messy there
// 60*6*2 output arrays: 60 notes x 6 harmonics x 2 adjacent bins
(
var fftN, fftBins, binsize;
var midinotes;
var sr;
var wtlist, binlist;

sr = 48000; //44100;

fftN = 4096;
fftBins = fftN.div(2);
binsize = sr / fftN; // width of one FFT bin in Hz

midinotes = (33..92); // 60 notes, 55 Hz up to 1661.2187903198 Hz

wtlist = List[];
binlist = List[];

// for each note have six harmonic locations
midinotes.do{ |note|
	var freq, whichbin, lowerbin, prop;

	freq = note.midicps;

	6.do{ |j|
		var partialfreq, partialamp;

		// Harmonic j+1 of the note, weighted by 1/(j+1).
		partialamp = 1.0 / (j + 1);
		partialfreq = freq * (j + 1);

		// Fractional bin position of the harmonic.
		whichbin = partialfreq / binsize;
		lowerbin = whichbin.asInteger;

		// Linear split of the weight between the two neighbouring bins.
		prop = 1.0 - (whichbin - lowerbin);

		binlist.add(lowerbin).add(lowerbin + 1);
		wtlist.add(prop * partialamp).add((1.0 - prop) * partialamp);
	};
};

Post << (binlist) << nl<< nl;
Post << (wtlist) << nl<< nl;
)