204 lines
4.4 KiB
Racket
204 lines
4.4 KiB
Racket
#lang scribble/manual
|
|
@(require (for-label racket))
|
|
|
|
@title{KeyTrack}
|
|
Key tracker
@section{categories}
|
|
UGens>Analysis>Pitch
|
|
@section{related}
|
|
Classes/BeatTrack, Classes/Loudness, Classes/MFCC, Classes/Onsets, Classes/Pitch
|
|
|
|
@section{description}
|
|
|
|
A (12TET major/minor) key tracker based on a pitch class profile of energy across FFT bins, which is matched against templates for major and minor scales in all transpositions. It assumes a 440 Hz concert A reference. The output is a key index: 0-11 for C major through B major, and 12-23 for C minor through B minor.
|
|
|
|
@section{classmethods}
|
|
|
|
|
|
@section{method}
|
|
kr
|
|
|
|
@section{argument}
|
|
chain
|
|
[fft] Audio input to track. This must have been pre-analysed by a 4096 size FFT. No other FFT sizes are valid except as noted below.
|
|
|
|
@racketblock[
|
|
// With standard hop of half FFT size = 2048 samples
|
|
b = Buffer.alloc(s,4096,1); // for sampling rates 44100 and 48000
|
|
//b = Buffer.alloc(s,8192,1); // for sampling rates 88200 and 96000
|
|
::
|
|
|
|
]
|
|
@section{argument}
|
|
keydecay
|
|
[sk] Number of seconds for the influence of a window on the final key decision to decay by 40 dB (i.e. to 0.01 of its original value).
|
|
|
|
@section{argument}
|
|
chromaleak
|
|
[sk] Each frame, the chroma values are set to the previous values multiplied by chromaleak. A value of 0.0 starts each frame afresh with no memory.
|
|
|
|
@section{examples}
|
|
|
|
|
|
|
|
@racketblock[
|
|
// The following files are test materials on my machine; you will substitute your own filenames here
|
|
// A major
|
|
d = Buffer.read(s,"/Volumes/data/stevebeattrack/samples/100.wav");
|
|
// F major; hard to track!
|
|
d = Buffer.read(s,"/Volumes/data/stevebeattrack/samples/115.wav");
|
|
|
|
// straightforward since no transients; training set from MIREX2006
|
|
// 01 = A major
|
|
// 57 = b minor
|
|
// 78 e minor
|
|
// 08 Bb major
|
|
d = Buffer.read(s, "/Users/nickcollins/Desktop/ML/training_wav/78.wav")
|
|
|
|
b = Buffer.alloc(s, 4096, 1); // for sampling rates 44100 and 48000
|
|
|
|
(
|
|
{
|
|
var in, fft, resample;
|
|
var key, transientdetection;
|
|
|
|
in = PlayBuf.ar(1, d, BufRateScale.kr(d), 1, 0, 1);
|
|
|
|
fft = FFT(b, in);
|
|
|
|
key=KeyTrack.kr(fft, 2.0, 0.5);
|
|
|
|
key.poll;
|
|
|
|
Out.ar(0,Pan2.ar(in));
|
|
}.play
|
|
)
|
|
::
|
|
|
|
|
|
|
|
]
|
|
|
|
@racketblock[
|
|
// alternating major and minor chords as a test
|
|
(
|
|
{
|
|
var in, fft, resample;
|
|
var key, transientdetection;
|
|
|
|
in = Mix(SinOsc.ar((60 + [0, MouseX.kr(3, 4).round(1), 7]).midicps, 0, 0.1));
|
|
|
|
// major dom 7 and minor 7; major keys preferred here
|
|
//in = Mix(SinOsc.ar((60 + (MouseY.kr(0, 11).round(1.0)) + [0, MouseX.kr(3, 4).round(1), 7, 10]).midicps, 0, 0.1));
|
|
|
|
fft = FFT(b, in);
|
|
|
|
key = KeyTrack.kr(fft);
|
|
|
|
key.poll;
|
|
|
|
Out.ar(0,Pan2.ar(in));
|
|
}.play
|
|
)
|
|
::
|
|
|
|
|
|
|
|
]
|
|
|
|
@racketblock[
|
|
// Nice to hear what KeyTrack thinks:
|
|
|
|
d = Buffer.read(s, "/Users/nickcollins/Desktop/ML/training_wav/78.wav")
|
|
b = Buffer.alloc(s, 4096, 1); // for sampling rates 44100 and 48000
|
|
|
|
(
|
|
{
|
|
var in, fft, resample, chord, rootnote, sympath;
|
|
var key, transientdetection;
|
|
|
|
in = PlayBuf.ar(1, d, BufRateScale.kr(d), 1, 0, 1);
|
|
|
|
fft = FFT(b, in);
|
|
|
|
key = KeyTrack.kr(fft, 2.0, 0.5);
|
|
key.poll;
|
|
key = Median.kr(101, key); // Remove outlier wibbles
|
|
|
|
chord = if(key<12, #[0, 4, 7], #[0, 3, 7]);
|
|
rootnote = if(key<12, key, key-12) + 60;
|
|
|
|
sympath = SinOsc.ar((rootnote + chord).midicps, 0, 0.4).mean;
|
|
|
|
Out.ar(0,Pan2.ar(in, -0.5) + Pan2.ar(sympath, 0.5));
|
|
}.play
|
|
)
|
|
::
|
|
|
|
|
|
|
|
]
|
|
|
|
@racketblock[
|
|
// Research Notes:
|
|
// See the MIREX2006 audio key tracking competition and Emilia Gomez's PhD thesis, Tonal Description of Music Audio Signals
|
|
|
|
// The following code was used to create the datasets for the UGen, and would be the basis of extensions
|
|
|
|
// Need one set of bin data for 44100 and one for 48000
|
|
|
|
// KeyTrack calculations, need to make arrays of FFT bins and weights for each chromatic tone.
|
|
// greater resolution, 4096 FFT, avoid lower octaves, too messy there
|
|
// 60*6*2 output arrays
|
|
|
|
(
|
|
var fftN, fftBins, binsize;
|
|
var midinotes;
|
|
var sr;
|
|
var wtlist, binlist;
|
|
|
|
sr = 48000; //44100;
|
|
|
|
fftN = 4096;
|
|
fftBins = fftN.div(2);
|
|
binsize = sr / fftN;
|
|
|
|
midinotes = (33..92); // 60 notes, 55 Hz up to 1661.2187903198 Hz
|
|
|
|
wtlist = List[];
|
|
binlist = List[];
|
|
|
|
// for each note have six harmonic locations
|
|
midinotes.do{ |note|
|
|
var freq, whichbin, lowerbin, prop;
|
|
|
|
freq = note.midicps;
|
|
|
|
6.do{|j|
|
|
var partialfreq, partialamp;
|
|
|
|
partialamp = 1.0 / (j + 1);
|
|
partialfreq = freq * (j + 1);
|
|
|
|
whichbin = partialfreq / binsize;
|
|
lowerbin = whichbin.asInteger;
|
|
prop = 1.0 - (whichbin - lowerbin);
|
|
|
|
binlist.add(lowerbin).add(lowerbin + 1);
|
|
wtlist.add(prop * partialamp).add((1.0 - prop) * partialamp);
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
Post << (binlist) << nl<< nl;
|
|
|
|
Post << (wtlist) << nl<< nl;
|
|
|
|
binlist.size.postln;
|
|
wtlist.size.postln;
|
|
)
|
|
::
|
|
]
|
|
|
|
|