path: root/Game/Code/Classes/URecord.pas



unit URecord;

interface

{$IFDEF FPC}
  {$MODE Delphi}
{$ENDIF}

{$I switches.inc}

uses Classes,
     Math,
     SysUtils,
     UCommon,
     UMusic,
     UIni;

const
  BaseToneFreq = 65.4064; // lowest (half-)tone to analyze (C2 = 65.4064 Hz)
  NumHalftones = 36; // C2-B4 (for Whitney and my high voice)

type
  TCaptureBuffer = class
    private
      BufferNew:    TMemoryStream; // buffer for newest samples

      function GetToneString: string; // converts a tone to its string represenatation;
    public
      BufferArray:  array[0..4095] of smallint; // newest 4096 samples
      BufferLong:   TMemoryStream;              // full buffer
      AnalysisBufferSize: integer; // number of samples of BufferArray to analyze

      AudioFormat: TAudioFormatInfo;

      // pitch detection
      ToneValid:    boolean;    // true if Tone contains a valid value (otherwise it contains noise)
      Tone:         integer;    // tone relative to one octave (e.g. C2=C3=C4). Range: 0-11
      ToneAbs:      integer;    // absolute (full range) tone (e.g. C2<>C3). Range: 0..NumHalftones-1

      // methods
      constructor Create;
      destructor Destroy; override;

      procedure Clear;

      procedure ProcessNewBuffer;
      // use to analyze sound from buffers to get new pitch
      procedure AnalyzeBuffer;
      // we call it to analyze sound by checking Autocorrelation
      procedure AnalyzeByAutocorrelation;
      // use this to check one frequency by Autocorrelation
      function AnalyzeAutocorrelationFreq(Freq: real): real;
      function MaxSampleVolume: Single;

      property ToneString: string READ GetToneString;
  end;

const
  DEFAULT_SOURCE_NAME = '[Default]';

type
  TAudioInputSource = record
    Name:   string;
  end;

  // soundcard input-devices information
  TAudioInputDevice = class
    public
      CfgIndex:        integer;   // index of this device in Ini.InputDeviceConfig
      Name:            string;    // soundcard name
      Source:          array of TAudioInputSource; // soundcard input-sources
      SourceRestore:   integer;  // source-index that will be selected after capturing (-1: not detected)
      MicSource:       integer;  // source-index of mic (-1: none detected)

      AudioFormat:     TAudioFormatInfo; // capture format info (e.g. 44.1kHz SInt16 stereo)
      CaptureChannel:  array of TCaptureBuffer; // sound-buffer references used for mono or stereo channel's capture data

      destructor Destroy; override;

      procedure LinkCaptureBuffer(ChannelIndex: integer; Sound: TCaptureBuffer);

      // TODO: add Open/Close functions so Start/Stop becomes faster
      //function Open(): boolean;  virtual; abstract;
      //function Close(): boolean; virtual; abstract;
      function Start(): boolean; virtual; abstract;
      function Stop(): boolean;  virtual; abstract;

      function GetVolume(): single;        virtual; abstract;
      procedure SetVolume(Volume: single); virtual; abstract;
  end;

  TAudioInputProcessor = class
    public
      Sound:  array of TCaptureBuffer; // sound-buffers for every player
      DeviceList: array of TAudioInputDevice;

      constructor Create;

      procedure UpdateInputDeviceConfig;

      // handle microphone input
      procedure HandleMicrophoneData(Buffer: Pointer; Size: Cardinal;
                                     InputDevice: TAudioInputDevice);
  end;

  TAudioInputBase = class( TInterfacedObject, IAudioInput )
    private
      Started: boolean;
    protected
      function UnifyDeviceName(const name: string; deviceIndex: integer): string;
    public
      function GetName: String;           virtual; abstract;
      function InitializeRecord: boolean; virtual; abstract;
      function FinalizeRecord: boolean;   virtual;

      procedure CaptureStart;
      procedure CaptureStop;
  end;


  SmallIntArray = array [0..maxInt shr 1-1] of smallInt;
  PSmallIntArray = ^SmallIntArray;

  function AudioInputProcessor(): TAudioInputProcessor;

implementation

uses
  ULog,
  UMain;

var
  singleton_AudioInputProcessor : TAudioInputProcessor = nil;


// FIXME: Race-Conditions between Callback-thread and main-thread
//        on BufferArray (maybe BufferNew also).
//        Use SDL-mutexes to solve this problem.


{ Global }

function AudioInputProcessor(): TAudioInputProcessor;
begin
  if singleton_AudioInputProcessor = nil then
    singleton_AudioInputProcessor := TAudioInputProcessor.create();

  result := singleton_AudioInputProcessor;
end;


{ TAudioInputDevice }

destructor TAudioInputDevice.Destroy;
begin
  Stop();
  Source := nil;
  CaptureChannel := nil;
  FreeAndNil(AudioFormat);
  inherited Destroy;
end;

procedure TAudioInputDevice.LinkCaptureBuffer(ChannelIndex: integer; Sound: TCaptureBuffer);
begin
  // check bounds
  if ((ChannelIndex < 0) or (ChannelIndex > High(CaptureChannel))) then
    Exit;

  // reset audio-format of old capture-buffer
  if (CaptureChannel[ChannelIndex] <> nil) then
    CaptureChannel[ChannelIndex].AudioFormat := nil;

  // set audio-format of new capture-buffer
  if (Sound <> nil) then
    Sound.AudioFormat := AudioFormat;

  // replace old with new buffer
  CaptureChannel[ChannelIndex] := Sound;
end;

{ TSound }

constructor TCaptureBuffer.Create;
begin
  inherited;
  BufferNew := TMemoryStream.Create;
  BufferLong := TMemoryStream.Create;
  AnalysisBufferSize := Min(4*1024, Length(BufferArray));
end;

destructor TCaptureBuffer.Destroy;
begin
  AudioFormat := nil;
  FreeAndNil(BufferNew);
  FreeAndNil(BufferLong);
  inherited;
end;

procedure TCaptureBuffer.Clear;
begin
  if assigned(BufferNew) then
    BufferNew.Clear;
  if assigned(BufferLong) then
    BufferLong.Clear;
  FillChar(BufferArray[0], Length(BufferArray) * SizeOf(SmallInt), 0);
end;

procedure TCaptureBuffer.ProcessNewBuffer;
var
  SkipCount:   integer;
  NumSamples:  integer;
  SampleIndex: integer;
begin
  // process BufferArray
  SkipCount := 0;
  NumSamples := BufferNew.Size div 2;

  // check if we have more new samples than we can store
  if (NumSamples > Length(BufferArray)) then
  begin
    // discard the oldest of the new samples
    SkipCount := NumSamples - Length(BufferArray);
    NumSamples := Length(BufferArray);
  end;

  // move old samples to the beginning of the array (if necessary)
  // TODO: should be a ring-buffer instead
  for SampleIndex := NumSamples to High(BufferArray) do
    BufferArray[SampleIndex-NumSamples] := BufferArray[SampleIndex];

  // skip samples if necessary
  BufferNew.Seek(2*SkipCount, soBeginning);
  // copy samples
  BufferNew.ReadBuffer(BufferArray[Length(BufferArray)-NumSamples], 2*NumSamples);

  // save capture-data to BufferLong if neccessary
  if (Ini.SavePlayback = 1) then
  begin
    // this is just for debugging (approx 15MB per player for a 3min song!!!)
    // For an in-game replay-mode we need to compress data so we do not
    // waste that much memory. Maybe ogg-vorbis with voice-preset in fast-mode?
    // Or we could use a faster but not that efficient lossless compression.
    BufferNew.Seek(0, soBeginning);
    BufferLong.CopyFrom(BufferNew, BufferNew.Size);
  end;
end;

procedure TCaptureBuffer.AnalyzeBuffer;
var
  Volume:    single;
  MaxVolume: single;
  SampleIndex: integer;
  Threshold: single;
begin
  ToneValid := false;
  ToneAbs := -1;
  Tone    := -1;

  // find maximum volume of first 1024 samples
  MaxVolume := 0;
  for SampleIndex := 0 to 1023 do
  begin
    Volume := Abs(BufferArray[SampleIndex]) / -Low(Smallint);
    if Volume > MaxVolume then
       MaxVolume := Volume;
  end;

  Threshold := IThresholdVals[Ini.ThresholdIndex];
  
  // check if signal has an acceptable volume (ignore background-noise)
  if MaxVolume >= Threshold then
  begin
    // analyse the current voice pitch
    AnalyzeByAutocorrelation;
    ToneValid := true;
  end;
end;

procedure TCaptureBuffer.AnalyzeByAutocorrelation;
var
  ToneIndex: integer;
  CurFreq:   real;
  CurWeight: real;
  MaxWeight: real;
  MaxTone:   integer;
const
  HalftoneBase = 1.05946309436; // 2^(1/12) -> HalftoneBase^12 = 2 (one octave)
begin
  // prepare to analyze
  MaxWeight := -1;
  MaxTone := 0; // this is not needed, but it satifies the compiler

  // analyze halftones
  // Note: at the lowest tone (~65Hz) and a buffer-size of 4096
  // at 44.1 (or 48kHz) only 6 (or 5) samples are compared, this might be
  // too few samples -> use a bigger buffer-size
  for ToneIndex := 0 to NumHalftones-1 do
  begin
    CurFreq := BaseToneFreq * Power(HalftoneBase, ToneIndex);
    CurWeight := AnalyzeAutocorrelationFreq(CurFreq);

    // TODO: prefer higher frequencies (use >= or use downto)
    if (CurWeight > MaxWeight) then
    begin
      // this frequency has a higher weight
      MaxWeight := CurWeight;
      MaxTone   := ToneIndex;
    end;
  end;

  ToneAbs := MaxTone;
  Tone    := MaxTone mod 12;
end;

// result medium difference
function TCaptureBuffer.AnalyzeAutocorrelationFreq(Freq: real): real;
var
  Dist:                   real;    // distance (0=equal .. 1=totally different) between correlated samples
  AccumDist:              real;    // accumulated distances
  SampleIndex:            integer; // index of sample to analyze
  CorrelatingSampleIndex: integer; // index of sample one period ahead
  SamplesPerPeriod:       integer; // samples in one period
begin
  SampleIndex := 0;
  SamplesPerPeriod := Round(AudioFormat.SampleRate/Freq);
  CorrelatingSampleIndex := SampleIndex + SamplesPerPeriod;

  AccumDist := 0;

  // compare correlating samples
  while (CorrelatingSampleIndex < AnalysisBufferSize) do
  begin
    // calc distance (correlation: 1-dist) to corresponding sample in next period
    Dist := Abs(BufferArray[SampleIndex] - BufferArray[CorrelatingSampleIndex]) /
            High(Word);
    AccumDist := AccumDist + Dist;
    Inc(SampleIndex);
    Inc(CorrelatingSampleIndex);
  end;

  // return "inverse" average distance (=correlation)
  Result := 1 - AccumDist / AnalysisBufferSize;
end;

function TCaptureBuffer.MaxSampleVolume: Single;
var
  lSampleIndex: Integer;
  lMaxVol : Longint;
begin;
  // FIXME: lock buffer to avoid race-conditions
  lMaxVol := 0;
  for lSampleIndex := 0 to High(BufferArray) do
  begin
    if Abs(BufferArray[lSampleIndex]) > lMaxVol then
      lMaxVol := Abs(BufferArray[lSampleIndex]);
  end;

  result := lMaxVol / -Low(Smallint);
end;

const
  ToneStrings: array[0..11] of string = (
    'C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B'
  );

function TCaptureBuffer.GetToneString: string;
begin
  if (ToneValid) then
    Result := ToneStrings[Tone] + IntToStr(ToneAbs div 12 + 2)
  else
    Result := '-';
end;


{ TAudioInputProcessor }

constructor TAudioInputProcessor.Create;
var
  i:        integer;
begin
  inherited;
  SetLength(Sound, 6 {max players});//Ini.Players+1);
  for i := 0 to High(Sound) do
  begin
    Sound[i] := TCaptureBuffer.Create;
  end;
end;

// updates InputDeviceConfig with current input-device information
// See: TIni.LoadInputDeviceCfg()
procedure TAudioInputProcessor.UpdateInputDeviceConfig;
var
  deviceIndex: integer;
  newDevice: boolean;
  deviceIniIndex: integer;
  deviceCfg: PInputDeviceConfig;
  device: TAudioInputDevice;
  channelCount: integer;
  channelIndex: integer;
  i: integer;
begin
  // Input devices - append detected soundcards
  for deviceIndex := 0 to High(DeviceList) do
  begin
    newDevice := true;
    //Search for Card in List
    for deviceIniIndex := 0 to High(Ini.InputDeviceConfig) do
    begin
      deviceCfg := @Ini.InputDeviceConfig[deviceIniIndex];
      device := DeviceList[deviceIndex];

      if (deviceCfg.Name = Trim(device.Name)) then
      begin
        newDevice := false;

        // store highest channel index as an offset for the new channels
        channelIndex := High(deviceCfg.ChannelToPlayerMap);
        // add missing channels or remove non-existing ones
        SetLength(deviceCfg.ChannelToPlayerMap, device.AudioFormat.Channels);
        // initialize added channels to 0
        for i := channelIndex+1 to High(deviceCfg.ChannelToPlayerMap) do
        begin
          deviceCfg.ChannelToPlayerMap[i] := 0;
        end;

        // associate ini-index with device
        device.CfgIndex := deviceIniIndex;
        break;
      end;
    end;

    //If not in List -> Add
    if newDevice then
    begin
      // resize list
      SetLength(Ini.InputDeviceConfig, Length(Ini.InputDeviceConfig)+1);
      deviceCfg := @Ini.InputDeviceConfig[High(Ini.InputDeviceConfig)];
      device := DeviceList[deviceIndex];

      // associate ini-index with device
      device.CfgIndex := High(Ini.InputDeviceConfig);

      deviceCfg.Name := Trim(device.Name);
      deviceCfg.Input := 0;

      channelCount := device.AudioFormat.Channels;
      SetLength(deviceCfg.ChannelToPlayerMap, channelCount);

      for channelIndex := 0 to channelCount-1 do
      begin
        // set default at first start of USDX (1st device, 1st channel -> player1)
        if ((channelIndex = 0) and (device.CfgIndex = 0)) then
          deviceCfg.ChannelToPlayerMap[0] := 1
        else
          deviceCfg.ChannelToPlayerMap[channelIndex] := 0;
      end;
    end;
  end;
end;

{*
 * Handle captured microphone input data.
 * Params:
 *   Buffer - buffer of signed 16bit interleaved stereo PCM-samples.
 *     Interleaved means that a right-channel sample follows a left-
 *     channel sample and vice versa (0:left[0],1:right[0],2:left[1],...).
 *   Length - number of bytes in Buffer
 *   Input - Soundcard-Input used for capture
 *}
procedure TAudioInputProcessor.HandleMicrophoneData(Buffer: Pointer; Size: Cardinal; InputDevice: TAudioInputDevice);
var
  Value:        integer;
  ChannelBuffer: PChar;         // buffer handled as array of bytes (offset relative to channel)
  SampleBuffer: PSmallIntArray; // buffer handled as array of samples
  Boost:  byte;
  ChannelIndex: integer;
  CaptureChannel: TCaptureBuffer;
  AudioFormat: TAudioFormatInfo;
  FrameSize: integer;
  NumSamples: integer;
  NumFrames: integer; // number of frames (stereo: 2xsamples)
  i: integer;
begin
  // set boost
  case Ini.MicBoost of
    0:   Boost := 1;
    1:   Boost := 2;
    2:   Boost := 4;
    3:   Boost := 8;
    else Boost := 1;
  end;

  AudioFormat := InputDevice.AudioFormat;

  // FIXME: At the moment we assume a SInt16 format
  // TODO:  use SDL_AudioConvert to convert to SInt16 but do NOT change the
  // samplerate (SDL does not convert 44.1kHz to 48kHz so we might get wrong
  // results in the analysis phase otherwise)
  if (AudioFormat.Format <> asfS16) then
  begin
    // this only occurs if a developer choosed an unsupported input sample-format
    Log.CriticalError('TAudioInputProcessor.HandleMicrophoneData: Wrong sample-format');
    Exit;
  end;

  // interpret buffer as buffer of bytes
  SampleBuffer := Buffer;

  NumSamples := Size div SizeOf(Smallint);

  // boost buffer
  // TODO: remove this senseless stuff - adjust the threshold instead
  for i := 0 to NumSamples-1 do
  begin
    Value := SampleBuffer^[i] * Boost;

    // TODO :  JB -  This will clip the audio... cant we reduce the "Boost" if the data clips ??
    if Value > High(Smallint) then
      Value := High(Smallint);

    if Value < Low(Smallint) then
      Value := Low(Smallint);

    SampleBuffer^[i] := Value;
  end;

  // samples per channel
  FrameSize := AudioFormat.Channels * SizeOf(SmallInt);
  NumFrames := Size div FrameSize;

  // process channels
  for ChannelIndex := 0 to High(InputDevice.CaptureChannel) do
  begin
    CaptureChannel := InputDevice.CaptureChannel[ChannelIndex];
    if (CaptureChannel <> nil) then
    begin
      // set offset according to channel index
      ChannelBuffer := @PChar(Buffer)[ChannelIndex * SizeOf(SmallInt)];

      // TODO: remove BufferNew and write to BufferArray directly

      CaptureChannel.BufferNew.Clear;
      for i := 0 to NumFrames-1 do
      begin
        CaptureChannel.BufferNew.Write(ChannelBuffer[i*FrameSize], SizeOf(SmallInt));
      end;
      CaptureChannel.ProcessNewBuffer();
    end;
  end;
end;


{ TAudioInputBase }

function TAudioInputBase.FinalizeRecord: boolean;
var
  i: integer;
begin
  for i := 0 to High(AudioInputProcessor.DeviceList) do
    AudioInputProcessor.DeviceList[i].Free();
  AudioInputProcessor.DeviceList := nil;
end;

{*
 * Start capturing on all used input-device.
 *}
procedure TAudioInputBase.CaptureStart;
var
  S:  integer;
  DeviceIndex: integer;
  ChannelIndex: integer;
  Device: TAudioInputDevice;
  DeviceCfg: PInputDeviceConfig;
  DeviceUsed: boolean;
  Player: integer;
begin
  if (Started) then
    CaptureStop();

  // reset buffers
  for S := 0 to High(AudioInputProcessor.Sound) do
    AudioInputProcessor.Sound[S].Clear;

  // start capturing on each used device
  for DeviceIndex := 0 to High(AudioInputProcessor.DeviceList) do
  begin
    Device := AudioInputProcessor.DeviceList[DeviceIndex];
    if not assigned(Device) then
      continue;
    DeviceCfg := @Ini.InputDeviceConfig[Device.CfgIndex];

    DeviceUsed := false;

    // check if device is used
    for ChannelIndex := 0 to High(DeviceCfg.ChannelToPlayerMap) do
    begin
      Player := DeviceCfg.ChannelToPlayerMap[ChannelIndex]-1;
      if (Player < 0) or (Player >= PlayersPlay) then
      begin
        Device.LinkCaptureBuffer(ChannelIndex, nil);
      end
      else
      begin
        Device.LinkCaptureBuffer(ChannelIndex, AudioInputProcessor.Sound[Player]);
        DeviceUsed := true;
      end;
    end;

    // start device if used
    if (DeviceUsed) then
    begin
      //Log.BenchmarkStart(2);
      Device.Start();
      //Log.BenchmarkEnd(2);
      //Log.LogBenchmark('Device.Start', 2) ;
    end;
  end;

  Started := true;
end;

{*
 * Stop input-capturing on all soundcards.
 *}
procedure TAudioInputBase.CaptureStop;
var
  DeviceIndex: integer;
  Device: TAudioInputDevice;
begin
  for DeviceIndex := 0 to High(AudioInputProcessor.DeviceList) do
  begin
    Device := AudioInputProcessor.DeviceList[DeviceIndex];
    if not assigned(Device) then
      continue;
    Device.Stop();
  end;

  Started := false;
end;

function TAudioInputBase.UnifyDeviceName(const name: string; deviceIndex: integer): string;
var
  count: integer; // count of devices with this name

  function IsDuplicate(const name: string): boolean;
  var
    i: integer;
  begin
    Result := False;
    // search devices with same description
    For i := 0 to deviceIndex-1 do
    begin
      if (AudioInputProcessor.DeviceList[i].Name = name) then
      begin
        Result := True;
        Break;
      end;
    end;
  end;
begin
  count := 1;
  result := name;

  // if there is another device with the same ID, search for an available name
  while (IsDuplicate(result)) do
  begin
    Inc(count);
    // set description
    result := name + ' ('+IntToStr(count)+')';
  end;
end;

end.
unit URecord;

interface

{$IFDEF FPC}
  {$MODE Delphi}
{$ENDIF}

{$I switches.inc}

uses Classes,
     Math,
     SysUtils,
     UCommon,
     UMusic,
     UIni;

const
  BaseToneFreq = 65.4064; // lowest (half-)tone to analyze (C2 = 65.4064 Hz)
  NumHalftones = 36; // C2-B4 (for Whitney and my high voice)

type
  TCaptureBuffer = class
    private
      BufferNew:    TMemoryStream; // buffer for newest samples

      function GetToneString: string; // converts a tone to its string represenatation;
    public
      BufferArray:  array[0..4095] of smallint; // newest 4096 samples
      BufferLong:   TMemoryStream;              // full buffer
      AnalysisBufferSize: integer; // number of samples of BufferArray to analyze

      AudioFormat: TAudioFormatInfo;

      // pitch detection
      ToneValid:    boolean;    // true if Tone contains a valid value (otherwise it contains noise)
      Tone:         integer;    // tone relative to one octave (e.g. C2=C3=C4). Range: 0-11
      ToneAbs:      integer;    // absolute (full range) tone (e.g. C2<>C3). Range: 0..NumHalftones-1

      // methods
      constructor Create;
      destructor Destroy; override;

      procedure Clear;

      procedure ProcessNewBuffer;
      // use to analyze sound from buffers to get new pitch
      procedure AnalyzeBuffer;
      // we call it to analyze sound by checking Autocorrelation
      procedure AnalyzeByAutocorrelation;
      // use this to check one frequency by Autocorrelation
      function AnalyzeAutocorrelationFreq(Freq: real): real;
      function MaxSampleVolume: Single;

      property ToneString: string READ GetToneString;
  end;

const
  DEFAULT_SOURCE_NAME = '[Default]';

type
  TAudioInputSource = record
    Name:   string;
  end;

  // soundcard input-devices information
  TAudioInputDevice = class
    public
      CfgIndex:        integer;   // index of this device in Ini.InputDeviceConfig
      Name:            string;    // soundcard name
      Source:          array of TAudioInputSource; // soundcard input-sources
      SourceRestore:   integer;  // source-index that will be selected after capturing (-1: not detected)
      MicSource:       integer;  // source-index of mic (-1: none detected)

      AudioFormat:     TAudioFormatInfo; // capture format info (e.g. 44.1kHz SInt16 stereo)
      CaptureChannel:  array of TCaptureBuffer; // sound-buffer references used for mono or stereo channel's capture data

      destructor Destroy; override;

      procedure LinkCaptureBuffer(ChannelIndex: integer; Sound: TCaptureBuffer);

      // TODO: add Open/Close functions so Start/Stop becomes faster
      //function Open(): boolean;  virtual; abstract;
      //function Close(): boolean; virtual; abstract;
      function Start(): boolean; virtual; abstract;
      function Stop(): boolean;  virtual; abstract;

      function GetVolume(): single;        virtual; abstract;
      procedure SetVolume(Volume: single); virtual; abstract;
  end;

  TAudioInputProcessor = class
    public
      Sound:  array of TCaptureBuffer; // sound-buffers for every player
      DeviceList: array of TAudioInputDevice;

      constructor Create;

      procedure UpdateInputDeviceConfig;

      // handle microphone input
      procedure HandleMicrophoneData(Buffer: Pointer; Size: Cardinal;
                                     InputDevice: TAudioInputDevice);
  end;

  TAudioInputBase = class( TInterfacedObject, IAudioInput )
    private
      Started: boolean;
    protected
      function UnifyDeviceName(const name: string; deviceIndex: integer): string;
    public
      function GetName: String;           virtual; abstract;
      function InitializeRecord: boolean; virtual; abstract;
      function FinalizeRecord: boolean;   virtual;

      procedure CaptureStart;
      procedure CaptureStop;
  end;


  SmallIntArray = array [0..maxInt shr 1-1] of smallInt;
  PSmallIntArray = ^SmallIntArray;

  function AudioInputProcessor(): TAudioInputProcessor;

implementation

uses
  ULog,
  UMain;

var
  singleton_AudioInputProcessor : TAudioInputProcessor = nil;


// FIXME: Race-Conditions between Callback-thread and main-thread
//        on BufferArray (maybe BufferNew also).
//        Use SDL-mutexes to solve this problem.


{ Global }

function AudioInputProcessor(): TAudioInputProcessor;
begin
  if singleton_AudioInputProcessor = nil then
    singleton_AudioInputProcessor := TAudioInputProcessor.create();

  result := singleton_AudioInputProcessor;
end;


{ TAudioInputDevice }

destructor TAudioInputDevice.Destroy;
begin
  Stop();
  Source := nil;
  CaptureChannel := nil;
  FreeAndNil(AudioFormat);
  inherited Destroy;
end;

procedure TAudioInputDevice.LinkCaptureBuffer(ChannelIndex: integer; Sound: TCaptureBuffer);
begin
  // check bounds
  if ((ChannelIndex < 0) or (ChannelIndex > High(CaptureChannel))) then
    Exit;

  // reset audio-format of old capture-buffer
  if (CaptureChannel[ChannelIndex] <> nil) then
    CaptureChannel[ChannelIndex].AudioFormat := nil;

  // set audio-format of new capture-buffer
  if (Sound <> nil) then
    Sound.AudioFormat := AudioFormat;

  // replace old with new buffer
  CaptureChannel[ChannelIndex] := Sound;
end;

{ TSound }

constructor TCaptureBuffer.Create;
begin
  inherited;
  BufferNew := TMemoryStream.Create;
  BufferLong := TMemoryStream.Create;
  AnalysisBufferSize := Min(4*1024, Length(BufferArray));
end;

destructor TCaptureBuffer.Destroy;
begin
  AudioFormat := nil;
  FreeAndNil(BufferNew);
  FreeAndNil(BufferLong);
  inherited;
end;

procedure TCaptureBuffer.Clear;
begin
  if assigned(BufferNew) then
    BufferNew.Clear;
  if assigned(BufferLong) then
    BufferLong.Clear;
  FillChar(BufferArray[0], Length(BufferArray) * SizeOf(SmallInt), 0);
end;

procedure TCaptureBuffer.ProcessNewBuffer;
var
  SkipCount:   integer;
  NumSamples:  integer;
  SampleIndex: integer;
begin
  // process BufferArray
  SkipCount := 0;
  NumSamples := BufferNew.Size div 2;

  // check if we have more new samples than we can store
  if (NumSamples > Length(BufferArray)) then
  begin
    // discard the oldest of the new samples
    SkipCount := NumSamples - Length(BufferArray);
    NumSamples := Length(BufferArray);
  end;

  // move old samples to the beginning of the array (if necessary)
  // TODO: should be a ring-buffer instead
  for SampleIndex := NumSamples to High(BufferArray) do
    BufferArray[SampleIndex-NumSamples] := BufferArray[SampleIndex];

  // skip samples if necessary
  BufferNew.Seek(2*SkipCount, soBeginning);
  // copy samples
  BufferNew.ReadBuffer(BufferArray[Length(BufferArray)-NumSamples], 2*NumSamples);

  // save capture-data to BufferLong if neccessary
  if (Ini.SavePlayback = 1) then
  begin
    // this is just for debugging (approx 15MB per player for a 3min song!!!)
    // For an in-game replay-mode we need to compress data so we do not
    // waste that much memory. Maybe ogg-vorbis with voice-preset in fast-mode?
    // Or we could use a faster but not that efficient lossless compression.
    BufferNew.Seek(0, soBeginning);
    BufferLong.CopyFrom(BufferNew, BufferNew.Size);
  end;
end;

procedure TCaptureBuffer.AnalyzeBuffer;
var
  Volume:    single;
  MaxVolume: single;
  SampleIndex: integer;
  Threshold: single;
begin
  ToneValid := false;
  ToneAbs := -1;
  Tone    := -1;

  // find maximum volume of first 1024 samples
  MaxVolume := 0;
  for SampleIndex := 0 to 1023 do
  begin
    Volume := Abs(BufferArray[SampleIndex]) / -Low(Smallint);
    if Volume > MaxVolume then
       MaxVolume := Volume;
  end;

  Threshold := IThresholdVals[Ini.ThresholdIndex];
  
  // check if signal has an acceptable volume (ignore background-noise)
  if MaxVolume >= Threshold then
  begin
    // analyse the current voice pitch
    AnalyzeByAutocorrelation;
    ToneValid := true;
  end;
end;

procedure TCaptureBuffer.AnalyzeByAutocorrelation;
var
  ToneIndex: integer;
  CurFreq:   real;
  CurWeight: real;
  MaxWeight: real;
  MaxTone:   integer;
const
  HalftoneBase = 1.05946309436; // 2^(1/12) -> HalftoneBase^12 = 2 (one octave)
begin
  // prepare to analyze
  MaxWeight := -1;
  MaxTone := 0; // this is not needed, but it satifies the compiler

  // analyze halftones
  // Note: at the lowest tone (~65Hz) and a buffer-size of 4096
  // at 44.1 (or 48kHz) only 6 (or 5) samples are compared, this might be
  // too few samples -> use a bigger buffer-size
  for ToneIndex := 0 to NumHalftones-1 do
  begin
    CurFreq := BaseToneFreq * Power(HalftoneBase, ToneIndex);
    CurWeight := AnalyzeAutocorrelationFreq(CurFreq);

    // TODO: prefer higher frequencies (use >= or use downto)
    if (CurWeight > MaxWeight) then
    begin
      // this frequency has a higher weight
      MaxWeight := CurWeight;
      MaxTone   := ToneIndex;
    end;
  end;

  ToneAbs := MaxTone;
  Tone    := MaxTone mod 12;
end;

// result medium difference
function TCaptureBuffer.AnalyzeAutocorrelationFreq(Freq: real): real;
var
  Dist:                   real;    // distance (0=equal .. 1=totally different) between correlated samples
  AccumDist:              real;    // accumulated distances
  SampleIndex:            integer; // index of sample to analyze
  CorrelatingSampleIndex: integer; // index of sample one period ahead
  SamplesPerPeriod:       integer; // samples in one period
begin
  SampleIndex := 0;
  SamplesPerPeriod := Round(AudioFormat.SampleRate/Freq);
  CorrelatingSampleIndex := SampleIndex + SamplesPerPeriod;

  AccumDist := 0;

  // compare correlating samples
  while (CorrelatingSampleIndex < AnalysisBufferSize) do
  begin
    // calc distance (correlation: 1-dist) to corresponding sample in next period
    Dist := Abs(BufferArray[SampleIndex] - BufferArray[CorrelatingSampleIndex]) /
            High(Word);
    AccumDist := AccumDist + Dist;
    Inc(SampleIndex);
    Inc(CorrelatingSampleIndex);
  end;

  // return "inverse" average distance (=correlation)
  Result := 1 - AccumDist / AnalysisBufferSize;
end;

function TCaptureBuffer.MaxSampleVolume: Single;
var
  lSampleIndex: Integer;
  lMaxVol : Longint;
begin;
  // FIXME: lock buffer to avoid race-conditions
  lMaxVol := 0;
  for lSampleIndex := 0 to High(BufferArray) do
  begin
    if Abs(BufferArray[lSampleIndex]) > lMaxVol then
      lMaxVol := Abs(BufferArray[lSampleIndex]);
  end;

  result := lMaxVol / -Low(Smallint);
end;

const
  ToneStrings: array[0..11] of string = (
    'C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B'
  );

function TCaptureBuffer.GetToneString: string;
begin
  if (ToneValid) then
    Result := ToneStrings[Tone] + IntToStr(ToneAbs div 12 + 2)
  else
    Result := '-';
end;


{ TAudioInputProcessor }

constructor TAudioInputProcessor.Create;
var
  i:        integer;
begin
  inherited;
  SetLength(Sound, 6 {max players});//Ini.Players+1);
  for i := 0 to High(Sound) do
  begin
    Sound[i] := TCaptureBuffer.Create;
  end;
end;

// updates InputDeviceConfig with current input-device information
// See: TIni.LoadInputDeviceCfg()
procedure TAudioInputProcessor.UpdateInputDeviceConfig;
var
  deviceIndex: integer;
  newDevice: boolean;
  deviceIniIndex: integer;
  deviceCfg: PInputDeviceConfig;
  device: TAudioInputDevice;
  channelCount: integer;
  channelIndex: integer;
  i: integer;
begin
  // Input devices - append detected soundcards
  for deviceIndex := 0 to High(DeviceList) do
  begin
    newDevice := true;
    //Search for Card in List
    for deviceIniIndex := 0 to High(Ini.InputDeviceConfig) do
    begin
      deviceCfg := @Ini.InputDeviceConfig[deviceIniIndex];
      device := DeviceList[deviceIndex];

      if (deviceCfg.Name = Trim(device.Name)) then
      begin
        newDevice := false;

        // store highest channel index as an offset for the new channels
        channelIndex := High(deviceCfg.ChannelToPlayerMap);
        // add missing channels or remove non-existing ones
        SetLength(deviceCfg.ChannelToPlayerMap, device.AudioFormat.Channels);
        // initialize added channels to 0
        for i := channelIndex+1 to High(deviceCfg.ChannelToPlayerMap) do
        begin
          deviceCfg.ChannelToPlayerMap[i] := 0;
        end;

        // associate ini-index with device
        device.CfgIndex := deviceIniIndex;
        break;
      end;
    end;

    //If not in List -> Add
    if newDevice then
    begin
      // resize list
      SetLength(Ini.InputDeviceConfig, Length(Ini.InputDeviceConfig)+1);
      deviceCfg := @Ini.InputDeviceConfig[High(Ini.InputDeviceConfig)];
      device := DeviceList[deviceIndex];

      // associate ini-index with device
      device.CfgIndex := High(Ini.InputDeviceConfig);

      deviceCfg.Name := Trim(device.Name);
      deviceCfg.Input := 0;

      channelCount := device.AudioFormat.Channels;
      SetLength(deviceCfg.ChannelToPlayerMap, channelCount);

      for channelIndex := 0 to channelCount-1 do
      begin
        // set default at first start of USDX (1st device, 1st channel -> player1)
        if ((channelIndex = 0) and (device.CfgIndex = 0)) then
          deviceCfg.ChannelToPlayerMap[0] := 1
        else
          deviceCfg.ChannelToPlayerMap[channelIndex] := 0;
      end;
    end;
  end;
end;

{*
 * Handle captured microphone input data.
 * Params:
 *   Buffer - buffer of signed 16bit interleaved stereo PCM-samples.
 *     Interleaved means that a right-channel sample follows a left-
 *     channel sample and vice versa (0:left[0],1:right[0],2:left[1],...).
 *   Length - number of bytes in Buffer
 *   Input - Soundcard-Input used for capture
 *}
procedure TAudioInputProcessor.HandleMicrophoneData(Buffer: Pointer; Size: Cardinal; InputDevice: TAudioInputDevice);
var
  Value:        integer;
  ChannelBuffer: PChar;         // buffer handled as array of bytes (offset relative to channel)
  SampleBuffer: PSmallIntArray; // buffer handled as array of samples
  Boost:  byte;
  ChannelIndex: integer;
  CaptureChannel: TCaptureBuffer;
  AudioFormat: TAudioFormatInfo;
  FrameSize: integer;
  NumSamples: integer;
  NumFrames: integer; // number of frames (stereo: 2xsamples)
  i: integer;
begin
  // set boost
  case Ini.MicBoost of
    0:   Boost := 1;
    1:   Boost := 2;
    2:   Boost := 4;
    3:   Boost := 8;
    else Boost := 1;
  end;

  AudioFormat := InputDevice.AudioFormat;

  // FIXME: At the moment we assume a SInt16 format
  // TODO:  use SDL_AudioConvert to convert to SInt16 but do NOT change the
  // samplerate (SDL does not convert 44.1kHz to 48kHz so we might get wrong
  // results in the analysis phase otherwise)
  if (AudioFormat.Format <> asfS16) then
  begin
    // this only occurs if a developer choosed an unsupported input sample-format
    Log.CriticalError('TAudioInputProcessor.HandleMicrophoneData: Wrong sample-format');
    Exit;
  end;

  // interpret buffer as buffer of bytes
  SampleBuffer := Buffer;

  NumSamples := Size div SizeOf(Smallint);

  // boost buffer
  // TODO: remove this senseless stuff - adjust the threshold instead
  for i := 0 to NumSamples-1 do
  begin
    Value := SampleBuffer^[i] * Boost;

    // TODO :  JB -  This will clip the audio... cant we reduce the "Boost" if the data clips ??
    if Value > High(Smallint) then
      Value := High(Smallint);

    if Value < Low(Smallint) then
      Value := Low(Smallint);

    SampleBuffer^[i] := Value;
  end;

  // samples per channel
  FrameSize := AudioFormat.Channels * SizeOf(SmallInt);
  NumFrames := Size div FrameSize;

  // process channels
  for ChannelIndex := 0 to High(InputDevice.CaptureChannel) do
  begin
    CaptureChannel := InputDevice.CaptureChannel[ChannelIndex];
    if (CaptureChannel <> nil) then
    begin
      // set offset according to channel index
      ChannelBuffer := @PChar(Buffer)[ChannelIndex * SizeOf(SmallInt)];

      // TODO: remove BufferNew and write to BufferArray directly

      CaptureChannel.BufferNew.Clear;
      for i := 0 to NumFrames-1 do
      begin
        CaptureChannel.BufferNew.Write(ChannelBuffer[i*FrameSize], SizeOf(SmallInt));
      end;
      CaptureChannel.ProcessNewBuffer();
    end;
  end;
end;


{ TAudioInputBase }

function TAudioInputBase.FinalizeRecord: boolean;
var
  i: integer;
begin
  for i := 0 to High(AudioInputProcessor.DeviceList) do
    AudioInputProcessor.DeviceList[i].Free();
  AudioInputProcessor.DeviceList := nil;
end;

{*
 * Start capturing on all used input-device.
 *}
procedure TAudioInputBase.CaptureStart;
var
  S:  integer;
  DeviceIndex: integer;
  ChannelIndex: integer;
  Device: TAudioInputDevice;
  DeviceCfg: PInputDeviceConfig;
  DeviceUsed: boolean;
  Player: integer;
begin
  if (Started) then
    CaptureStop();

  // reset buffers
  for S := 0 to High(AudioInputProcessor.Sound) do
    AudioInputProcessor.Sound[S].Clear;

  // start capturing on each used device
  for DeviceIndex := 0 to High(AudioInputProcessor.DeviceList) do
  begin
    Device := AudioInputProcessor.DeviceList[DeviceIndex];
    if not assigned(Device) then
      continue;
    DeviceCfg := @Ini.InputDeviceConfig[Device.CfgIndex];

    DeviceUsed := false;

    // check if device is used
    for ChannelIndex := 0 to High(DeviceCfg.ChannelToPlayerMap) do
    begin
      Player := DeviceCfg.ChannelToPlayerMap[ChannelIndex]-1;
      if (Player < 0) or (Player >= PlayersPlay) then
      begin
        Device.LinkCaptureBuffer(ChannelIndex, nil);
      end
      else
      begin
        Device.LinkCaptureBuffer(ChannelIndex, AudioInputProcessor.Sound[Player]);
        DeviceUsed := true;
      end;
    end;

    // start device if used
    if (DeviceUsed) then
    begin
      //Log.BenchmarkStart(2);
      Device.Start();
      //Log.BenchmarkEnd(2);
      //Log.LogBenchmark('Device.Start', 2) ;
    end;
  end;

  Started := true;
end;

{*
 * Stop input-capturing on all soundcards.
 *}
procedure TAudioInputBase.CaptureStop;
var
  DeviceIndex: integer;
  Device: TAudioInputDevice;
begin
  for DeviceIndex := 0 to High(AudioInputProcessor.DeviceList) do
  begin
    Device := AudioInputProcessor.DeviceList[DeviceIndex];
    if not assigned(Device) then
      continue;
    Device.Stop();
  end;

  Started := false;
end;

function TAudioInputBase.UnifyDeviceName(const name: string; deviceIndex: integer): string;
var
  count: integer; // count of devices with this name

  function IsDuplicate(const name: string): boolean;
  var
    i: integer;
  begin
    Result := False;
    // search devices with same description
    For i := 0 to deviceIndex-1 do
    begin
      if (AudioInputProcessor.DeviceList[i].Name = name) then
      begin
        Result := True;
        Break;
      end;
    end;
  end;
begin
  count := 1;
  result := name;

  // if there is another device with the same ID, search for an available name
  while (IsDuplicate(result)) do
  begin
    Inc(count);
    // set description
    result := name + ' ('+IntToStr(count)+')';
  end;
end;

end.