From cdbf23cd0329e0403b707f21b25adcbb4d37433e Mon Sep 17 00:00:00 2001
From: tobigun <tobigun@b956fd51-792f-4845-bead-9b4dfca2ff2c>
Date: Wed, 29 Dec 2010 14:41:27 +0000
Subject: log voice captures to wav-file

git-svn-id: svn://svn.code.sf.net/p/ultrastardx/svn/trunk@2781 b956fd51-792f-4845-bead-9b4dfca2ff2c
---
 src/base/ULog.pas    | 97 ++++++++++++++++++++++++++++++++++++++++++++++------
 src/base/URecord.pas | 66 ++++++++++++++++++++---------------
 2 files changed, 125 insertions(+), 38 deletions(-)

(limited to 'src/base')

diff --git a/src/base/ULog.pas b/src/base/ULog.pas
index e4ff4862..ea3cda4e 100644
--- a/src/base/ULog.pas
+++ b/src/base/ULog.pas
@@ -130,7 +130,8 @@ uses
   SysUtils,
   DateUtils,
   URecord,
-  UMain,  
+  UMain,
+  UMusic,  
   UTime,
   UCommon,
   UCommandLine,
@@ -398,26 +399,102 @@ begin
   LogMsg(Msg, Context, LOG_LEVEL_CRITICAL);
 end;
 
+type
+  TRiffChunkID = array[0..3] of byte; // four-byte chunk tag, e.g. 'RIFF' or 'data'
+
+  TRiffChunk = packed record // header written in front of each chunk
+    ID: TRiffChunkID;
+    DataSize: cardinal; // size of the chunk payload, excluding this 8-byte header
+  end;
+
+  TRiffHeader = packed record // first record LogVoice writes to a wav-file
+    ChunkInfo: TRiffChunk;
+    RiffType: TRiffChunkID; // LogVoice sets this to 'WAVE'
+  end;
+
+  TWaveFmtChunk = packed record // 'fmt ' chunk describing the sample format
+    ChunkInfo: TRiffChunk;
+    FormatTag: word; // encoding; LogVoice writes WAVE_FORMAT_PCM
+    NumChannels: word;
+    SamplesPerSec: cardinal;
+    AvgBytesPerSec: cardinal;
+    BlockAlign: word; // filled from FormatInfo.FrameSize
+    BitsPerSample: word; // filled from FormatInfo.SampleSize * 8
+  end;
+
 procedure TLog.LogVoice(SoundNr: integer);
 var
-  FS:           TBinaryFileStream;
-  Prefix:       string;
-  FileName:     IPath;
-  Num:          integer;
+  Stream: TBinaryFileStream;     // output file
+  Prefix: string;
+  FileName: IPath;
+  Num: integer;
+  CaptureBuffer: TCaptureBuffer;
+  Buffer: TMemoryStream;         // recorded samples (CaptureBuffer.LogBuffer)
+  FormatInfo: TAudioFormatInfo;  // format of the recorded samples
+  WaveHdr: TRiffHeader;
+  WaveFmt: TWaveFmtChunk;
+  DataChunk: TRiffChunk;
+  UseWavFile: boolean;           // true: .wav with header, false: plain .raw dump
+  FileExt: string;
+const
+  RIFF_CHUNK_HDR: TRiffChunkID = (Ord('R'), Ord('I'), Ord('F'), Ord('F'));
+  RIFF_CHUNK_FMT: TRiffChunkID = (Ord('f'), Ord('m'), Ord('t'), Ord(' '));
+  RIFF_CHUNK_DATA: TRiffChunkID = (Ord('d'), Ord('a'), Ord('t'), Ord('a'));
+  RIFF_TYPE_WAVE: TRiffChunkID = (Ord('W'), Ord('A'), Ord('V'), Ord('E'));
+  WAVE_FORMAT_PCM = 1; // PCM (uncompressed)
 begin
+  // dump the capture log of Sound[SoundNr] to a new VoiceNNNN file in LogPath
+  CaptureBuffer := AudioInputProcessor.Sound[SoundNr];
+  Buffer := CaptureBuffer.LogBuffer;
+  FormatInfo := CaptureBuffer.AudioFormat;
+
+  // not all formats fit a wav-file; the format is unassigned if capture never started
+  UseWavFile := Assigned(FormatInfo) and (FormatInfo.Format in [asfU8, asfS16, asfS16LSB]);
+  if (UseWavFile) then
+    FileExt := '.wav'
+  else
+    FileExt := '.raw';
+  // create output filename (first unused number)
   for Num := 1 to 9999 do begin
     Prefix := Format('Voice%.4d', [Num]);
-    FileName := LogPath.Append(Prefix + '.raw');
+    FileName := LogPath.Append(Prefix + FileExt);
     if not FileName.Exists() then
       break
   end;
 
-  FS := TBinaryFileStream.Create(FileName, fmCreate);
+  // open output file
+  Stream := TBinaryFileStream.Create(FileName, fmCreate);
+  try // the file must be closed even if one of the writes below raises
+    // write wav-file header
+    if (UseWavFile) then
+    begin
+      WaveHdr.ChunkInfo.ID := RIFF_CHUNK_HDR;
+      WaveHdr.ChunkInfo.DataSize := (SizeOf(TRiffHeader) - 8) +
+          SizeOf(TWaveFmtChunk) + SizeOf(TRiffChunk) + Buffer.Size;
+      WaveHdr.RiffType := RIFF_TYPE_WAVE;
+      Stream.Write(WaveHdr, SizeOf(TRiffHeader));
+
+      WaveFmt.ChunkInfo.ID := RIFF_CHUNK_FMT;
+      WaveFmt.ChunkInfo.DataSize := SizeOf(TWaveFmtChunk) - 8;
+      WaveFmt.FormatTag := WAVE_FORMAT_PCM;
+      WaveFmt.NumChannels := FormatInfo.Channels;
+      WaveFmt.SamplesPerSec := Round(FormatInfo.SampleRate);
+      WaveFmt.AvgBytesPerSec := Round(FormatInfo.BytesPerSec);
+      WaveFmt.BlockAlign := FormatInfo.FrameSize;
+      WaveFmt.BitsPerSample := FormatInfo.SampleSize * 8;
+      Stream.Write(WaveFmt, SizeOf(TWaveFmtChunk));
+
+      DataChunk.ID := RIFF_CHUNK_DATA;
+      DataChunk.DataSize := Buffer.Size;
+      Stream.Write(DataChunk, SizeOf(TRiffChunk));
+    end;
 
-  AudioInputProcessor.Sound[SoundNr].LogBuffer.Seek(0, soBeginning);
-  FS.CopyFrom(AudioInputProcessor.Sound[SoundNr].LogBuffer, AudioInputProcessor.Sound[SoundNr].LogBuffer.Size);
+    Buffer.Seek(0, soBeginning);
+    Stream.CopyFrom(Buffer, Buffer.Size);
 
-  FS.Free;
+  finally
+    Stream.Free;
+  end;
 end;
 
 procedure TLog.LogBuffer(const buf: Pointer; const bufLength: Integer; const filename: IPath);
diff --git a/src/base/URecord.pas b/src/base/URecord.pas
index c4b08211..d3886f3d 100644
--- a/src/base/URecord.pas
+++ b/src/base/URecord.pas
@@ -49,14 +49,18 @@ const
 type
   TCaptureBuffer = class
     private
-      VoiceStream: TAudioVoiceStream; // stream for voice passthrough
-      AnalysisBufferLock: PSDL_Mutex;
+      fVoiceStream: TAudioVoiceStream; // stream for voice passthrough
+      fAnalysisBufferLock: PSDL_Mutex;
+      fAudioFormat: TAudioFormatInfo;
 
       function GetToneString: string; // converts a tone to its string represenatation;
 
       procedure BoostBuffer(Buffer: PByteArray; Size: integer);
       procedure ProcessNewBuffer(Buffer: PByteArray; BufferSize: integer);
 
+      procedure StartCapture(Format: TAudioFormatInfo);
+      procedure StopCapture();
+
       // we call it to analyze sound by checking Autocorrelation
       procedure AnalyzeByAutocorrelation;
       // use this to check one frequency by Autocorrelation
@@ -67,8 +71,6 @@ type
 
       LogBuffer:   TMemoryStream;              // full buffer
 
-      AudioFormat: TAudioFormatInfo;
-
       // pitch detection
       // TODO: remove ToneValid, set Tone/ToneAbs=-1 if invalid instead
       ToneValid:    boolean;    // true if Tone contains a valid value (otherwise it contains noise)
@@ -88,6 +90,7 @@ type
 
       function MaxSampleVolume: single;
       property ToneString: string READ GetToneString;
+      property AudioFormat: TAudioFormatInfo READ fAudioFormat;
   end;
 
 const
@@ -220,7 +223,6 @@ end;
 
 procedure TAudioInputDevice.LinkCaptureBuffer(ChannelIndex: integer; Sound: TCaptureBuffer);
 var
-  DeviceCfg: PInputDeviceConfig;
   OldSound: TCaptureBuffer;
 begin
   // check bounds
@@ -231,26 +233,13 @@ begin
   OldSound := CaptureChannel[ChannelIndex];
   if (OldSound <> nil) then
   begin
-    // close voice stream
-    FreeAndNil(OldSound.VoiceStream);
-    // free old audio-format info
-    FreeAndNil(OldSound.AudioFormat);
+    OldSound.StopCapture();
   end;
 
   // set audio-format of new capture-buffer
   if (Sound <> nil) then
   begin
-    // copy the input-device audio-format ...
-    Sound.AudioFormat := AudioFormat.Copy;
-    // and adjust it because capture buffers are always mono
-    Sound.AudioFormat.Channels := 1;
-    DeviceCfg := @Ini.InputDeviceConfig[CfgIndex];
-
-    if (Ini.VoicePassthrough = 1) then
-    begin
-      // TODO: map odd players to the left and even players to the right speaker
-      Sound.VoiceStream := AudioPlayback.CreateVoiceStream(CHANNELMAP_FRONT, AudioFormat);
-    end;
+    Sound.StartCapture(AudioFormat);
   end;
 
   // replace old with new buffer (Note: Sound might be nil)
@@ -263,27 +252,27 @@ constructor TCaptureBuffer.Create;
 begin
   inherited;
   LogBuffer := TMemoryStream.Create;
-  AnalysisBufferLock := SDL_CreateMutex();
+  fAnalysisBufferLock := SDL_CreateMutex();
   AnalysisBufferSize := Length(AnalysisBuffer);
 end;
 
 destructor TCaptureBuffer.Destroy;
 begin
   FreeAndNil(LogBuffer);
-  FreeAndNil(VoiceStream);
-  FreeAndNil(AudioFormat);
-  SDL_DestroyMutex(AnalysisBufferLock);
+  FreeAndNil(fVoiceStream); // voice-passthrough stream, if one was created
+  FreeAndNil(fAudioFormat); // the copy made in StartCapture
+  SDL_DestroyMutex(fAnalysisBufferLock); // mutex created in the constructor
   inherited;
 end;
 
 procedure TCaptureBuffer.LockAnalysisBuffer();
 begin
-  SDL_mutexP(AnalysisBufferLock);
+  SDL_mutexP(fAnalysisBufferLock); // acquire the analysis-buffer mutex
 end;
 
 procedure TCaptureBuffer.UnlockAnalysisBuffer();
 begin
-  SDL_mutexV(AnalysisBufferLock);
+  SDL_mutexV(fAnalysisBufferLock); // release the analysis-buffer mutex
 end;
 
 procedure TCaptureBuffer.Clear;
@@ -305,8 +294,8 @@ begin
   BoostBuffer(Buffer, BufferSize);
 
   // voice passthrough (send data to playback-device)
-  if (assigned(VoiceStream)) then
-    VoiceStream.WriteData(Buffer, BufferSize);
+  if (assigned(fVoiceStream)) then
+    fVoiceStream.WriteData(Buffer, BufferSize);
 
   // we assume that samples are in S16Int format
   // TODO: support float too
@@ -529,6 +518,27 @@ begin
   end;
 end;
 
+procedure TCaptureBuffer.StartCapture(Format: TAudioFormatInfo);
+begin
+  // release what a previous capture left behind, so a restart does not leak it
+  FreeAndNil(fVoiceStream);
+  FreeAndNil(fAudioFormat);
+  // copy the new input-device audio-format; capture buffers are always mono
+  fAudioFormat := Format.Copy;
+  fAudioFormat.Channels := 1;
+
+  if (Ini.VoicePassthrough = 1) then
+  begin
+    // TODO: map odd players to the left and even players to the right speaker
+    fVoiceStream := AudioPlayback.CreateVoiceStream(CHANNELMAP_FRONT, fAudioFormat);
+  end;
+end;
+
+procedure TCaptureBuffer.StopCapture();
+begin // closes the voice-passthrough stream; fAudioFormat is not released here
+  FreeAndNil(fVoiceStream);
+end;
+
 { TAudioInputProcessor }
 
 constructor TAudioInputProcessor.Create;
-- 
cgit v1.2.3