diff options
Diffstat (limited to 'src/media/UAudioConverter.pas')
-rw-r--r-- | src/media/UAudioConverter.pas | 458 |
1 files changed, 458 insertions, 0 deletions
diff --git a/src/media/UAudioConverter.pas b/src/media/UAudioConverter.pas new file mode 100644 index 00000000..5647f27b --- /dev/null +++ b/src/media/UAudioConverter.pas @@ -0,0 +1,458 @@
unit UAudioConverter;

interface

{$IFDEF FPC}
  {$MODE Delphi}
{$ENDIF}

{$I switches.inc}

uses
  UMusic,
  ULog,
  ctypes,
  {$IFDEF UseSRCResample}
  samplerate,
  {$ENDIF}
  {$IFDEF UseFFmpegResample}
  avcodec,
  {$ENDIF}
  UMediaCore_SDL,
  sdl,
  SysUtils,
  Math;

type
  {
    TAudioConverter_SDL - audio converter built on SDL's TSDL_AudioCVT.

    Limitations of SDL 1.2 to keep in mind:
      - Converting between 44.1kHz and 48kHz (in either direction) is not
        supported by SDL 1.2 (support is announced for 1.3). No conversion
        takes place in these cases, because SDL only converts between rates
        that differ by a power of 2. The result is therefore not very
        accurate. This IS audible (the voice is too high/too low) and it
        requires good synchronization with the video or the lyrics timer.
      - float<->int16 conversion is not supported (announced for 1.3);
        SDL (<1.3) cannot handle float samples at all.

    => Prefer FFmpeg or libsamplerate for resampling and use SDL for channel
       and sample-format conversion only.
  }
  TAudioConverter_SDL = class(TAudioConverter)
    private
      // conversion descriptor: set up in Init, reused by every Convert call
      cvt: TSDL_AudioCVT;
    public
      function Init(SrcFormatInfo: TAudioFormatInfo; DstFormatInfo: TAudioFormatInfo): boolean; override;
      destructor Destroy(); override;

      function Convert(InputBuffer: PChar; OutputBuffer: PChar; var InputSize: integer): integer; override;
      function GetOutputBufferSize(InputSize: integer): integer; override;
      function GetRatio(): double; override;
  end;

  {$IFDEF UseFFmpegResample}
  {
    TAudioConverter_FFmpeg - resampler built on FFmpeg's audio_resample API.

    FFmpeg seems to use a "kaiser windowed sinc" for resampling, so the
    quality should be good.
  }
  TAudioConverter_FFmpeg = class(TAudioConverter)
    private
      // TODO: use SDL for multi-channel->stereo and format conversion

      // handle obtained from audio_resample_init() in Init, closed in Destroy
      ResampleContext: PReSampleContext;
      // output/input size ratio (channel ratio * sample-rate ratio), set in Init
      Ratio: double;
    public
      function Init(SrcFormatInfo: TAudioFormatInfo; DstFormatInfo: TAudioFormatInfo): boolean; override;
      destructor Destroy(); override;

      function Convert(InputBuffer: PChar; OutputBuffer: PChar; var InputSize: integer): integer; override;
      function GetOutputBufferSize(InputSize: integer): integer; override;
      function GetRatio(): double; override;
  end;
  {$ENDIF}

  {$IFDEF UseSRCResample}
  {
    TAudioConverter_SRC - resampler built on SRC (= libsamplerate).

    SRC itself does not convert channels or sample formats, so an additional
    converter (FormatConverter) is chained in front of it when needed.
  }
  TAudioConverter_SRC = class(TAudioConverter)
    private
      // libsamplerate state created by src_new() in Init, deleted in Destroy
      ConverterState: PSRC_STATE;
      // parameter block handed to src_process(); also stores src_ratio
      ConversionData: SRC_DATA;
      // optional channel/format pre-converter; nil if none is required
      FormatConverter: TAudioConverter;
    public
      function Init(SrcFormatInfo: TAudioFormatInfo; DstFormatInfo: TAudioFormatInfo): boolean; override;
      destructor Destroy(); override;

      function Convert(InputBuffer: PChar; OutputBuffer: PChar; var InputSize: integer): integer; override;
      function GetOutputBufferSize(InputSize: integer): integer; override;
      function GetRatio(): double; override;
  end;

  // Choice of the SRC (= libsamplerate) converter type:
  // SRC offers several converters with different quality/speed trade-offs.
  // The SINC types give the best quality but are slow: the SRC_SINC_*
  // converters are too slow for realtime conversion (SRC_SINC_FASTEST is
  // approx. ten times slower than SRC_LINEAR), which results in audible
  // clicks and pops.
  // SRC_LINEAR is very fast and, because it interpolates the samples, should
  // sound better than SRC_ZERO_ORDER_HOLD. Normal "non-audiophile" users
  // should not be able to hear a difference between the SINC_* converters and
  // LINEAR, especially if people sing along with the song.
  // FFmpeg might still offer a better quality/speed ratio than SRC_LINEAR.
const
  SRC_CONVERTER_TYPE = SRC_LINEAR;
  {$ENDIF}

implementation

{ TAudioConverter_SDL }

(*
 * Prepares the SDL conversion descriptor (cvt) for converting audio from
 * SrcFormatInfo to DstFormatInfo.
 *
 * Returns true on success. Returns false (after logging an error) if one of
 * the sample formats cannot be mapped to an SDL format or if
 * SDL_BuildAudioCVT() fails.
 *)
function TAudioConverter_SDL.Init(SrcFormatInfo: TAudioFormatInfo; DstFormatInfo: TAudioFormatInfo): boolean;
var
  SrcFormat: UInt16;
  DstFormat: UInt16;
begin
  inherited Init(SrcFormatInfo, DstFormatInfo);

  Result := false;

  if (not ConvertAudioFormatToSDL(SrcFormatInfo.Format, SrcFormat) or
      not ConvertAudioFormatToSDL(DstFormatInfo.Format, DstFormat)) then
  begin
    Log.LogError('Audio-format not supported by SDL', 'TAudioConverter_SDL.Init');
    Exit;
  end;

  if (SDL_BuildAudioCVT(@cvt,
        SrcFormat, SrcFormatInfo.Channels, Round(SrcFormatInfo.SampleRate),
        DstFormat, DstFormatInfo.Channels, Round(DstFormatInfo.SampleRate)) = -1) then
  begin
    Log.LogError(SDL_GetError(), 'TAudioConverter_SDL.Init');
    Exit;
  end;

  Result := true;
end;

destructor TAudioConverter_SDL.Destroy();
begin
  // cvt is a plain record without owned resources, nothing to release here
  inherited;
end;

(*
 * Returns the size of the output buffer needed for InputSize bytes of input.
 * This might be bigger than the actual size of the resampled audio data.
 *)
function TAudioConverter_SDL.GetOutputBufferSize(InputSize: integer): integer;
begin
  // Note: len_ratio must not be used here. Even if len_ratio is 1.0,
  // len_mult might be 2.
  // Example: 44.1kHz/mono to 22.05kHz/stereo -> len_ratio=1, len_mult=2
  Result := InputSize * cvt.len_mult;
end;

(*
 * Returns SDL's length ratio (cvt.len_ratio) for the prepared conversion.
 *)
function TAudioConverter_SDL.GetRatio(): double;
begin
  Result := cvt.len_ratio;
end;

(*
 * Converts InputSize bytes from InputBuffer into OutputBuffer.
 *
 * SDL converts in place, so the input is first copied to OutputBuffer, which
 * must hold at least GetOutputBufferSize(InputSize) bytes.
 *
 * Returns the number of bytes written (cvt.len_cvt), 0 for an empty input,
 * or -1 on a negative InputSize or if SDL_ConvertAudio() fails.
 * InputSize is not modified.
 *)
function TAudioConverter_SDL.Convert(InputBuffer: PChar; OutputBuffer: PChar; var InputSize: integer): integer;
begin
  Result := -1;

  if (InputSize <= 0) then
  begin
    // avoid div-by-zero problems
    if (InputSize = 0) then
      Result := 0;
    Exit;
  end;

  // OutputBuffer is always bigger than or equal to InputBuffer
  Move(InputBuffer[0], OutputBuffer[0], InputSize);
  cvt.buf := PUint8(OutputBuffer);
  cvt.len := InputSize;
  if (SDL_ConvertAudio(@cvt) = -1) then
  begin
    // report the failure instead of silently returning -1
    Log.LogError(SDL_GetError(), 'TAudioConverter_SDL.Convert');
    Exit;
  end;

  Result := cvt.len_cvt;
end;


{$IFDEF UseFFmpegResample}

{ TAudioConverter_FFmpeg }

(*
 * Creates the FFmpeg resample context for the given formats and calculates
 * the output/input size ratio.
 *
 * Only asfS16 is accepted, and source and destination must use the same
 * sample format. Returns true on success, false (after logging) otherwise.
 *)
function TAudioConverter_FFmpeg.Init(SrcFormatInfo: TAudioFormatInfo; DstFormatInfo: TAudioFormatInfo): boolean;
begin
  inherited Init(SrcFormatInfo, DstFormatInfo);

  Result := false;

  // Note: ffmpeg does not support resampling for more than 2 input channels

  if (SrcFormatInfo.Format <> asfS16) then
  begin
    Log.LogError('Unsupported format', 'TAudioConverter_FFmpeg.Init');
    Exit;
  end;

  // TODO: use SDL here
  if (SrcFormatInfo.Format <> DstFormatInfo.Format) then
  begin
    Log.LogError('Incompatible formats', 'TAudioConverter_FFmpeg.Init');
    Exit;
  end;

  // argument order of audio_resample_init(): output first, then input
  ResampleContext := audio_resample_init(
      DstFormatInfo.Channels, SrcFormatInfo.Channels,
      Round(DstFormatInfo.SampleRate), Round(SrcFormatInfo.SampleRate));
  if (ResampleContext = nil) then
  begin
    Log.LogError('audio_resample_init() failed', 'TAudioConverter_FFmpeg.Init');
    Exit;
  end;

  // calculate ratio
  Ratio := (DstFormatInfo.Channels / SrcFormatInfo.Channels) *
           (DstFormatInfo.SampleRate / SrcFormatInfo.SampleRate);

  Result := true;
end;

destructor TAudioConverter_FFmpeg.Destroy();
begin
  if (ResampleContext <> nil) then
    audio_resample_close(ResampleContext);
  inherited;
end;

(*
 * Resamples InputSize bytes from InputBuffer into OutputBuffer.
 *
 * Returns the number of bytes written, 0 for an empty input, or -1 on a
 * negative InputSize or if audio_resample() fails. InputSize is not modified.
 *)
function TAudioConverter_FFmpeg.Convert(InputBuffer: PChar; OutputBuffer: PChar; var InputSize: integer): integer;
var
  InputSampleCount: integer;
  OutputSampleCount: integer;
begin
  Result := -1;

  if (InputSize <= 0) then
  begin
    // avoid div-by-zero in audio_resample()
    if (InputSize = 0) then
      Result := 0;
    Exit;
  end;

  InputSampleCount := InputSize div SrcFormatInfo.FrameSize;
  OutputSampleCount := audio_resample(
      ResampleContext, PSmallInt(OutputBuffer), PSmallInt(InputBuffer),
      InputSampleCount);
  if (OutputSampleCount = -1) then
  begin
    Log.LogError('audio_resample() failed', 'TAudioConverter_FFmpeg.Convert');
    Exit;
  end;
  Result := OutputSampleCount * DstFormatInfo.FrameSize;
end;

(*
 * Returns the output buffer size for InputSize bytes of input, i.e. the
 * input size scaled by GetRatio() and rounded up.
 *)
function TAudioConverter_FFmpeg.GetOutputBufferSize(InputSize: integer): integer;
begin
  Result := Ceil(InputSize * GetRatio());
end;

(*
 * Returns the output/input size ratio calculated in Init.
 *)
function TAudioConverter_FFmpeg.GetRatio(): double;
begin
  Result := Ratio;
end;

{$ENDIF}


{$IFDEF UseSRCResample}

{ TAudioConverter_SRC }

(*
 * Sets up the libsamplerate converter and, if the channel count differs or
 * the source format is neither asfS16 nor asfFloat, an additional SDL
 * converter that brings the input to the destination channel count in
 * asfS16 format before resampling.
 *
 * Returns true on success, false (after logging) otherwise.
 *)
function TAudioConverter_SRC.Init(SrcFormatInfo: TAudioFormatInfo; DstFormatInfo: TAudioFormatInfo): boolean;
var
  error: integer;
  TempSrcFormatInfo: TAudioFormatInfo;
  TempDstFormatInfo: TAudioFormatInfo;
begin
  inherited Init(SrcFormatInfo, DstFormatInfo);

  Result := false;

  FormatConverter := nil;

  // SRC does not handle channel or format conversion
  if ((SrcFormatInfo.Channels <> DstFormatInfo.Channels) or
      not (SrcFormatInfo.Format in [asfS16, asfFloat])) then
  begin
    // SDL can not convert to float, so we have to convert to SInt16 first
    TempSrcFormatInfo := TAudioFormatInfo.Create(
        SrcFormatInfo.Channels, SrcFormatInfo.SampleRate, SrcFormatInfo.Format);
    TempDstFormatInfo := TAudioFormatInfo.Create(
        DstFormatInfo.Channels, SrcFormatInfo.SampleRate, asfS16);

    // init format/channel conversion
    FormatConverter := TAudioConverter_SDL.Create();
    if (not FormatConverter.Init(TempSrcFormatInfo, TempDstFormatInfo)) then
    begin
      Log.LogError('Unsupported input format', 'TAudioConverter_SRC.Init');
      // Free AND reset the reference: a plain Free would leave a dangling
      // pointer, so the assigned() check below would not trigger and the
      // destructor would free the object a second time.
      FreeAndNil(FormatConverter);
      // exit after the format-info is freed
    end;

    // this info was copied so we do not need it anymore
    TempSrcFormatInfo.Free;
    TempDstFormatInfo.Free;

    // leave if the format is not supported
    if (not assigned(FormatConverter)) then
      Exit;

    // adjust our copy of the input audio-format for SRC conversion
    Self.SrcFormatInfo.Channels := DstFormatInfo.Channels;
    Self.SrcFormatInfo.Format := asfS16;
  end;

  if ((DstFormatInfo.Format <> asfS16) and
      (DstFormatInfo.Format <> asfFloat)) then
  begin
    Log.LogError('Unsupported output format', 'TAudioConverter_SRC.Init');
    Exit;
  end;

  ConversionData.src_ratio := DstFormatInfo.SampleRate / SrcFormatInfo.SampleRate;
  if (src_is_valid_ratio(ConversionData.src_ratio) = 0) then
  begin
    Log.LogError('Invalid samplerate ratio', 'TAudioConverter_SRC.Init');
    Exit;
  end;

  ConverterState := src_new(SRC_CONVERTER_TYPE, DstFormatInfo.Channels, @error);
  if (ConverterState = nil) then
  begin
    Log.LogError('src_new() failed: ' + src_strerror(error), 'TAudioConverter_SRC.Init');
    Exit;
  end;

  Result := true;
end;

destructor TAudioConverter_SRC.Destroy();
begin
  if (ConverterState <> nil) then
    src_delete(ConverterState);
  FormatConverter.Free;
  inherited;
end;

(*
 * Converts InputSize bytes from InputBuffer into OutputBuffer.
 *
 * Steps:
 *  1. optional channel/format conversion with FormatConverter
 *  2. conversion to float samples (if the input is not float already)
 *  3. resampling with src_process()
 *  4. conversion back to 16 bit samples (if the output format is not float)
 *
 * Returns the number of bytes written to OutputBuffer, 0 if there is nothing
 * to convert, or -1 on error.
 * On success InputSize is set to the number of input bytes consumed by SRC
 * (expressed in the format of the original input). If the format conversion
 * of step 1 yields no data, InputSize receives the format converter's result.
 *)
function TAudioConverter_SRC.Convert(InputBuffer: PChar; OutputBuffer: PChar; var InputSize: integer): integer;
var
  FloatInputBuffer: PSingle;
  FloatOutputBuffer: PSingle;
  OwnedInputBuffer: PSingle;   // float input buffer allocated here (or nil)
  OwnedOutputBuffer: PSingle;  // float output buffer allocated here (or nil)
  TempBuffer: PChar;           // output of the format converter (or nil)
  NumSamples: integer;
  OutputSize: integer;
  error: integer;
begin
  Result := -1;

  // Validate the size before it is used for any allocation; this also
  // avoids div-by-zero problems.
  if (InputSize <= 0) then
  begin
    if (InputSize = 0) then
      Result := 0;
    Exit;
  end;

  TempBuffer := nil;
  OwnedInputBuffer := nil;
  OwnedOutputBuffer := nil;
  try
    // format conversion with external converter (to correct number of
    // channels and format)
    if (assigned(FormatConverter)) then
    begin
      GetMem(TempBuffer, FormatConverter.GetOutputBufferSize(InputSize));
      InputSize := FormatConverter.Convert(InputBuffer, TempBuffer, InputSize);
      InputBuffer := TempBuffer;

      // the format converter failed (-1) or produced no data (0)
      if (InputSize <= 0) then
      begin
        if (InputSize = 0) then
          Result := 0;
        Exit;
      end;
    end;

    if (SrcFormatInfo.Format = asfFloat) then
    begin
      FloatInputBuffer := PSingle(InputBuffer);
    end
    else
    begin
      NumSamples := InputSize div AudioSampleSize[SrcFormatInfo.Format];
      GetMem(OwnedInputBuffer, NumSamples * SizeOf(Single));
      FloatInputBuffer := OwnedInputBuffer;
      src_short_to_float_array(PCshort(InputBuffer), PCfloat(FloatInputBuffer), NumSamples);
    end;

    // calculate approx. output size
    // NOTE(review): this is derived from the input size in bytes; if source
    // and destination sample sizes differ (S16 vs. float), output_frames
    // below looks like it does not match the input frame count - confirm.
    OutputSize := Ceil(InputSize * ConversionData.src_ratio);

    if (DstFormatInfo.Format = asfFloat) then
    begin
      FloatOutputBuffer := PSingle(OutputBuffer);
    end
    else
    begin
      NumSamples := OutputSize div AudioSampleSize[DstFormatInfo.Format];
      GetMem(OwnedOutputBuffer, NumSamples * SizeOf(Single));
      FloatOutputBuffer := OwnedOutputBuffer;
    end;

    with ConversionData do
    begin
      data_in := PCFloat(FloatInputBuffer);
      input_frames := InputSize div SrcFormatInfo.FrameSize;
      data_out := PCFloat(FloatOutputBuffer);
      output_frames := OutputSize div DstFormatInfo.FrameSize;
      // TODO: set this to 1 at end of file-playback
      end_of_input := 0;
    end;

    error := src_process(ConverterState, @ConversionData);
    if (error <> 0) then
    begin
      Log.LogError(src_strerror(error), 'TAudioConverter_SRC.Convert');
      Exit;
    end;

    if (DstFormatInfo.Format <> asfFloat) then
    begin
      NumSamples := ConversionData.output_frames_gen * DstFormatInfo.Channels;
      src_float_to_short_array(PCfloat(FloatOutputBuffer), PCshort(OutputBuffer), NumSamples);
    end;

    // report the consumed input in bytes of the caller's input format
    if (assigned(FormatConverter)) then
      InputSize := ConversionData.input_frames_used * FormatConverter.SrcFormatInfo.FrameSize
    else
      InputSize := ConversionData.input_frames_used * SrcFormatInfo.FrameSize;

    // set result to output size according to SRC
    Result := ConversionData.output_frames_gen * DstFormatInfo.FrameSize;
  finally
    // release all temporary buffers on every exit path
    if (OwnedInputBuffer <> nil) then
      FreeMem(OwnedInputBuffer);
    if (OwnedOutputBuffer <> nil) then
      FreeMem(OwnedOutputBuffer);
    if (TempBuffer <> nil) then
      FreeMem(TempBuffer);
  end;
end;

(*
 * Returns the output buffer size for InputSize bytes of input, i.e. the
 * input size scaled by GetRatio() and rounded up.
 *)
function TAudioConverter_SRC.GetOutputBufferSize(InputSize: integer): integer;
begin
  Result := Ceil(InputSize * GetRatio());
end;

(*
 * Returns the overall output/input size ratio: the ratio of the optional
 * format converter multiplied by the SRC sample-rate ratio and the
 * frame-size ratio between destination and (adjusted) source format.
 *)
function TAudioConverter_SRC.GetRatio(): double;
begin
  // if we need additional channel/format conversion, use this ratio
  if (assigned(FormatConverter)) then
    Result := FormatConverter.GetRatio()
  else
    Result := 1.0;

  // now the SRC ratio (Note: the format might change from SInt16 to float)
  Result := Result *
            ConversionData.src_ratio *
            (DstFormatInfo.FrameSize / SrcFormatInfo.FrameSize);
end;

{$ENDIF}

end.
\ No newline at end of file |