const
  // Value DecodeWav requires in TPcmHeader.FormatTag before it will decode
  // a file (uncompressed PCM).
  WAVE_FORMAT_PCM = $0001;

type
  { Payload of a WAV "fmt " chunk as DecodeWav reads it from the stream:
    exactly 16 bytes, copied verbatim into this record.

    The two 32-bit fields are declared as LongInt rather than Integer because
    the width of Integer depends on the compiler mode (it is 16-bit in some
    Free Pascal modes), which would shrink the record below the 16 bytes that
    are read into it. LongInt is always 32 bits and keeps the fields signed,
    so existing users of these fields see the same values. }
  TPcmHeader = packed record
    FormatTag: Word;           // compared against WAVE_FORMAT_PCM
    Channels: Word;            // DecodeWav accepts only 1 (mono)
    SampleRate: LongInt;       // samples per second
    BytesPerSecond: LongInt;   // not inspected by DecodeWav
    BlockAlign: Word;          // not inspected by DecodeWav
    BitsPerSample: Word;       // DecodeWav accepts only 16
  end;


{ Decodes a mono, 16-bit PCM RIFF/WAVE stream.

  inp is read sequentially from its current position, which must be the first
  byte of the 'RIFF' tag. The stream is never seeked, only read.

  Returns nil when the stream is not RIFF/WAVE, when no usable "fmt " chunk is
  found, when the format is not mono 16-bit PCM, when no "data" chunk follows
  the "fmt " chunk, or when the stream ends while chunk headers are being
  walked.

  On success element 0 of the result carries the sample rate squeezed into
  16 bits (so rates above 65535 Hz do not fit) and elements 1..N carry the
  samples in file order. A "data" chunk with an odd byte count has its
  trailing half-sample dropped.

  If the stream ends inside the "data" payload, the exception raised by
  inp.ReadBuffer propagates to the caller. }
function DecodeWav(inp: TStream): TSmallIntArray;
var
  ChunkLen, ChunkLenCount, SubChunkLen, DataBytes: dword;
  ChunkId: AnsiString;
  PcmHeader: TPcmHeader;

  // Reads exactly Count bytes into Buf; False when the stream ends early.
  function ReadExact(var Buf; Count: LongInt): boolean;
  begin
    Result:= inp.Read(Buf, Count) = Count;
  end;

  // Consumes Length(s) bytes and reports whether they equal s.
  // AnsiString keeps this at one byte per character in every compiler mode.
  function IsStr(const s: AnsiString): boolean;
  var b: AnsiString;
  begin
    SetLength(b, Length(s));
    Result:= ReadExact(b[1], Length(b));
    if Result then Result:= s = b;
  end;

  // Reads a sub-chunk header: 4-byte id followed by the 32-bit payload size.
  function ReadChunkHeader(var Id: AnsiString; var Len: dword): boolean;
  begin
    SetLength(Id, 4);
    Result:= ReadExact(Id[1], 4);
    if Result then Result:= ReadExact(Len, 4);
  end;

  // Discards Count bytes by reading them through a scratch buffer (works on
  // non-seekable streams); False when the stream ends early.
  function SkipBytes(Count: dword): boolean;
  var
    Scratch: array[0..4095] of byte;
    Step: LongInt;
  begin
    Result:= False;
    while Count > 0 do begin
      if Count > SizeOf(Scratch) then Step:= SizeOf(Scratch)
      else Step:= Count;
      if not ReadExact(Scratch, Step) then Exit;
      Dec(Count, Step);
    end;
    Result:= True;
  end;

  // RIFF chunks are word-aligned: an odd-sized payload is followed by one
  // pad byte that is not included in the chunk's size field.
  function SkipPad(Len: dword): boolean;
  begin
    Result:= True;
    if (Len and 1) = 1 then Result:= SkipBytes(1);
  end;

begin
  Result:= nil;

  // verifying the RIFF header:
  if not IsStr('RIFF') then Exit;
  if not ReadExact(ChunkLen, 4) then Exit;
  if not IsStr('WAVE') then Exit;

  // searching for the "fmt " subchunk:
  // ChunkLenCount sums payload sizes only, so it is a loose bound on the
  // RIFF size; a failed header read (end of stream) is the hard stop.
  ChunkLenCount:= 0;
  FillChar(PcmHeader, SizeOf(PcmHeader), 0);
  while (ChunkLenCount < ChunkLen) and (PcmHeader.Channels = 0) do begin
    if not ReadChunkHeader(ChunkId, SubChunkLen) then Exit;
    if ChunkId = 'fmt ' then begin
      // Longer "fmt " chunks start with the same 16 bytes; read those and
      // skip whatever extension follows.
      if SubChunkLen < SizeOf(PcmHeader) then Exit;
      if not ReadExact(PcmHeader, SizeOf(PcmHeader)) then Exit;
      if not SkipBytes(SubChunkLen - SizeOf(PcmHeader)) then Exit;
    end
    else begin
      if not SkipBytes(SubChunkLen) then Exit;
    end;
    if not SkipPad(SubChunkLen) then Exit;
    ChunkLenCount+= SubChunkLen;
  end;

  // verifying it's 16-bit mono PCM (any sample rate is accepted)
  with PcmHeader do begin
    if Channels <> 1 then Exit;
    if FormatTag <> WAVE_FORMAT_PCM then Exit;
    if BitsPerSample <> 16 then Exit;
  end;

  // searching for the "data" subchunk:
  while ChunkLenCount < ChunkLen do begin
    if not ReadChunkHeader(ChunkId, SubChunkLen) then Exit;
    if ChunkId = 'data' then begin
      // Only whole 16-bit samples are copied, so the read can never run past
      // the allocated array when the chunk size is odd.
      DataBytes:= SubChunkLen - (SubChunkLen and 1);
      SetLength(Result, DataBytes div 2 + 1);
      word(Result[0]):= PcmHeader.SampleRate;
      // Result[1] does not exist when the chunk is empty.
      if DataBytes > 0 then inp.ReadBuffer(Result[1], DataBytes);
      Exit;
    end;
    if not SkipBytes(SubChunkLen) then Exit;
    if not SkipPad(SubChunkLen) then Exit;
    ChunkLenCount+= SubChunkLen;
  end;
end;
