converting audio samples using libav*

n0stradamus · 2013-03-29 23:08:14

Hello guys,

I've been looking for a solution for several days by now and have not gotten any smarter, I hope that someone of you is able to help me
What I'm currently working on is an application able to generate audio fingerprints (specifically acoustID fingerprints). To do so, I started with the sample
code that demonstrates the basic fingerprinting capability. You can find it here.
It is part of a library called chromaprint.

As you will see, a lot of this code uses deprecated functions. As a newbie to sound conversion, the sense of a lot of the calculations in the file is oblivious to me
Still, I want to renew the code to use functions that are up-to-date. Ideally I'd understand the code more thorough then, or so I thought.
I started modifiying the decode_audio_file function from fpcalc.c and got until here, where I am now stuck:

int decode_audio_file(ChromaprintContext *chromaprint_ctx, int16_t *buffer1, int16_t *buffer2, const char *file_name, int max_length, int *duration)
{
AVFormatContext *format_ctx = NULL;
    AVCodecContext   *codec_ctx = NULL;
    AVAudioConvert *convert_ctx = NULL;

    AVStream *stream = NULL;
    AVCodec *codec   = NULL;
    AVPacket avpacket;
    AVFrame *decoded_frame = NULL;

    FILE *f;
    int frameFinished = 0;
    int stream_id, ok = 0;
    int buffersize = AVCODEC_MAX_AUDIO_FRAME_SIZE + FF_INPUT_BUFFER_PADDING_SIZE;
    uint8_t inbuf[buffersize];

    /* initialize data packet that is read from the stream */
    av_init_packet(&avpacket);

    avpacket.data = inbuf;
    avpacket.size = buffersize;

    /* make space for frame that contains decoded data */
    decoded_frame = avcodec_alloc_frame();


    /* tell format_ctx about the input */
    if (avformat_open_input(&format_ctx, file_name, NULL, NULL) < 0) {
        fprintf(stderr,"ERROR: couldn't open the file");
        goto done;
    }

    if (avformat_find_stream_info(format_ctx, 0) < 0) {
        fprintf(stderr,"ERROR: couldn't find stream information in the file");
        goto done;
    }

    for (int i = 0; i < format_ctx->nb_streams; ++i) {
        codec_ctx = format_ctx->streams[i]->codec;
        if (codec_ctx && codec_ctx->codec_type == AVMEDIA_TYPE_AUDIO) {
            stream = format_ctx->streams[i];
            break;
        }
    }

    if (!stream) {
        fprintf(stderr,"ERROR: couldn't find any audio stream in the file\n");
        goto done;
    }

    codec = avcodec_find_decoder(codec_ctx->codec_id);

   /* chromaprint expects signed 16 bit samples */
    codec_ctx->request_sample_fmt = AV_SAMPLE_FMT_S16;

    if (codec_ctx->sample_fmt != AV_SAMPLE_FMT_S16) {
        convert_ctx = av_audio_convert_alloc(AV_SAMPLE_FMT_S16, codec_ctx->channels,
                                             codec_ctx->sample_fmt, codec_ctx->channels, NULL, 0);
        if (!convert_ctx) {
            fprintf(stderr, "ERROR: couldn't create sample format converter");
            goto done;
        }
    }

    if (!codec) {
         fprintf(stderr,"ERROR: unknown codec");
        goto done;
    }

    if (avcodec_open2(codec_ctx, codec, NULL) < 0) {
        fprintf(stderr,"Could not open codec\n");
        goto done;
    }

    chromaprint_start(chromaprint_ctx, codec_ctx->sample_rate, codec_ctx->channels);
    *duration = stream->time_base.num * stream->duration / stream->time_base.den;

    int len;
    while (av_read_frame(format_ctx, &avpacket)>=0) {
        if (avpacket.stream_index == stream->id) {
            len = avcodec_decode_audio4(codec_ctx, decoded_frame, &frameFinished, &avpacket);

            if(frameFinished) {
                int data_size = av_samples_get_buffer_size(NULL, codec_ctx->channels,
                                                           decoded_frame->nb_samples,
                                                           codec_ctx->sample_fmt, 1);

                if (convert_ctx) {
                    const void *ibuf[6] = { decoded_frame->data };
                    void *obuf[6] = { buffer2 };
                    int istride[6] = { av_get_bytes_per_sample(codec_ctx->sample_fmt) };
                    int ostride[6] = { 2 };
                    len = data_size / istride[0];

                    if (av_audio_convert(convert_ctx, obuf, ostride, ibuf, istride, 4) < 0) {
                        fprintf(stderr,"WARNING: unable to convert %d samples\n", len);
                        break;
                    }
                    if (!chromaprint_feed(chromaprint_ctx, buffer2, decoded_frame->nb_samples/2 )) {
                        fprintf(stderr,"ERROR: fingerprint calculation failed\n");
                        goto done;
                    }
                } else if (!chromaprint_feed(chromaprint_ctx, decoded_frame->extended_data, decoded_frame->nb_samples)) {
                    fprintf(stderr,"ERROR: fingerprint calculation failed\n");
                    goto done;
                }
            }
        }
    }

    ok = 1;

done:

    avformat_close_input(&format_ctx);
    avcodec_free_frame(&decoded_frame);

    return ok;
}

You can find the documentation for the used chromaprint functions here.
As I said, I am not really sure whether I have understood all of what is going on in the unmodified version of the function, so please bear with me

Any answers or suggestions for reading material will be gladly appreciated!

PS: goto marks will be removed once the code works

Last edited by n0stradamus (2013-03-29 23:10:36)

Arch Linux

#1 2013-03-29 23:08:14

converting audio samples using libav*

Board footer