Skip to content

Commit

Permalink
Add support for writing to MP3 files (#146)
Browse files (browse the repository at this point in the history)
* Add support for writing to MP3 files

* Fix linter error

* Simplify bit-depth test

* Fix typo

* Further simplify tests
  • Loading branch information
hagenw authored Jan 29, 2024
1 parent 15355bd commit 9cbc1bc
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 76 deletions.
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ ffmpeg_,
sox_,
and mediainfo_.
In addition,
- it can write WAV, FLAC, and OGG files.
+ it can write WAV, FLAC, MP3, and OGG files.

Have a look at the installation_ and usage_ instructions as a starting point.

Expand Down
5 changes: 3 additions & 2 deletions audiofile/core/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,17 +451,18 @@ def write(
"""Write (normalized) audio files.
Save audio data provided as an array of shape ``[channels, samples]``
- to a WAV, FLAC, or OGG file.
+ to a WAV, FLAC, MP3, or OGG file.
``channels`` can be up to 65535 for WAV,
255 for OGG,
2 for MP3,
and 8 for FLAC.
For monaural audio the array can be one-dimensional.
It uses :func:`soundfile.write` to write the audio files.
Args:
file: file name of output audio file.
- The format (WAV, FLAC, OGG) will be inferred from the file name
+ The format (WAV, FLAC, MP3, OGG) will be inferred from the file name
signal: audio data to write
sampling_rate: sample rate of the audio data
bit_depth: bit depth of written file in bit,
Expand Down
1 change: 1 addition & 0 deletions audiofile/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
MAX_CHANNELS = {
'wav': 65535,
'ogg': 255,
'mp3': 2,
'flac': 8,
}
r"""Maximum number of channels per format."""
Expand Down
2 changes: 1 addition & 1 deletion docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ ffmpeg_,
sox_,
and mediainfo_,
if those are available on your system.
- In addition, it can create WAV, FLAC, or OGG files.
+ In addition, it can create WAV, FLAC, MP3, or OGG files.


Write a file
Expand Down
100 changes: 28 additions & 72 deletions tests/test_audiofile.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import re
import subprocess

import numpy as np
from numpy.testing import assert_allclose
Expand Down Expand Up @@ -84,21 +83,6 @@ def non_audio_file(tmpdir, request):
os.remove(broken_file)


def convert_to_mp3(infile, outfile, sampling_rate, channels):
"""Convert file to MP3 using ffmpeg."""
subprocess.call(
[
'ffmpeg',
'-i', infile,
'-vn',
'-ar', str(sampling_rate),
'-ac', str(channels),
'-b:a', '192k',
outfile,
]
)


def tolerance(condition, sampling_rate=0):
"""Absolute tolerance for different condition."""
tol = 0
Expand Down Expand Up @@ -319,12 +303,7 @@ def test_convert_to_wav(tmpdir, normalize, bit_depth, file_extension):
channels=channels,
)
infile = str(tmpdir.join(f'signal.{file_extension}'))
if file_extension == 'mp3':
tmpfile = str(tmpdir.join('signal-tmp.wav'))
af.write(tmpfile, signal, sampling_rate, bit_depth=bit_depth)
convert_to_mp3(tmpfile, infile, sampling_rate, channels)
else:
af.write(infile, signal, sampling_rate, bit_depth=bit_depth)
af.write(infile, signal, sampling_rate, bit_depth=bit_depth)
if file_extension == 'wav':
error_msg = (
f"'{infile}' would be overwritten. "
Expand All @@ -343,12 +322,6 @@ def test_convert_to_wav(tmpdir, normalize, bit_depth, file_extension):
normalize=normalize,
overwrite=True,
)
elif file_extension == 'mp3':
outfile = af.convert_to_wav(
infile,
bit_depth=bit_depth,
normalize=normalize,
)
else:
outfile = str(tmpdir.join('signal_converted.wav'))
af.convert_to_wav(
Expand Down Expand Up @@ -473,80 +446,57 @@ def test_magnitude(tmpdir, magnitude, normalize, bit_depth, sampling_rate):
assert type(_magnitude(sig)) is np.float32


@pytest.mark.parametrize('file_type', ['wav', 'flac', 'ogg'])
@pytest.mark.parametrize('file_type', ['wav', 'flac', 'mp3', 'ogg'])
@pytest.mark.parametrize('sampling_rate', [8000, 48000])
@pytest.mark.parametrize("channels", [1, 2, 8, 255])
@pytest.mark.parametrize('magnitude', [0.01])
def test_file_type(tmpdir, file_type, magnitude, sampling_rate, channels):

# Skip unallowed combinations
if file_type == 'flac' and channels > 8:
return None
if file_type == 'mp3' and channels > 2:
return None

file = str(tmpdir.join('signal.' + file_type))
signal = sine(
magnitude=magnitude,
sampling_rate=sampling_rate,
channels=channels,
)
# Skip unallowed combination
if file_type == 'flac' and channels > 8:
return 0
# Allowed combinations
bit_depth = 16
sig, fs = write_and_read(file, signal, sampling_rate, bit_depth=bit_depth)
# Test file type
assert audeer.file_extension(file) == file_type
# Test magnitude
if file_type == 'mp3':
atol = tolerance(8)
else:
atol = tolerance(16)
assert_allclose(
_magnitude(sig),
magnitude,
rtol=0,
atol=tolerance(16),
atol=atol,
)
# Test metadata
info = soundfile.info(file)
assert fs == sampling_rate
assert info.samplerate == sampling_rate
assert _channels(sig) == channels
assert info.channels == channels
assert _samples(sig) == _samples(signal)
assert info.frames == _samples(signal)
if file_type == 'ogg':
bit_depth = None
assert af.bit_depth(file) == bit_depth


@pytest.mark.parametrize('sampling_rate', [8000, 48000])
@pytest.mark.parametrize("channels", [1, 2])
@pytest.mark.parametrize('magnitude', [0.01])
def test_mp3(tmpdir, magnitude, sampling_rate, channels):

signal = sine(magnitude=magnitude,
sampling_rate=sampling_rate,
channels=channels)
# Create wav file and use ffmpeg to convert to mp3
wav_file = str(tmpdir.join('signal.wav'))
mp3_file = str(tmpdir.join('signal.mp3'))
af.write(wav_file, signal, sampling_rate)
convert_to_mp3(wav_file, mp3_file, sampling_rate, channels)
assert audeer.file_extension(mp3_file) == 'mp3'
sig, fs = af.read(mp3_file)
assert fs == sampling_rate
assert _channels(sig) == channels
if channels == 1:
assert sig.ndim == 1
else:
assert sig.ndim == 2
assert af.channels(mp3_file) == _channels(sig)
assert af.sampling_rate(mp3_file) == sampling_rate
assert af.samples(mp3_file) == _samples(sig)
assert af.duration(mp3_file) == _duration(sig, sampling_rate)
assert_allclose(
af.duration(mp3_file, sloppy=True),
_duration(sig, sampling_rate),
rtol=0,
atol=0.2,
)
assert af.bit_depth(mp3_file) is None
assert _samples(sig) == _samples(signal)
assert info.frames == _samples(signal)
if file_type in ['mp3', 'ogg']:
bit_depth = None
assert af.bit_depth(file) == bit_depth


def test_formats():
def test_other_formats():
files = [
'gs-16b-1c-44100hz.opus',
'gs-16b-1c-8000hz.amr',
Expand Down Expand Up @@ -1183,7 +1133,7 @@ def test_read_duration_and_offset_file_formats(tmpdir):
mp3_file = str(tmpdir.join('signal.mp3'))
m4a_file = audeer.path(ASSETS_DIR, 'gs-16b-1c-44100hz.m4a')
af.write(wav_file, signal, sampling_rate)
convert_to_mp3(wav_file, mp3_file, sampling_rate, channels)
af.write(mp3_file, signal, sampling_rate)

for file in [wav_file, mp3_file, m4a_file]:
# Duration and offset in seconds
Expand Down Expand Up @@ -1294,7 +1244,7 @@ def test_read_duration_and_offset_rounding(
# duration of 0 is handled inside af.read()
# even when duration is only 0 after rounding
# as ffmpeg cannot handle those cases
return 0
return None

# sox
convert_file = str(tmpdir.join('signal-sox.wav'))
Expand Down Expand Up @@ -1337,6 +1287,12 @@ def test_write_errors():
)
with pytest.raises(RuntimeError, match=expected_error):
write_and_read('test.flac', np.zeros((9, 100)), sampling_rate)
expected_error = (
"The maximum number of allowed channels "
"for 'mp3' is 2. Consider using 'wav' instead."
)
with pytest.raises(RuntimeError, match=expected_error):
write_and_read('test.mp3', np.zeros((3, 100)), sampling_rate)
expected_error = (
"The maximum number of allowed channels "
"for 'ogg' is 255. Consider using 'wav' instead."
Expand Down

0 comments on commit 9cbc1bc

Please sign in to comment.