From 9cbc1bc588f992a4a8564cb4f5d280793a24a272 Mon Sep 17 00:00:00 2001
From: Hagen Wierstorf <hwierstorf@audeering.com>
Date: Mon, 29 Jan 2024 16:19:43 +0100
Subject: [PATCH] Add support for writing to MP3 files (#146)

* Add support for writing to MP3 files

* Fix linter error

* Simplify bit-depth test

* Fix typo

* Further simplify tests
---
 README.rst              |   2 +-
 audiofile/core/io.py    |   5 +-
 audiofile/core/utils.py |   1 +
 docs/usage.rst          |   2 +-
 tests/test_audiofile.py | 100 +++++++++++-----------------------------
 5 files changed, 34 insertions(+), 76 deletions(-)

diff --git a/README.rst b/README.rst
index 9171b9f..12f7b34 100644
--- a/README.rst
+++ b/README.rst
@@ -14,7 +14,7 @@ ffmpeg_,
 sox_,
 and mediainfo_.
 In addition,
-it can write WAV, FLAC, and OGG files.
+it can write WAV, FLAC, MP3, and OGG files.
 
 Have a look at the installation_ and usage_ instructions as a starting point.
 
diff --git a/audiofile/core/io.py b/audiofile/core/io.py
index 64cdc92..64956ec 100644
--- a/audiofile/core/io.py
+++ b/audiofile/core/io.py
@@ -451,9 +451,10 @@ def write(
     """Write (normalized) audio files.
 
     Save audio data provided as an array of shape ``[channels, samples]``
-    to a WAV, FLAC, or OGG file.
+    to a WAV, FLAC, MP3, or OGG file.
     ``channels`` can be up to 65535 for WAV,
     255 for OGG,
+    2 for MP3,
     and 8 for FLAC.
     For monaural audio the array can be one-dimensional.
 
@@ -461,7 +462,7 @@ def write(
 
     Args:
         file: file name of output audio file.
-            The format (WAV, FLAC, OGG) will be inferred from the file name
+            The format (WAV, FLAC, MP3, OGG) will be inferred from the file name
         signal: audio data to write
         sampling_rate: sample rate of the audio data
         bit_depth: bit depth of written file in bit,
diff --git a/audiofile/core/utils.py b/audiofile/core/utils.py
index a40ebff..eab3955 100644
--- a/audiofile/core/utils.py
+++ b/audiofile/core/utils.py
@@ -10,6 +10,7 @@
 MAX_CHANNELS = {
     'wav': 65535,
     'ogg': 255,
+    'mp3': 2,
     'flac': 8,
 }
 r"""Maximum number of channels per format."""
diff --git a/docs/usage.rst b/docs/usage.rst
index 20ada2c..daba831 100644
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -7,7 +7,7 @@ ffmpeg_,
 sox_,
 and mediainfo_,
 if those are available on your system.
-In addition, it can create WAV, FLAC, or OGG files.
+In addition, it can create WAV, FLAC, MP3, or OGG files.
 
 
 Write a file
diff --git a/tests/test_audiofile.py b/tests/test_audiofile.py
index b7c5aaa..3b9b5db 100644
--- a/tests/test_audiofile.py
+++ b/tests/test_audiofile.py
@@ -1,6 +1,5 @@
 import os
 import re
-import subprocess
 
 import numpy as np
 from numpy.testing import assert_allclose
@@ -84,21 +83,6 @@ def non_audio_file(tmpdir, request):
         os.remove(broken_file)
 
 
-def convert_to_mp3(infile, outfile, sampling_rate, channels):
-    """Convert file to MP3 using ffmpeg."""
-    subprocess.call(
-        [
-            'ffmpeg',
-            '-i', infile,
-            '-vn',
-            '-ar', str(sampling_rate),
-            '-ac', str(channels),
-            '-b:a', '192k',
-            outfile,
-        ]
-    )
-
-
 def tolerance(condition, sampling_rate=0):
     """Absolute tolerance for different condition."""
     tol = 0
@@ -319,12 +303,7 @@ def test_convert_to_wav(tmpdir, normalize, bit_depth, file_extension):
         channels=channels,
     )
     infile = str(tmpdir.join(f'signal.{file_extension}'))
-    if file_extension == 'mp3':
-        tmpfile = str(tmpdir.join('signal-tmp.wav'))
-        af.write(tmpfile, signal, sampling_rate, bit_depth=bit_depth)
-        convert_to_mp3(tmpfile, infile, sampling_rate, channels)
-    else:
-        af.write(infile, signal, sampling_rate, bit_depth=bit_depth)
+    af.write(infile, signal, sampling_rate, bit_depth=bit_depth)
     if file_extension == 'wav':
         error_msg = (
             f"'{infile}' would be overwritten. "
@@ -343,12 +322,6 @@ def test_convert_to_wav(tmpdir, normalize, bit_depth, file_extension):
             normalize=normalize,
             overwrite=True,
         )
-    elif file_extension == 'mp3':
-        outfile = af.convert_to_wav(
-            infile,
-            bit_depth=bit_depth,
-            normalize=normalize,
-        )
     else:
         outfile = str(tmpdir.join('signal_converted.wav'))
         af.convert_to_wav(
@@ -473,31 +446,38 @@ def test_magnitude(tmpdir, magnitude, normalize, bit_depth, sampling_rate):
     assert type(_magnitude(sig)) is np.float32
 
 
-@pytest.mark.parametrize('file_type', ['wav', 'flac', 'ogg'])
+@pytest.mark.parametrize('file_type', ['wav', 'flac', 'mp3', 'ogg'])
 @pytest.mark.parametrize('sampling_rate', [8000, 48000])
 @pytest.mark.parametrize("channels", [1, 2, 8, 255])
 @pytest.mark.parametrize('magnitude', [0.01])
 def test_file_type(tmpdir, file_type, magnitude, sampling_rate, channels):
+
+    # Skip unsupported combinations
+    if file_type == 'flac' and channels > 8:
+        return None
+    if file_type == 'mp3' and channels > 2:
+        return None
+
     file = str(tmpdir.join('signal.' + file_type))
     signal = sine(
         magnitude=magnitude,
         sampling_rate=sampling_rate,
         channels=channels,
     )
-    # Skip unallowed combination
-    if file_type == 'flac' and channels > 8:
-        return 0
-    # Allowed combinations
     bit_depth = 16
     sig, fs = write_and_read(file, signal, sampling_rate, bit_depth=bit_depth)
     # Test file type
     assert audeer.file_extension(file) == file_type
     # Test magnitude
+    if file_type == 'mp3':
+        atol = tolerance(8)
+    else:
+        atol = tolerance(16)
     assert_allclose(
         _magnitude(sig),
         magnitude,
         rtol=0,
-        atol=tolerance(16),
+        atol=atol,
     )
     # Test metadata
     info = soundfile.info(file)
@@ -505,48 +485,18 @@ def test_file_type(tmpdir, file_type, magnitude, sampling_rate, channels):
     assert info.samplerate == sampling_rate
     assert _channels(sig) == channels
     assert info.channels == channels
-    assert _samples(sig) == _samples(signal)
-    assert info.frames == _samples(signal)
-    if file_type == 'ogg':
-        bit_depth = None
-    assert af.bit_depth(file) == bit_depth
-
-
-@pytest.mark.parametrize('sampling_rate', [8000, 48000])
-@pytest.mark.parametrize("channels", [1, 2])
-@pytest.mark.parametrize('magnitude', [0.01])
-def test_mp3(tmpdir, magnitude, sampling_rate, channels):
-
-    signal = sine(magnitude=magnitude,
-                  sampling_rate=sampling_rate,
-                  channels=channels)
-    # Create wav file and use ffmpeg to convert to mp3
-    wav_file = str(tmpdir.join('signal.wav'))
-    mp3_file = str(tmpdir.join('signal.mp3'))
-    af.write(wav_file, signal, sampling_rate)
-    convert_to_mp3(wav_file, mp3_file, sampling_rate, channels)
-    assert audeer.file_extension(mp3_file) == 'mp3'
-    sig, fs = af.read(mp3_file)
-    assert fs == sampling_rate
-    assert _channels(sig) == channels
     if channels == 1:
         assert sig.ndim == 1
     else:
         assert sig.ndim == 2
-    assert af.channels(mp3_file) == _channels(sig)
-    assert af.sampling_rate(mp3_file) == sampling_rate
-    assert af.samples(mp3_file) == _samples(sig)
-    assert af.duration(mp3_file) == _duration(sig, sampling_rate)
-    assert_allclose(
-        af.duration(mp3_file, sloppy=True),
-        _duration(sig, sampling_rate),
-        rtol=0,
-        atol=0.2,
-    )
-    assert af.bit_depth(mp3_file) is None
+    assert _samples(sig) == _samples(signal)
+    assert info.frames == _samples(signal)
+    if file_type in ['mp3', 'ogg']:
+        bit_depth = None
+    assert af.bit_depth(file) == bit_depth
 
 
-def test_formats():
+def test_other_formats():
     files = [
         'gs-16b-1c-44100hz.opus',
         'gs-16b-1c-8000hz.amr',
@@ -1183,7 +1133,7 @@ def test_read_duration_and_offset_file_formats(tmpdir):
     mp3_file = str(tmpdir.join('signal.mp3'))
     m4a_file = audeer.path(ASSETS_DIR, 'gs-16b-1c-44100hz.m4a')
     af.write(wav_file, signal, sampling_rate)
-    convert_to_mp3(wav_file, mp3_file, sampling_rate, channels)
+    af.write(mp3_file, signal, sampling_rate)
 
     for file in [wav_file, mp3_file, m4a_file]:
         # Duration and offset in seconds
@@ -1294,7 +1244,7 @@ def test_read_duration_and_offset_rounding(
         # duration of 0 is handled inside af.read()
         # even when duration is only 0 after rounding
         # as ffmpeg cannot handle those cases
-        return 0
+        return None
 
     # sox
     convert_file = str(tmpdir.join('signal-sox.wav'))
@@ -1337,6 +1287,12 @@ def test_write_errors():
     )
     with pytest.raises(RuntimeError, match=expected_error):
         write_and_read('test.flac', np.zeros((9, 100)), sampling_rate)
+    expected_error = (
+        "The maximum number of allowed channels "
+        "for 'mp3' is 2. Consider using 'wav' instead."
+    )
+    with pytest.raises(RuntimeError, match=expected_error):
+        write_and_read('test.mp3', np.zeros((3, 100)), sampling_rate)
     expected_error = (
         "The maximum number of allowed channels "
         "for 'ogg' is 255. Consider using 'wav' instead."