aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSadaf Ebrahimi <sadafebrahimi@google.com>2023-05-04 20:46:48 +0000
committerAutomerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>2023-05-04 20:46:48 +0000
commit940639fb3f02293159ded0540c3579dea1aafd90 (patch)
tree8aadf66cbde3437f45e51d82dccf398c159f9058
parent0702d7cd0aaf1dcf9e165523bc293ac6eab94648 (diff)
parenta25bf35bd8469ccf58f6024199bd972419d34547 (diff)
downloadsonic-940639fb3f02293159ded0540c3579dea1aafd90.tar.gz
Upgrade sonic to 0555641f2d7e52a3d1720b4ae5affb5d50bdde23 am: 16636af836 am: a99dc1d93e am: b1f4db4bca am: 9d41c7035f am: a25bf35bd8
Original change: https://android-review.googlesource.com/c/platform/external/sonic/+/2578772 Change-Id: I86a4d1af8da51ae11c44bedd6beb1ae4061984a8 Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
-rw-r--r--.gitignore1
-rw-r--r--Android.bp1
-rw-r--r--LICENSE (renamed from COPYING)0
-rw-r--r--METADATA8
-rw-r--r--Main.java7
-rw-r--r--Makefile148
-rw-r--r--README9
-rw-r--r--Sonic.java207
-rw-r--r--TODO13
-rw-r--r--debian/control2
-rw-r--r--doc/index.md4
-rw-r--r--main.c279
-rw-r--r--main_lite.c103
-rw-r--r--sonic.c2085
-rw-r--r--sonic.h192
-rw-r--r--sonic_lite.c371
-rw-r--r--sonic_lite.h52
-rw-r--r--spectrogram.c377
-rw-r--r--wave.c573
-rw-r--r--wave.h10
20 files changed, 2884 insertions, 1558 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1b2081a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.obj
diff --git a/Android.bp b/Android.bp
index 484caa3..82936cd 100644
--- a/Android.bp
+++ b/Android.bp
@@ -16,7 +16,6 @@ license {
"SPDX-license-identifier-Apache-2.0",
],
license_text: [
- "COPYING",
"NOTICE",
],
}
diff --git a/COPYING b/LICENSE
index d645695..d645695 100644
--- a/COPYING
+++ b/LICENSE
diff --git a/METADATA b/METADATA
index 699d18f..c531560 100644
--- a/METADATA
+++ b/METADATA
@@ -7,13 +7,13 @@ description: "Sonic is a simple algorithm for speeding up or slowing down speech
third_party {
url {
type: ARCHIVE
- value: "https://github.com/waywardgeek/sonic/archive/60eeb064f62a106069d2ce148fabd724f9df9780.zip"
+ value: "https://github.com/waywardgeek/sonic/archive/0555641f2d7e52a3d1720b4ae5affb5d50bdde23.zip"
}
- version: "60eeb064f62a106069d2ce148fabd724f9df9780"
+ version: "0555641f2d7e52a3d1720b4ae5affb5d50bdde23"
license_type: NOTICE
last_upgrade_date {
year: 2023
- month: 2
- day: 27
+ month: 5
+ day: 3
}
}
diff --git a/Main.java b/Main.java
index 31a68d4..437857f 100644
--- a/Main.java
+++ b/Main.java
@@ -5,7 +5,6 @@ package sonic;
import java.io.File;
import java.io.IOException;
-
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
@@ -60,14 +59,14 @@ public class Main {
public static void main(
String[] argv) throws UnsupportedAudioFileException, IOException, LineUnavailableException
{
- float speed = 2.0f;
+ float speed = 1.0f;
float pitch = 1.0f;
- float rate = 1.0f;
+ float rate = 1.5f;
float volume = 1.0f;
boolean emulateChordPitch = false;
int quality = 0;
- AudioInputStream stream = AudioSystem.getAudioInputStream(new File("talking.wav"));
+ AudioInputStream stream = AudioSystem.getAudioInputStream(new File("stereo_test.wav"));
AudioFormat format = stream.getFormat();
int sampleRate = (int)format.getSampleRate();
int numChannels = format.getChannels();
diff --git a/Makefile b/Makefile
index 696e211..2f7fd60 100644
--- a/Makefile
+++ b/Makefile
@@ -5,56 +5,136 @@
# safe. We call malloc, and older Linux versions only linked in the thread-safe
# malloc if -pthread is specified.
-SONAME=soname
+# Uncomment this if you want to link in spectrogram generation. It is not
+# needed to adjust speech speed or pitch. It is included primarily to provide
+# high-quality spectrograms with low CPU overhead, for applications such as
+# speech recognition.
+#USE_SPECTROGRAM=1
+
+PREFIX=/usr
+
UNAME := $(shell uname)
ifeq ($(UNAME), Darwin)
- SONAME=install_name
+ PREFIX=/usr/local
endif
-#CFLAGS=-Wall -g -ansi -fPIC -pthread
-CFLAGS=-Wall -O3 -ansi -fPIC -pthread
-LIB_TAG=0.2.0
-CC=gcc
-PREFIX=/usr
+
+BINDIR=$(PREFIX)/bin
LIBDIR=$(PREFIX)/lib
+INCDIR=$(PREFIX)/include
-all: sonic libsonic.so.$(LIB_TAG) libsonic.a
+SONAME=-soname,
+SHARED_OPT=-shared
+LIB_NAME=libsonic.so
+LIB_INTERNAL_NAME=libsonic_internal.so
+LIB_TAG=.0.3.0
-sonic: wave.o main.o libsonic.so.$(LIB_TAG)
- $(CC) $(CFLAGS) -o sonic wave.o main.o libsonic.so.$(LIB_TAG)
+ifeq ($(UNAME), Darwin)
+ SONAME=-install_name,$(LIBDIR)/
+ SHARED_OPT=-dynamiclib
+ LIB_NAME=libsonic.dylib
+ LIB_TAG=
+endif
+
+#CFLAGS=-Wall -Wno-unused-function -g -ansi -fPIC -pthread
+CFLAGS ?= -O3
+CFLAGS += -Wall -Wno-unused-function -ansi -fPIC -pthread
+
+CC=gcc
+
+# Set NO_MALLOC=1 as a parameter to make to compile Sonic with static buffers
+# instead of calling malloc. This is useful primarily on microcontrollers.
+ifeq ($(NO_MALLOC), 1)
+ CFLAGS+= -DSONIC_NO_MALLOC
+ # Set MAX_MEMORY=<memory size> if you need to increase the static memory buffer
+ ifdef MAX_MEMORY
+ CFLAGS+= -DSONIC_MAX_MEMORY=$(MAX_MEMORY)
+ else
+ CFLAGS+= -DSONIC_MAX_MEMORY=4096
+ endif
+endif
+
+ifdef MIN_PITCH
+ CFLAGS+= -DSONIC_MIN_PITCH=$(MIN_PITCH)
+endif
+
+EXTRA_SRC=
+# Set this to empty if not using spectrograms.
+FFTLIB=
+ifeq ($(USE_SPECTROGRAM), 1)
+ CFLAGS+= -DSONIC_SPECTROGRAM
+ EXTRA_SRC+= spectrogram.c
+ FFTLIB= -L$(LIBDIR) -lfftw3
+endif
+EXTRA_OBJ=$(EXTRA_SRC:.c=.o)
+
+all: sonic sonic_lite $(LIB_NAME)$(LIB_TAG) libsonic.a libsonic_internal.a $(LIB_INTERNAL_NAME)$(LIB_TAG)
+
+sonic: wave.o main.o libsonic.a
+ $(CC) $(CFLAGS) $(LDFLAGS) -o sonic wave.o main.o libsonic.a -lm $(FFTLIB)
+
+sonic_lite: wave.c main_lite.c sonic_lite.c sonic_lite.h
+ $(CC) $(CFLAGS) $(LDFLAGS) -o sonic_lite sonic_lite.c wave.c main_lite.c
sonic.o: sonic.c sonic.h
- $(CC) $(CFLAGS) -c sonic.c
+ $(CC) $(CPPFLAGS) $(CFLAGS) -c sonic.c
+
+# Define a version of sonic with the internal names defined so others (e.g. Speedy)
+# can build new APIs that supersede the default API.
+sonic_internal.o: sonic.c sonic.h
+ $(CC) $(CPPFLAGS) $(CFLAGS) -DSONIC_INTERNAL -DSONIC_SPECTROGRAM -c sonic.c -o sonic_internal.o
wave.o: wave.c wave.h
- $(CC) $(CFLAGS) -c wave.c
+ $(CC) $(CPPFLAGS) $(CFLAGS) -c wave.c
main.o: main.c sonic.h wave.h
- $(CC) $(CFLAGS) -c main.c
+ $(CC) $(CPPFLAGS) $(CFLAGS) -c main.c
+
+spectrogram.o: spectrogram.c sonic.h
+ $(CC) $(CPPFLAGS) $(CFLAGS) -DSONIC_SPECTROGRAM -c spectrogram.c
-libsonic.so.$(LIB_TAG): sonic.o
- $(CC) $(CFLAGS) -shared -Wl,-$(SONAME),libsonic.so.0 sonic.o -o libsonic.so.$(LIB_TAG)
- ln -sf libsonic.so.$(LIB_TAG) libsonic.so
- ln -sf libsonic.so.$(LIB_TAG) libsonic.so.0
+$(LIB_NAME)$(LIB_TAG): $(EXTRA_OBJ) sonic.o wave.o
+ $(CC) $(CFLAGS) $(LDFLAGS) $(SHARED_OPT) -Wl,$(SONAME)$(LIB_NAME) $(EXTRA_OBJ) sonic.o -o $(LIB_NAME)$(LIB_TAG) $(FFTLIB) wave.o
+ifneq ($(UNAME), Darwin)
+ ln -sf $(LIB_NAME)$(LIB_TAG) $(LIB_NAME)
+ ln -sf $(LIB_NAME)$(LIB_TAG) $(LIB_NAME).0
+endif
+
+$(LIB_INTERNAL_NAME)$(LIB_TAG): $(EXTRA_OBJ) sonic_internal.o spectrogram.o wave.o
+ $(CC) $(CFLAGS) $(LDFLAGS) $(SHARED_OPT) -Wl,$(SONAME)$(LIB_INTERNAL_NAME) $(EXTRA_OBJ) sonic_internal.o spectrogram.o -o $(LIB_INTERNAL_NAME)$(LIB_TAG) $(FFTLIB) wave.o
+ifneq ($(UNAME), Darwin)
+ ln -sf $(LIB_INTERNAL_NAME)$(LIB_TAG) $(LIB_INTERNAL_NAME)
+ ln -sf $(LIB_INTERNAL_NAME)$(LIB_TAG) $(LIB_INTERNAL_NAME).0
+endif
+
+libsonic.a: $(EXTRA_OBJ) sonic.o wave.o
+ $(AR) cqs libsonic.a $(EXTRA_OBJ) sonic.o wave.o
-libsonic.a: sonic.o
- $(AR) cqs libsonic.a sonic.o
+# Define a version of sonic with the internal names defined so others (e.g. Speedy)
+# can build new APIs that supersede the default API.
+libsonic_internal.a: $(EXTRA_OBJ) sonic_internal.o wave.o
+ $(AR) cqs libsonic_internal.a $(EXTRA_OBJ) sonic_internal.o wave.o
-install: sonic libsonic.so.$(LIB_TAG) sonic.h
- install -d $(DESTDIR)$(PREFIX)/bin $(DESTDIR)$(PREFIX)/include $(DESTDIR)$(PREFIX)/lib
- install sonic $(DESTDIR)$(PREFIX)/bin
- install sonic.h $(DESTDIR)$(PREFIX)/include
- install libsonic.so.$(LIB_TAG) $(DESTDIR)$(PREFIX)/lib
+install: sonic $(LIB_NAME)$(LIB_TAG) sonic.h
+ install -d $(DESTDIR)$(BINDIR) $(DESTDIR)$(INCDIR) $(DESTDIR)$(LIBDIR)
+ install sonic $(DESTDIR)$(BINDIR)
+ install sonic.h $(DESTDIR)$(INCDIR)
install libsonic.a $(DESTDIR)$(LIBDIR)
- ln -sf libsonic.so.$(LIB_TAG) $(DESTDIR)$(PREFIX)/lib/libsonic.so
- ln -sf libsonic.so.$(LIB_TAG) $(DESTDIR)$(PREFIX)/lib/libsonic.so.0
-
-uninstall:
- rm -f $(DESTDIR)$(PREFIX)/bin/sonic
- rm -f $(DESTDIR)$(PREFIX)/include/sonic.h
- rm -f $(DESTDIR)$(PREFIX)/lib/libsonic.so.$(LIB_TAG)
- rm -f $(DESTDIR)$(PREFIX)/lib/libsonic.so
- rm -f $(DESTDIR)$(PREFIX)/lib/libsonic.so.0
+ install $(LIB_NAME)$(LIB_TAG) $(DESTDIR)$(LIBDIR)
+ifneq ($(UNAME), Darwin)
+ ln -sf $(LIB_NAME)$(LIB_TAG) $(DESTDIR)$(LIBDIR)/$(LIB_NAME)
+ ln -sf $(LIB_NAME)$(LIB_TAG) $(DESTDIR)$(LIBDIR)/$(LIB_NAME).0
+endif
+
+uninstall:
+ rm -f $(DESTDIR)$(BINDIR)/sonic
+ rm -f $(DESTDIR)$(INCDIR)/sonic.h
rm -f $(DESTDIR)$(LIBDIR)/libsonic.a
+ rm -f $(DESTDIR)$(LIBDIR)/$(LIB_NAME)$(LIB_TAG)
+ rm -f $(DESTDIR)$(LIBDIR)/$(LIB_NAME).0
+ rm -f $(DESTDIR)$(LIBDIR)/$(LIB_NAME)
clean:
- rm -f *.o sonic libsonic.so* libsonic.a
+ rm -f *.o sonic sonic_lite $(LIB_NAME)* libsonic.a libsonic_internal.a test.wav
+
+check:
+ ./sonic -s 2.0 ./samples/talking.wav ./test.wav
diff --git a/README b/README
index 9a0b9fe..243bdc9 100644
--- a/README
+++ b/README
@@ -32,5 +32,14 @@ real 0m52.043s
user 0m51.190s
sys 0m0.310s
+Update, May 7, 2017
+-------------------
+I upgraded the pitch change algorithm to use a 12-point sinc FIR filter for
+interpolation, rather than linearly interpolating between points. This
+significantly reduces noise introduced by the pitch change algorithm. It is
+most noticeable in low-sample-rate streams, such as the 11,025 Hz output of the
+Eloquence TTS engine. The upgrade is in both the C and Java versions.
+
+
Author: Bill Cox
email: waywardgeek@gmail.com
diff --git a/Sonic.java b/Sonic.java
index a3394d7..3a25940 100644
--- a/Sonic.java
+++ b/Sonic.java
@@ -10,10 +10,66 @@ package sonic;
public class Sonic {
- private static final int SONIC_MIN_PITCH = 65;
- private static final int SONIC_MAX_PITCH = 400;
- /* This is used to down-sample some inputs to improve speed */
- private static final int SONIC_AMDF_FREQ = 4000;
+ private static final int SONIC_MIN_PITCH = 65;
+ private static final int SONIC_MAX_PITCH = 400;
+ // This is used to down-sample some inputs to improve speed
+ private static final int SONIC_AMDF_FREQ = 4000;
+ // The number of points to use in the sinc FIR filter for resampling.
+ private static final int SINC_FILTER_POINTS = 12;
+ private static final int SINC_TABLE_SIZE = 601;
+
+ // Lookup table for windowed sinc function of SINC_FILTER_POINTS points.
+ // The code to generate this is in the header comment of sonic.c.
+ private static final short sincTable[] = {
+ 0, 0, 0, 0, 0, 0, 0, -1, -1, -2, -2, -3, -4, -6, -7, -9, -10, -12, -14,
+ -17, -19, -21, -24, -26, -29, -32, -34, -37, -40, -42, -44, -47, -48, -50,
+ -51, -52, -53, -53, -53, -52, -50, -48, -46, -43, -39, -34, -29, -22, -16,
+ -8, 0, 9, 19, 29, 41, 53, 65, 79, 92, 107, 121, 137, 152, 168, 184, 200,
+ 215, 231, 247, 262, 276, 291, 304, 317, 328, 339, 348, 357, 363, 369, 372,
+ 374, 375, 373, 369, 363, 355, 345, 332, 318, 300, 281, 259, 234, 208, 178,
+ 147, 113, 77, 39, 0, -41, -85, -130, -177, -225, -274, -324, -375, -426,
+ -478, -530, -581, -632, -682, -731, -779, -825, -870, -912, -951, -989,
+ -1023, -1053, -1080, -1104, -1123, -1138, -1149, -1154, -1155, -1151,
+ -1141, -1125, -1105, -1078, -1046, -1007, -963, -913, -857, -796, -728,
+ -655, -576, -492, -403, -309, -210, -107, 0, 111, 225, 342, 462, 584, 708,
+ 833, 958, 1084, 1209, 1333, 1455, 1575, 1693, 1807, 1916, 2022, 2122, 2216,
+ 2304, 2384, 2457, 2522, 2579, 2625, 2663, 2689, 2706, 2711, 2705, 2687,
+ 2657, 2614, 2559, 2491, 2411, 2317, 2211, 2092, 1960, 1815, 1658, 1489,
+ 1308, 1115, 912, 698, 474, 241, 0, -249, -506, -769, -1037, -1310, -1586,
+ -1864, -2144, -2424, -2703, -2980, -3254, -3523, -3787, -4043, -4291,
+ -4529, -4757, -4972, -5174, -5360, -5531, -5685, -5819, -5935, -6029,
+ -6101, -6150, -6175, -6175, -6149, -6096, -6015, -5905, -5767, -5599,
+ -5401, -5172, -4912, -4621, -4298, -3944, -3558, -3141, -2693, -2214,
+ -1705, -1166, -597, 0, 625, 1277, 1955, 2658, 3386, 4135, 4906, 5697, 6506,
+ 7332, 8173, 9027, 9893, 10769, 11654, 12544, 13439, 14335, 15232, 16128,
+ 17019, 17904, 18782, 19649, 20504, 21345, 22170, 22977, 23763, 24527,
+ 25268, 25982, 26669, 27327, 27953, 28547, 29107, 29632, 30119, 30569,
+ 30979, 31349, 31678, 31964, 32208, 32408, 32565, 32677, 32744, 32767,
+ 32744, 32677, 32565, 32408, 32208, 31964, 31678, 31349, 30979, 30569,
+ 30119, 29632, 29107, 28547, 27953, 27327, 26669, 25982, 25268, 24527,
+ 23763, 22977, 22170, 21345, 20504, 19649, 18782, 17904, 17019, 16128,
+ 15232, 14335, 13439, 12544, 11654, 10769, 9893, 9027, 8173, 7332, 6506,
+ 5697, 4906, 4135, 3386, 2658, 1955, 1277, 625, 0, -597, -1166, -1705,
+ -2214, -2693, -3141, -3558, -3944, -4298, -4621, -4912, -5172, -5401,
+ -5599, -5767, -5905, -6015, -6096, -6149, -6175, -6175, -6150, -6101,
+ -6029, -5935, -5819, -5685, -5531, -5360, -5174, -4972, -4757, -4529,
+ -4291, -4043, -3787, -3523, -3254, -2980, -2703, -2424, -2144, -1864,
+ -1586, -1310, -1037, -769, -506, -249, 0, 241, 474, 698, 912, 1115, 1308,
+ 1489, 1658, 1815, 1960, 2092, 2211, 2317, 2411, 2491, 2559, 2614, 2657,
+ 2687, 2705, 2711, 2706, 2689, 2663, 2625, 2579, 2522, 2457, 2384, 2304,
+ 2216, 2122, 2022, 1916, 1807, 1693, 1575, 1455, 1333, 1209, 1084, 958, 833,
+ 708, 584, 462, 342, 225, 111, 0, -107, -210, -309, -403, -492, -576, -655,
+ -728, -796, -857, -913, -963, -1007, -1046, -1078, -1105, -1125, -1141,
+ -1151, -1155, -1154, -1149, -1138, -1123, -1104, -1080, -1053, -1023, -989,
+ -951, -912, -870, -825, -779, -731, -682, -632, -581, -530, -478, -426,
+ -375, -324, -274, -225, -177, -130, -85, -41, 0, 39, 77, 113, 147, 178,
+ 208, 234, 259, 281, 300, 318, 332, 345, 355, 363, 369, 373, 375, 374, 372,
+ 369, 363, 357, 348, 339, 328, 317, 304, 291, 276, 262, 247, 231, 215, 200,
+ 184, 168, 152, 137, 121, 107, 92, 79, 65, 53, 41, 29, 19, 9, 0, -8, -16,
+ -22, -29, -34, -39, -43, -46, -48, -50, -52, -53, -53, -53, -52, -51, -50,
+ -48, -47, -44, -42, -40, -37, -34, -32, -29, -26, -24, -21, -19, -17, -14,
+ -12, -10, -9, -7, -6, -4, -3, -2, -2, -1, -1, 0, 0, 0, 0, 0, 0, 0
+ };
private short inputBuffer[];
private short outputBuffer[];
@@ -41,34 +97,31 @@ public class Sonic {
private int sampleRate;
private int prevPeriod;
private int prevMinDiff;
+ private int minDiff;
+ private int maxDiff;
// Resize the array.
private short[] resize(
- short[] oldArray,
- int newLength)
+ short[] oldArray,
+ int newLength)
{
- newLength *= numChannels;
- short[] newArray = new short[newLength];
+ newLength *= numChannels;
+ short[] newArray = new short[newLength];
int length = oldArray.length <= newLength? oldArray.length : newLength;
-
-
- for(int x = 0; x < length; x++) {
- newArray[x] = oldArray[x];
- }
+
+ System.arraycopy(oldArray, 0, newArray, 0, length);
return newArray;
}
// Move samples from one array to another. May move samples down within an array, but not up.
private void move(
- short dest[],
- int destPos,
- short source[],
- int sourcePos,
- int numSamples)
+ short dest[],
+ int destPos,
+ short source[],
+ int sourcePos,
+ int numSamples)
{
- for(int xSample = 0; xSample < numSamples*numChannels; xSample++) {
- dest[destPos*numChannels + xSample] = source[sourcePos*numChannels + xSample];
- }
+ System.arraycopy(source, sourcePos*numChannels, dest, destPos*numChannels, numSamples*numChannels);
}
// Scale the samples by the factor.
@@ -78,11 +131,13 @@ public class Sonic {
int numSamples,
float volume)
{
+ // Convert volume to fixed-point, with a 12 bit fraction.
int fixedPointVolume = (int)(volume*4096.0f);
int start = position*numChannels;
int stop = start + numSamples*numChannels;
for(int xSample = start; xSample < stop; xSample++) {
+ // Convert back from fixed point to 16-bit integer.
int value = (samples[xSample]*fixedPointVolume) >> 12;
if(value > 32767) {
value = 32767;
@@ -296,7 +351,7 @@ public class Sonic {
enlargeInputBufferIfNeeded(numSamples);
int xBuffer = numInputSamples*numChannels;
for(int xSample = 0; xSample < numSamples*numChannels; xSample++) {
- sample = (short)((samples[xSample] & 0xff) - 128); // Convert from unsigned to signed
+ sample = (short)((samples[xSample] & 0xff) - 128); // Convert from unsigned to signed
inputBuffer[xBuffer++] = (short) (sample << 8);
}
numInputSamples += numSamples;
@@ -307,13 +362,13 @@ public class Sonic {
byte inBuffer[],
int numBytes)
{
- int numSamples = numBytes/(2*numChannels);
+ int numSamples = numBytes/(2*numChannels);
short sample;
enlargeInputBufferIfNeeded(numSamples);
int xBuffer = numInputSamples*numChannels;
for(int xByte = 0; xByte + 1 < numBytes; xByte += 2) {
- sample = (short)((inBuffer[xByte] & 0xff) | (inBuffer[xByte + 1] << 8));
+ sample = (short)((inBuffer[xByte] & 0xff) | (inBuffer[xByte + 1] << 8));
inputBuffer[xBuffer++] = sample;
}
numInputSamples += numSamples;
@@ -371,7 +426,7 @@ public class Sonic {
numSamples = maxSamples;
}
for(int xSample = 0; xSample < numSamples*numChannels; xSample++) {
- samples[xSample++] = (outputBuffer[xSample])/32767.0f;
+ samples[xSample] = (outputBuffer[xSample])/32767.0f;
}
move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples);
numOutputSamples = remainingSamples;
@@ -417,7 +472,7 @@ public class Sonic {
numSamples = maxSamples;
}
for(int xSample = 0; xSample < numSamples*numChannels; xSample++) {
- samples[xSample] = (byte)((outputBuffer[xSample] >> 8) + 128);
+ samples[xSample] = (byte)((outputBuffer[xSample] >> 8) + 128);
}
move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples);
numOutputSamples = remainingSamples;
@@ -430,7 +485,7 @@ public class Sonic {
byte outBuffer[],
int maxBytes)
{
- int maxSamples = maxBytes/(2*numChannels);
+ int maxSamples = maxBytes/(2*numChannels);
int numSamples = numOutputSamples;
int remainingSamples = 0;
@@ -442,9 +497,9 @@ public class Sonic {
numSamples = maxSamples;
}
for(int xSample = 0; xSample < numSamples*numChannels; xSample++) {
- short sample = outputBuffer[xSample];
- outBuffer[xSample << 1] = (byte)(sample & 0xff);
- outBuffer[(xSample << 1) + 1] = (byte)(sample >> 8);
+ short sample = outputBuffer[xSample];
+ outBuffer[xSample << 1] = (byte)(sample & 0xff);
+ outBuffer[(xSample << 1) + 1] = (byte)(sample >> 8);
}
move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples);
numOutputSamples = remainingSamples;
@@ -508,15 +563,12 @@ public class Sonic {
}
// Find the best frequency match in the range, and given a sample skip multiple.
- // For now, just find the pitch of the first channel. Note that retMinDiff and
- // retMaxDiff are Int objects, which the caller will need to create with new.
+ // For now, just find the pitch of the first channel.
private int findPitchPeriodInRange(
short samples[],
int position,
int minPeriod,
- int maxPeriod,
- Integer retMinDiff,
- Integer retMaxDiff)
+ int maxPeriod)
{
int bestPeriod = 0, worstPeriod = 255;
int minDiff = 1, maxDiff = 0;
@@ -541,15 +593,15 @@ public class Sonic {
worstPeriod = period;
}
}
- retMinDiff = minDiff/bestPeriod;
- retMaxDiff = maxDiff/worstPeriod;
+ this.minDiff = minDiff/bestPeriod;
+ this.maxDiff = maxDiff/worstPeriod;
+
return bestPeriod;
}
// At abrupt ends of voiced words, we can have pitch periods that are better
// approximated by the previous pitch period estimate. Try to detect this case.
private boolean prevPeriodBetter(
- int period,
int minDiff,
int maxDiff,
boolean preferNewPeriod)
@@ -583,8 +635,6 @@ public class Sonic {
int position,
boolean preferNewPeriod)
{
- Integer minDiff = new Integer(0);
- Integer maxDiff = new Integer(0);
int period, retPeriod;
int skip = 1;
@@ -592,11 +642,11 @@ public class Sonic {
skip = sampleRate/SONIC_AMDF_FREQ;
}
if(numChannels == 1 && skip == 1) {
- period = findPitchPeriodInRange(samples, position, minPeriod, maxPeriod, minDiff, maxDiff);
+ period = findPitchPeriodInRange(samples, position, minPeriod, maxPeriod);
} else {
downSampleInput(samples, position, skip);
period = findPitchPeriodInRange(downSampleBuffer, 0, minPeriod/skip,
- maxPeriod/skip, minDiff, maxDiff);
+ maxPeriod/skip);
if(skip != 1) {
period *= skip;
int minP = period - (skip << 2);
@@ -608,14 +658,14 @@ public class Sonic {
maxP = maxPeriod;
}
if(numChannels == 1) {
- period = findPitchPeriodInRange(samples, position, minP, maxP, minDiff, maxDiff);
+ period = findPitchPeriodInRange(samples, position, minP, maxP);
} else {
downSampleInput(samples, position, 1);
- period = findPitchPeriodInRange(downSampleBuffer, 0, minP, maxP, minDiff, maxDiff);
+ period = findPitchPeriodInRange(downSampleBuffer, 0, minP, maxP);
}
}
}
- if(prevPeriodBetter(period, minDiff, maxDiff, preferNewPeriod)) {
+ if(prevPeriodBetter(minDiff, maxDiff, preferNewPeriod)) {
retPeriod = prevPeriod;
} else {
retPeriod = period;
@@ -728,11 +778,11 @@ public class Sonic {
enlargeOutputBufferIfNeeded(newPeriod);
if(pitch >= 1.0f) {
overlapAdd(newPeriod, numChannels, outputBuffer, numOutputSamples, pitchBuffer,
- position, pitchBuffer, position + period - newPeriod);
+ position, pitchBuffer, position + period - newPeriod);
} else {
separation = newPeriod - period;
overlapAddWithSeparation(period, numChannels, separation, outputBuffer, numOutputSamples,
- pitchBuffer, position, pitchBuffer, position);
+ pitchBuffer, position, pitchBuffer, position);
}
numOutputSamples += newPeriod;
position += period;
@@ -740,22 +790,60 @@ public class Sonic {
removePitchSamples(position);
}
+ // Approximate the sinc function times a Hann window from the sinc table.
+ private int findSincCoefficient(int i, int ratio, int width) {
+ int lobePoints = (SINC_TABLE_SIZE-1)/SINC_FILTER_POINTS;
+ int left = i*lobePoints + (ratio*lobePoints)/width;
+ int right = left + 1;
+ int position = i*lobePoints*width + ratio*lobePoints - left*width;
+ int leftVal = sincTable[left];
+ int rightVal = sincTable[right];
+
+ return ((leftVal*(width - position) + rightVal*position) << 1)/width;
+ }
+
+ // Return 1 if value >= 0, else -1. This represents the sign of value.
+ private int getSign(int value) {
+ return value >= 0? 1 : -1;
+ }
+
// Interpolate the new output sample.
private short interpolate(
short in[],
- int inPos,
+ int inPos, // Index to first sample which already includes channel offset.
int oldSampleRate,
int newSampleRate)
{
- short left = in[inPos*numChannels];
- short right = in[inPos*numChannels + numChannels];
+ // Compute N-point sinc FIR-filter here. Clip rather than overflow.
+ int i;
+ int total = 0;
int position = newRatePosition*oldSampleRate;
int leftPosition = oldRatePosition*newSampleRate;
int rightPosition = (oldRatePosition + 1)*newSampleRate;
- int ratio = rightPosition - position;
+ int ratio = rightPosition - position - 1;
int width = rightPosition - leftPosition;
-
- return (short)((ratio*left + (width - ratio)*right)/width);
+ int weight, value;
+ int oldSign;
+ int overflowCount = 0;
+
+ for (i = 0; i < SINC_FILTER_POINTS; i++) {
+ weight = findSincCoefficient(i, ratio, width);
+ /* printf("%u %f\n", i, weight); */
+ value = in[inPos + i*numChannels]*weight;
+ oldSign = getSign(total);
+ total += value;
+ if (oldSign != getSign(total) && getSign(value) == oldSign) {
+ /* We must have overflowed. This can happen with a sinc filter. */
+ overflowCount += oldSign;
+ }
+ }
+ /* It is better to clip than to wrap if there was an overflow. */
+ if (overflowCount > 0) {
+ return Short.MAX_VALUE;
+ } else if (overflowCount < 0) {
+ return Short.MIN_VALUE;
+ }
+ return (short)(total >> 16);
}
// Change the rate.
@@ -766,6 +854,7 @@ public class Sonic {
int newSampleRate = (int)(sampleRate/rate);
int oldSampleRate = sampleRate;
int position;
+ int N = SINC_FILTER_POINTS;
// Set these values to help with the integer math
while(newSampleRate > (1 << 14) || oldSampleRate > (1 << 14)) {
@@ -776,13 +865,13 @@ public class Sonic {
return;
}
moveNewSamplesToPitchBuffer(originalNumOutputSamples);
- // Leave at least one pitch sample in the buffer
- for(position = 0; position < numPitchSamples - 1; position++) {
+ // Leave at least N pitch samples in the buffer
+ for(position = 0; position < numPitchSamples - N; position++) {
while((oldRatePosition + 1)*newSampleRate > newRatePosition*oldSampleRate) {
enlargeOutputBufferIfNeeded(1);
for(int i = 0; i < numChannels; i++) {
- outputBuffer[numOutputSamples*numChannels + i] = interpolate(pitchBuffer, position + i,
- oldSampleRate, newSampleRate);
+ outputBuffer[numOutputSamples*numChannels + i] = interpolate(pitchBuffer,
+ position*numChannels + i, oldSampleRate, newSampleRate);
}
newRatePosition++;
numOutputSamples++;
@@ -818,7 +907,7 @@ public class Sonic {
}
enlargeOutputBufferIfNeeded(newSamples);
overlapAdd(newSamples, numChannels, outputBuffer, numOutputSamples, samples, position,
- samples, position + period);
+ samples, position + period);
numOutputSamples += newSamples;
return newSamples;
}
@@ -841,7 +930,7 @@ public class Sonic {
enlargeOutputBufferIfNeeded(period + newSamples);
move(outputBuffer, numOutputSamples, samples, position, period);
overlapAdd(newSamples, numChannels, outputBuffer, numOutputSamples + period, samples,
- position + period, samples, position);
+ position + period, samples, position);
numOutputSamples += period + newSamples;
return newSamples;
}
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..bacb3e5
--- /dev/null
+++ b/TODO
@@ -0,0 +1,13 @@
+This project still needs some enhancements:
+
+Security hardening
+------------------
+This project should be fuzzed, not just for sound inputs, but for settings
+outside normal ranges, such as setting the speed to 1.0e10. Sonic should be
+secure against attacker-controlled audio signals. However, libsonic on some
+systems may run at elevated privileges, and may be controlled through APIs
+available in user space.
+
+Unit tests
+----------
+Sonic is now widely used, and should be properly covered by unit tests.
diff --git a/debian/control b/debian/control
index caeba95..f756ce7 100644
--- a/debian/control
+++ b/debian/control
@@ -25,7 +25,7 @@ Description: Simple library to speed up or slow down speech
This package contains just the actual library.
libsonic is a very simple library for speeding up or slowing
down speech. It has only basic dependencies, and is meant to
- work on both Linux destop machines and embedded systems.
+ work on both Linux desktop machines and embedded systems.
The key new feature in Sonic versus other libraries is very
high quality at speed up factors well over 2X.
diff --git a/doc/index.md b/doc/index.md
index 1e02141..855abe0 100644
--- a/doc/index.md
+++ b/doc/index.md
@@ -39,7 +39,7 @@ Sonic is Copyright 2010, 2011, Bill Cox, all rights reserved. It is released
as under the Apache 2.0 license. Feel free to contact me at
<waywardgeek@gmail.com>. One user was concerned about patents. I believe the
sonic algorithms do not violate any patents, as most of it is very old, based
-on [PICOLA](http://keizai.yokkaichi-u.ac.jp/~ikeda/research/picola.html), and
+on [PICOLA](https://web.archive.org/web/20120731100136/http://keizai.yokkaichi-u.ac.jp/~ikeda/research/picola.html), and
the new part, for greater than 2X speed up, is clearly a capability most
developers ignore, and would not bother to patent.
@@ -62,7 +62,7 @@ implementation of PICOLA is available in the spandsp library. I find the one in
RockBox quite good, though it's limited to 2X speed up. So far as I know, only
sonic is optimized for speed factors needed by the blind, up to 6X.
-Sonic does all of it's CPU intensive work with integer math, and works well on
+Sonic does all of its CPU intensive work with integer math, and works well on
ARM CPUs without FPUs. It supports multiple channels (stereo), and is also able
to change the pitch of a voice. It works well in streaming audio applications,
and can deal with sound streams in 16-bit signed integer, 32-bit floating point,
diff --git a/main.c b/main.c
index 77829c2..cc6371d 100644
--- a/main.c
+++ b/main.c
@@ -2,7 +2,7 @@
2.0 license.
This file is meant as a simple example for how to use libsonic. It is also a
- useful utility on it's own, which can speed up or slow down wav files, change
+ useful utility on its own, which can speed up or slow down wav files, change
pitch, and scale volume. */
#include <stdio.h>
@@ -14,126 +14,175 @@
#define BUFFER_SIZE 2048
/* Run sonic. */
-static void runSonic(
- waveFile inFile,
- waveFile outFile,
- float speed,
- float pitch,
- float rate,
- float volume,
- int emulateChordPitch,
- int quality,
- int sampleRate,
- int numChannels)
-{
- sonicStream stream = sonicCreateStream(sampleRate, numChannels);
- short inBuffer[BUFFER_SIZE], outBuffer[BUFFER_SIZE];
- int samplesRead, samplesWritten;
+static void runSonic(char* inFileName, char* outFileName, float speed,
+ float pitch, float rate, float volume, int outputSampleRate,
+ int emulateChordPitch, int quality, int computeSpectrogram,
+ int numRows, int numCols) {
+ waveFile inFile, outFile = NULL;
+ sonicStream stream;
+ short inBuffer[BUFFER_SIZE], outBuffer[BUFFER_SIZE];
+ int sampleRate, numChannels, samplesRead, samplesWritten;
- sonicSetSpeed(stream, speed);
- sonicSetPitch(stream, pitch);
- sonicSetRate(stream, rate);
- sonicSetVolume(stream, volume);
- sonicSetChordPitch(stream, emulateChordPitch);
- sonicSetQuality(stream, quality);
- do {
- samplesRead = readFromWaveFile(inFile, inBuffer, BUFFER_SIZE/numChannels);
- if(samplesRead == 0) {
- sonicFlushStream(stream);
- } else {
- sonicWriteShortToStream(stream, inBuffer, samplesRead);
- }
- do {
- samplesWritten = sonicReadShortFromStream(stream, outBuffer,
- BUFFER_SIZE/numChannels);
- if(samplesWritten > 0) {
- writeToWaveFile(outFile, outBuffer, samplesWritten);
- }
- } while(samplesWritten > 0);
- } while(samplesRead > 0);
- sonicDestroyStream(stream);
+ inFile = openInputWaveFile(inFileName, &sampleRate, &numChannels);
+ if (outputSampleRate != 0) {
+ sampleRate = outputSampleRate;
+ }
+ if (inFile == NULL) {
+ fprintf(stderr, "Unable to read wave file %s\n", inFileName);
+ exit(1);
+ }
+ if (!computeSpectrogram) {
+ outFile = openOutputWaveFile(outFileName, sampleRate, numChannels);
+ if (outFile == NULL) {
+ closeWaveFile(inFile);
+ fprintf(stderr, "Unable to open wave file %s for writing\n", outFileName);
+ exit(1);
+ }
+ }
+ stream = sonicCreateStream(sampleRate, numChannels);
+ sonicSetSpeed(stream, speed);
+ sonicSetPitch(stream, pitch);
+ sonicSetRate(stream, rate);
+ sonicSetVolume(stream, volume);
+ sonicSetChordPitch(stream, emulateChordPitch);
+ sonicSetQuality(stream, quality);
+#ifdef SONIC_SPECTROGRAM
+ if (computeSpectrogram) {
+ sonicComputeSpectrogram(stream);
+ }
+#endif /* SONIC_SPECTROGRAM */
+ do {
+ samplesRead = readFromWaveFile(inFile, inBuffer, BUFFER_SIZE / numChannels);
+ if (samplesRead == 0) {
+ sonicFlushStream(stream);
+ } else {
+ sonicWriteShortToStream(stream, inBuffer, samplesRead);
+ }
+ if (!computeSpectrogram) {
+ do {
+ samplesWritten = sonicReadShortFromStream(stream, outBuffer,
+ BUFFER_SIZE / numChannels);
+ if (samplesWritten > 0 && !computeSpectrogram) {
+ writeToWaveFile(outFile, outBuffer, samplesWritten);
+ }
+ } while (samplesWritten > 0);
+ }
+ } while (samplesRead > 0);
+#ifdef SONIC_SPECTROGRAM
+ if (computeSpectrogram) {
+ sonicSpectrogram spectrogram = sonicGetSpectrogram(stream);
+ sonicBitmap bitmap =
+ sonicConvertSpectrogramToBitmap(spectrogram, numRows, numCols);
+ sonicWritePGM(bitmap, outFileName);
+ sonicDestroyBitmap(bitmap);
+ }
+#endif /* SONIC_SPECTROGRAM */
+ sonicDestroyStream(stream);
+ closeWaveFile(inFile);
+ if (!computeSpectrogram) {
+ closeWaveFile(outFile);
+ }
}
/* Print the usage. */
-static void usage(void)
-{
- fprintf(stderr, "Usage: sonic [OPTION]... infile outfile\n"
- " -c -- Modify pitch by emulating vocal chords vibrating\n"
- " faster or slower.\n"
- " -p pitch -- Set pitch scaling factor. 1.3 means 30%% higher.\n"
- " -q -- Disable speed-up heuristics. May increase quality.\n"
- " -r rate -- Set playback rate. 2.0 means 2X faster, and 2X pitch.\n"
- " -s speed -- Set speed up factor. 2.0 means 2X faster.\n"
- " -v volume -- Scale volume by a constant factor.\n");
- exit(1);
+static void usage(void) {
+  /* Write the option summary to stderr; the caller never gets control back
+     because the process exits with status 1. */
+  fprintf(
+      stderr,
+      "Usage: sonic [OPTION]... infile outfile\n"
+      " -c -- Modify pitch by emulating vocal chords vibrating\n"
+      " faster or slower.\n"
+      " -o rate -- Override the sample rate of the output. -o 44200\n"
+      " on an input file at 22100 Hz will play twice as fast\n"
+      " and have twice the pitch.\n"
+      " -p pitch -- Set pitch scaling factor. 1.3 means 30%% higher.\n"
+      " -q -- Disable speed-up heuristics. May increase quality.\n"
+      " -r rate -- Set playback rate. 2.0 means 2X faster, and 2X "
+      "pitch.\n"
+      " -s speed -- Set speed up factor. 2.0 means 2X faster.\n"
+#ifdef SONIC_SPECTROGRAM
+      " -S width height -- Write a spectrogram in outfile in PGM format.\n"
+#endif /* SONIC_SPECTROGRAM */
+      " -v volume -- Scale volume by a constant factor.\n");
+  exit(1);
}
-int main(
- int argc,
- char **argv)
-{
- waveFile inFile, outFile;
- char *inFileName, *outFileName;
- float speed = 1.0f;
- float pitch = 1.0f;
- float rate = 1.0f;
- float volume = 1.0f;
- int emulateChordPitch = 0;
- int quality = 0;
- int sampleRate, numChannels;
- int xArg = 1;
+int main(int argc, char** argv) {
+ char* inFileName;
+ char* outFileName;
+ float speed = 1.0f;
+ float pitch = 1.0f;
+ float rate = 1.0f;
+ float volume = 1.0f;
+ int outputSampleRate = 0; /* Means use the input file sample rate. */
+ int emulateChordPitch = 0;
+ int quality = 0;
+ int xArg = 1;
+ int computeSpectrogram = 0;
+ int numRows = 0, numCols = 0;
- while(xArg < argc && *(argv[xArg]) == '-') {
- if(!strcmp(argv[xArg], "-c")) {
- emulateChordPitch = 1;
- printf("Scaling pitch linearly.\n");
- } else if(!strcmp(argv[xArg], "-p")) {
- xArg++;
- if(xArg < argc) {
- pitch = atof(argv[xArg]);
- printf("Setting pitch to %0.2fX\n", pitch);
- }
- } else if(!strcmp(argv[xArg], "-q")) {
- quality = 1;
- printf("Disabling speed-up heuristics\n");
- } else if(!strcmp(argv[xArg], "-r")) {
- xArg++;
- if(xArg < argc) {
- rate = atof(argv[xArg]);
- printf("Setting rate to %0.2fX\n", rate);
- }
- } else if(!strcmp(argv[xArg], "-s")) {
- xArg++;
- if(xArg < argc) {
- speed = atof(argv[xArg]);
- printf("Setting speed to %0.2fX\n", speed);
- }
- } else if(!strcmp(argv[xArg], "-v")) {
- xArg++;
- if(xArg < argc) {
- volume = atof(argv[xArg]);
- printf("Setting volume to %0.2f\n", volume);
- }
- }
- xArg++;
- }
- if(argc - xArg != 2) {
- usage();
- }
- inFileName = argv[xArg];
- outFileName = argv[xArg + 1];
- inFile = openInputWaveFile(inFileName, &sampleRate, &numChannels);
- if(inFile == NULL) {
- return 1;
+ while (xArg < argc && *(argv[xArg]) == '-') {
+ if (!strcmp(argv[xArg], "-c")) {
+ emulateChordPitch = 1;
+ printf("Scaling pitch linearly.\n");
+ } else if (!strcmp(argv[xArg], "-o")) {
+ xArg++;
+ if (xArg < argc) {
+ outputSampleRate = atoi(argv[xArg]);
+ printf("Setting output sample rate to %d\n", outputSampleRate);
+ }
+ } else if (!strcmp(argv[xArg], "-p")) {
+ xArg++;
+ if (xArg < argc) {
+ pitch = atof(argv[xArg]);
+ printf("Setting pitch to %0.2fX\n", pitch);
+ }
+ } else if (!strcmp(argv[xArg], "-q")) {
+ quality = 1;
+ printf("Disabling speed-up heuristics\n");
+ } else if (!strcmp(argv[xArg], "-r")) {
+ xArg++;
+ if (xArg < argc) {
+ rate = atof(argv[xArg]);
+ if (rate == 0.0f) {
+ usage();
+ }
+ printf("Setting rate to %0.2fX\n", rate);
+ }
+ } else if (!strcmp(argv[xArg], "-s")) {
+ xArg++;
+ if (xArg < argc) {
+ speed = atof(argv[xArg]);
+ printf("Setting speed to %0.2fX\n", speed);
+ }
+#ifdef SONIC_SPECTROGRAM
+ } else if (!strcmp(argv[xArg], "-S")) {
+ xArg++;
+ if (xArg < argc) {
+ numCols = atof(argv[xArg]);
+ }
+ xArg++;
+ if (xArg < argc) {
+ numRows = atof(argv[xArg]);
+ computeSpectrogram = 1;
+ printf("Computing spectrogram %d wide and %d tall\n", numCols, numRows);
+ }
+#endif /* SONIC_SPECTROGRAM */
+ } else if (!strcmp(argv[xArg], "-v")) {
+ xArg++;
+ if (xArg < argc) {
+ volume = atof(argv[xArg]);
+ printf("Setting volume to %0.2f\n", volume);
+ }
}
- outFile = openOutputWaveFile(outFileName, sampleRate, numChannels);
- if(outFile == NULL) {
- closeWaveFile(inFile);
- return 1;
- }
- runSonic(inFile, outFile, speed, pitch, rate, volume, emulateChordPitch, quality,
- sampleRate, numChannels);
- closeWaveFile(inFile);
- closeWaveFile(outFile);
- return 0;
+ xArg++;
+ }
+ if (argc - xArg != 2) {
+ usage();
+ }
+ inFileName = argv[xArg];
+ outFileName = argv[xArg + 1];
+ runSonic(inFileName, outFileName, speed, pitch, rate, volume,
+ outputSampleRate, emulateChordPitch, quality,
+ computeSpectrogram, numRows, numCols);
+ return 0;
}
diff --git a/main_lite.c b/main_lite.c
new file mode 100644
index 0000000..b8c972c
--- /dev/null
+++ b/main_lite.c
@@ -0,0 +1,103 @@
+/* This file was written by Bill Cox in 2010, and is licensed under the Apache
+ 2.0 license.
+
+ This file is meant as a simple example for how to use libsonic. It is also a
+ useful utility on its own, which can speed up or slow down wav files, change
+ pitch, and scale volume. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "sonic_lite.h"
+#include "wave.h"
+
+/* Run sonic_lite on the wave file inFileName and write the result to
+   outFileName.  speed and volume are handed to sonicSetSpeed and
+   sonicSetVolume.  Prints a message and exits with status 1 if either file
+   cannot be opened, or if the input is not mono at SONIC_SAMPLE_RATE. */
+static void runSonic(char* inFileName, char* outFileName, float speed, float volume) {
+  waveFile inFile, outFile = NULL;
+  short inBuffer[SONIC_INPUT_SAMPLES], outBuffer[SONIC_INPUT_SAMPLES];
+  int samplesRead, samplesWritten, sampleRate, numChannels;
+
+  inFile = openInputWaveFile(inFileName, &sampleRate, &numChannels);
+  /* Test for failure before anything else: sampleRate and numChannels have no
+     initializer, so they must not be read unless the open succeeded. */
+  if (inFile == NULL) {
+    fprintf(stderr, "Unable to read wave file %s\n", inFileName);
+    exit(1);
+  }
+  if (numChannels != 1) {
+    fprintf(stderr, "sonic_lite only processes mono wave files. This file has %d channels.\n",
+            numChannels);
+    closeWaveFile(inFile);
+    exit(1);
+  }
+  if (sampleRate != SONIC_SAMPLE_RATE) {
+    fprintf(stderr,
+            "sonic_lite only processes wave files with a sample rate of %d Hz. This file uses %d\n",
+            SONIC_SAMPLE_RATE, sampleRate);
+    closeWaveFile(inFile);
+    exit(1);
+  }
+  outFile = openOutputWaveFile(outFileName, sampleRate, 1);
+  if (outFile == NULL) {
+    closeWaveFile(inFile);
+    fprintf(stderr, "Unable to open wave file %s for writing\n", outFileName);
+    exit(1);
+  }
+  sonicInit();
+  sonicSetSpeed(speed);
+  sonicSetVolume(volume);
+  do {
+    samplesRead = readFromWaveFile(inFile, inBuffer, SONIC_INPUT_SAMPLES);
+    /* A read of zero samples marks the end of the input, so flush what
+       sonic_lite still holds instead of feeding it more data. */
+    if (samplesRead == 0) {
+      sonicFlushStream();
+    } else {
+      sonicWriteShortToStream(inBuffer, samplesRead);
+    }
+    /* Drain everything that is ready before reading the next input chunk. */
+    do {
+      samplesWritten = sonicReadShortFromStream(outBuffer, SONIC_INPUT_SAMPLES);
+      if (samplesWritten > 0) {
+        writeToWaveFile(outFile, outBuffer, samplesWritten);
+      }
+    } while (samplesWritten > 0);
+  } while (samplesRead > 0);
+  closeWaveFile(inFile);
+  closeWaveFile(outFile);
+}
+
+/* Print the sonic_lite command-line usage to stderr, then exit with
+   status 1.  This function does not return. */
+static void usage(void) {
+ fprintf(
+ stderr,
+ "Usage: sonic_lite [OPTION]... infile outfile\n"
+ " -s speed -- Set speed up factor. 2.0 means 2X faster.\n"
+ " -v volume -- Scale volume by a constant factor.\n");
+ exit(1);
+}
+
+/* Parse the command line and run sonic_lite on the given files.
+   Leading arguments that start with '-' are treated as options: -s and -v
+   each consume the following argument as a number parsed with atof.  Any
+   other argument starting with '-' is skipped without a diagnostic.  Exactly
+   two arguments (infile, outfile) must remain, otherwise usage() is called.
+   Returns 0 once runSonic has returned. */
+int main(int argc, char** argv) {
+ char* inFileName;
+ char* outFileName;
+ float speed = 1.0f;
+ float volume = 1.0f;
+ int xArg = 1;
+
+ while (xArg < argc && *(argv[xArg]) == '-') {
+ if (!strcmp(argv[xArg], "-s")) {
+ xArg++;
+ /* If the option is the last argument there is no value to read; xArg then
+    runs past argc and the argument-count check below calls usage(). */
+ if (xArg < argc) {
+ speed = atof(argv[xArg]);
+ printf("Setting speed to %0.2fX\n", speed);
+ }
+ } else if (!strcmp(argv[xArg], "-v")) {
+ xArg++;
+ if (xArg < argc) {
+ volume = atof(argv[xArg]);
+ printf("Setting volume to %0.2f\n", volume);
+ }
+ }
+ /* Step past the option itself, or past the value it consumed. */
+ xArg++;
+ }
+ /* Exactly the input and output file names must be left. */
+ if (argc - xArg != 2) {
+ usage();
+ }
+ inFileName = argv[xArg];
+ outFileName = argv[xArg + 1];
+ runSonic(inFileName, outFileName, speed, volume);
+ return 0;
+}
diff --git a/sonic.c b/sonic.c
index 39cee28..27fcaff 100644
--- a/sonic.c
+++ b/sonic.c
@@ -6,1170 +6,1243 @@
This file is licensed under the Apache 2.0 license.
*/
-#include <stdio.h>
+#include "sonic.h"
+
+#include <limits.h>
+#include <math.h>
#include <stdlib.h>
#include <string.h>
-#include <stdarg.h>
-#ifdef SONIC_USE_SIN
-#include <math.h>
-#ifndef M_PI
-#define M_PI 3.14159265358979323846
+
+/*
+ The following code was used to generate the following sinc lookup table.
+
+ #include <limits.h>
+ #include <math.h>
+ #include <stdio.h>
+
+ double findHannWeight(int N, double x) {
+ return 0.5*(1.0 - cos(2*M_PI*x/N));
+ }
+
+ double findSincCoefficient(int N, double x) {
+ double hannWindowWeight = findHannWeight(N, x);
+ double sincWeight;
+
+ x -= N/2.0;
+ if (x > 1e-9 || x < -1e-9) {
+ sincWeight = sin(M_PI*x)/(M_PI*x);
+ } else {
+ sincWeight = 1.0;
+ }
+ return hannWindowWeight*sincWeight;
+ }
+
+ int main() {
+ double x;
+ int i;
+ int N = 12;
+
+ for (i = 0, x = 0.0; x <= N; x += 0.02, i++) {
+ printf("%u %d\n", i, (int)(SHRT_MAX*findSincCoefficient(N, x)));
+ }
+ return 0;
+ }
+*/
+
+/* The number of points to use in the sinc FIR filter for resampling. */
+#define SINC_FILTER_POINTS \
+ 12 /* I am not able to hear improvement with higher N. */
+#define SINC_TABLE_SIZE 601
+
+/* Lookup table for windowed sinc function of SINC_FILTER_POINTS points. */
+static short sincTable[SINC_TABLE_SIZE] = {
+ 0, 0, 0, 0, 0, 0, 0, -1, -1, -2, -2,
+ -3, -4, -6, -7, -9, -10, -12, -14, -17, -19, -21,
+ -24, -26, -29, -32, -34, -37, -40, -42, -44, -47, -48,
+ -50, -51, -52, -53, -53, -53, -52, -50, -48, -46, -43,
+ -39, -34, -29, -22, -16, -8, 0, 9, 19, 29, 41,
+ 53, 65, 79, 92, 107, 121, 137, 152, 168, 184, 200,
+ 215, 231, 247, 262, 276, 291, 304, 317, 328, 339, 348,
+ 357, 363, 369, 372, 374, 375, 373, 369, 363, 355, 345,
+ 332, 318, 300, 281, 259, 234, 208, 178, 147, 113, 77,
+ 39, 0, -41, -85, -130, -177, -225, -274, -324, -375, -426,
+ -478, -530, -581, -632, -682, -731, -779, -825, -870, -912, -951,
+ -989, -1023, -1053, -1080, -1104, -1123, -1138, -1149, -1154, -1155, -1151,
+ -1141, -1125, -1105, -1078, -1046, -1007, -963, -913, -857, -796, -728,
+ -655, -576, -492, -403, -309, -210, -107, 0, 111, 225, 342,
+ 462, 584, 708, 833, 958, 1084, 1209, 1333, 1455, 1575, 1693,
+ 1807, 1916, 2022, 2122, 2216, 2304, 2384, 2457, 2522, 2579, 2625,
+ 2663, 2689, 2706, 2711, 2705, 2687, 2657, 2614, 2559, 2491, 2411,
+ 2317, 2211, 2092, 1960, 1815, 1658, 1489, 1308, 1115, 912, 698,
+ 474, 241, 0, -249, -506, -769, -1037, -1310, -1586, -1864, -2144,
+ -2424, -2703, -2980, -3254, -3523, -3787, -4043, -4291, -4529, -4757, -4972,
+ -5174, -5360, -5531, -5685, -5819, -5935, -6029, -6101, -6150, -6175, -6175,
+ -6149, -6096, -6015, -5905, -5767, -5599, -5401, -5172, -4912, -4621, -4298,
+ -3944, -3558, -3141, -2693, -2214, -1705, -1166, -597, 0, 625, 1277,
+ 1955, 2658, 3386, 4135, 4906, 5697, 6506, 7332, 8173, 9027, 9893,
+ 10769, 11654, 12544, 13439, 14335, 15232, 16128, 17019, 17904, 18782, 19649,
+ 20504, 21345, 22170, 22977, 23763, 24527, 25268, 25982, 26669, 27327, 27953,
+ 28547, 29107, 29632, 30119, 30569, 30979, 31349, 31678, 31964, 32208, 32408,
+ 32565, 32677, 32744, 32767, 32744, 32677, 32565, 32408, 32208, 31964, 31678,
+ 31349, 30979, 30569, 30119, 29632, 29107, 28547, 27953, 27327, 26669, 25982,
+ 25268, 24527, 23763, 22977, 22170, 21345, 20504, 19649, 18782, 17904, 17019,
+ 16128, 15232, 14335, 13439, 12544, 11654, 10769, 9893, 9027, 8173, 7332,
+ 6506, 5697, 4906, 4135, 3386, 2658, 1955, 1277, 625, 0, -597,
+ -1166, -1705, -2214, -2693, -3141, -3558, -3944, -4298, -4621, -4912, -5172,
+ -5401, -5599, -5767, -5905, -6015, -6096, -6149, -6175, -6175, -6150, -6101,
+ -6029, -5935, -5819, -5685, -5531, -5360, -5174, -4972, -4757, -4529, -4291,
+ -4043, -3787, -3523, -3254, -2980, -2703, -2424, -2144, -1864, -1586, -1310,
+ -1037, -769, -506, -249, 0, 241, 474, 698, 912, 1115, 1308,
+ 1489, 1658, 1815, 1960, 2092, 2211, 2317, 2411, 2491, 2559, 2614,
+ 2657, 2687, 2705, 2711, 2706, 2689, 2663, 2625, 2579, 2522, 2457,
+ 2384, 2304, 2216, 2122, 2022, 1916, 1807, 1693, 1575, 1455, 1333,
+ 1209, 1084, 958, 833, 708, 584, 462, 342, 225, 111, 0,
+ -107, -210, -309, -403, -492, -576, -655, -728, -796, -857, -913,
+ -963, -1007, -1046, -1078, -1105, -1125, -1141, -1151, -1155, -1154, -1149,
+ -1138, -1123, -1104, -1080, -1053, -1023, -989, -951, -912, -870, -825,
+ -779, -731, -682, -632, -581, -530, -478, -426, -375, -324, -274,
+ -225, -177, -130, -85, -41, 0, 39, 77, 113, 147, 178,
+ 208, 234, 259, 281, 300, 318, 332, 345, 355, 363, 369,
+ 373, 375, 374, 372, 369, 363, 357, 348, 339, 328, 317,
+ 304, 291, 276, 262, 247, 231, 215, 200, 184, 168, 152,
+ 137, 121, 107, 92, 79, 65, 53, 41, 29, 19, 9,
+ 0, -8, -16, -22, -29, -34, -39, -43, -46, -48, -50,
+ -52, -53, -53, -53, -52, -51, -50, -48, -47, -44, -42,
+ -40, -37, -34, -32, -29, -26, -24, -21, -19, -17, -14,
+ -12, -10, -9, -7, -6, -4, -3, -2, -2, -1, -1,
+ 0, 0, 0, 0, 0, 0, 0};
+
+/* If SONIC_NO_MALLOC is defined, these functions allocate out of a static
+   array rather than calling calloc/realloc/free. Otherwise, they call
+   calloc/realloc/free as usual. The static array is useful for running on
+   small microcontrollers. */
+#ifndef SONIC_NO_MALLOC
+
+/* Just call calloc. */
+static void *sonicCalloc(int num, int size) {
+ return calloc(num, size);
+}
+
+/* Resize the block p to hold newNum elements of size bytes each by calling
+   realloc, and return realloc's result.  oldNum is not used in this variant;
+   the SONIC_NO_MALLOC variant of this function takes the same parameters. */
+static void *sonicRealloc(void *p, int oldNum, int newNum, int size) {
+ return realloc(p, newNum * size);
+}
+
+/* Just call free. */
+static void sonicFree(void *p) {
+ free(p);
+}
+
+#else
+
+#ifndef SONIC_MAX_MEMORY
+/* Large enough for speedup/slowdown at 8KHz, 16-bit mono samples/second. */
+#define SONIC_MAX_MEMORY (16 * 1024)
#endif
+
+/* This static buffer is used to hold data allocated for the sonicStream struct
+   and its buffers. There should never be more than one sonicStream in use at a
+   time when using SONIC_NO_MALLOC mode. Calls to realloc move the data to the
+   end of memoryBuffer. Calls to free reset the memory buffer to empty.
+
+   The storage is declared as an array of void* so that it is aligned for
+   pointers. The element count is SONIC_MAX_MEMORY / sizeof(void*) rounded up,
+   so the array always holds at least SONIC_MAX_MEMORY bytes. */
+static void*
+    memoryBufferAligned[(SONIC_MAX_MEMORY + sizeof(void*) - 1) / sizeof(void*)];
+static unsigned char* memoryBuffer = (unsigned char*)memoryBufferAligned;
+static int memoryBufferPos = 0;
+
+/* Allocate num elements of size bytes each from the static memory buffer and
+   zero them.  Returns NULL if an argument is negative or the request does not
+   fit in the space that is left. */
+static void *sonicCalloc(int num, int size) {
+  int remaining = SONIC_MAX_MEMORY - memoryBufferPos;
+
+  /* Compare by division rather than computing num * size first, so that an
+     oversized request is rejected instead of overflowing int. */
+  if (num < 0 || size < 0 || (size > 0 && num > remaining / size)) {
+    return NULL;
+  }
+  int len = num * size;
+  unsigned char *p = memoryBuffer + memoryBufferPos;
+  memoryBufferPos += len;
+  memset(p, 0, len);
+  return p;
+}
+
+/* Grow the block p from oldNum to newNum elements of size bytes each.
+   Preferably, SONIC_MAX_MEMORY has been set large enough that this is never
+   called.  Returns p unchanged when newNum <= oldNum, NULL when sonicCalloc
+   cannot satisfy the request, and otherwise a new block whose first
+   oldNum * size bytes are copied from p.  The space p occupied is not
+   reclaimed by this call. */
+static void *sonicRealloc(void *p, int oldNum, int newNum, int size) {
+ if (newNum <= oldNum) {
+ return p;
+ }
+ /* sonicCalloc hands out zeroed space past the current end of the buffer. */
+ void *newBuffer = sonicCalloc(newNum, size);
+ if (newBuffer == NULL) {
+ return NULL;
+ }
+ memcpy(newBuffer, p, oldNum * size);
+ return newBuffer;
+}
+
+/* Reset memoryBufferPos to 0. We assume all data is freed at the same time. */
+static void sonicFree(void *p) {
+ memoryBufferPos = 0;
+}
+
#endif
-#include "sonic.h"
struct sonicStreamStruct {
- short *inputBuffer;
- short *outputBuffer;
- short *pitchBuffer;
- short *downSampleBuffer;
- float speed;
- float volume;
- float pitch;
- float rate;
- int oldRatePosition;
- int newRatePosition;
- int useChordPitch;
- int quality;
- int numChannels;
- int inputBufferSize;
- int pitchBufferSize;
- int outputBufferSize;
- int numInputSamples;
- int numOutputSamples;
- int numPitchSamples;
- int minPeriod;
- int maxPeriod;
- int maxRequired;
- int remainingInputToCopy;
- int sampleRate;
- int prevPeriod;
- int prevMinDiff;
+#ifdef SONIC_SPECTROGRAM
+ sonicSpectrogram spectrogram;
+#endif /* SONIC_SPECTROGRAM */
+ short* inputBuffer;
+ short* outputBuffer;
+ short* pitchBuffer;
+ short* downSampleBuffer;
+ void* userData;
+ float speed;
+ float volume;
+ float pitch;
+ float rate;
+ /* The point of the following 3 new variables is to gracefully handle rapidly
+ changing input speed.
+
+ samplePeriod is just 1.0/sampleRate. It is used in accumulating
+ inputPlayTime, which is how long we expect the total time should be to play
+ the current input samples in the input buffer. timeError keeps track of
+ the error in play time created when playing < 2.0X speed, where we either
+ insert or delete a whole pitch period. This can cause the output generated
+ from the input to be off in play time by up to a pitch period. timeError
+ replaces PICOLA's concept of the number of samples to play unmodified after
+ a pitch period insertion or deletion. If speeding up, and the error is >=
+ 0.0, then remove a pitch period, and play samples unmodified until
+ timeError is >= 0 again. If slowing down, and the error is <= 0.0,
+ then add a pitch period, and play samples unmodified until timeError is <=
+ 0 again. */
+ float samplePeriod; /* How long each output sample takes to play. */
+ /* How long we expect the entire input buffer to take to play. */
+ float inputPlayTime;
+ /* The difference in when the latest output sample was played vs when we wanted. */
+ float timeError;
+ int oldRatePosition;
+ int newRatePosition;
+ int quality;
+ int numChannels;
+ int inputBufferSize;
+ int pitchBufferSize;
+ int outputBufferSize;
+ int numInputSamples;
+ int numOutputSamples;
+ int numPitchSamples;
+ int minPeriod;
+ int maxPeriod;
+ int maxRequired;
+ int remainingInputToCopy;
+ int sampleRate;
+ int prevPeriod;
+ int prevMinDiff;
};
-/* Just used for debugging */
-/*
-void sonicMSG(char *format, ...)
-{
- char buffer[4096];
- va_list ap;
- FILE *file;
-
- va_start(ap, format);
- vsprintf((char *)buffer, (char *)format, ap);
- va_end(ap);
- file=fopen("/tmp/sonic.log", "a");
- fprintf(file, "%s", buffer);
- fclose(file);
+#ifdef SONIC_SPECTROGRAM
+
+/* Attach user data to the stream. */
+void sonicSetUserData(sonicStream stream, void *userData) {
+ stream->userData = userData;
}
-*/
+
+/* Retrieve user data attached to the stream. */
+void *sonicGetUserData(sonicStream stream) {
+ return stream->userData;
+}
+
+/* Compute a spectrogram on the fly. */
+void sonicComputeSpectrogram(sonicStream stream) {
+ stream->spectrogram = sonicCreateSpectrogram(stream->sampleRate);
+ /* Force changeSpeed to be called to compute the spectrogram. */
+ sonicSetSpeed(stream, 2.0);
+}
+
+/* Get the spectrogram. */
+sonicSpectrogram sonicGetSpectrogram(sonicStream stream) {
+ return stream->spectrogram;
+}
+
+#endif
/* Scale the samples by the factor. */
-static void scaleSamples(
- short *samples,
- int numSamples,
- float volume)
-{
- int fixedPointVolume = volume*4096.0f;
- int value;
-
- while(numSamples--) {
- value = (*samples*fixedPointVolume) >> 12;
- if(value > 32767) {
- value = 32767;
- } else if(value < -32767) {
- value = -32767;
- }
- *samples++ = value;
- }
+static void scaleSamples(short* samples, int numSamples, float volume) {
+ /* This is 24-bit integer and 8-bit fraction fixed-point representation. */
+ int fixedPointVolume = volume * 256.0f;
+ int value;
+
+ while (numSamples--) {
+ value = (*samples * fixedPointVolume) >> 8;
+ if (value > 32767) {
+ value = 32767;
+ } else if (value < -32767) {
+ value = -32767;
+ }
+ *samples++ = value;
+ }
}
/* Get the speed of the stream. */
-float sonicGetSpeed(
- sonicStream stream)
-{
- return stream->speed;
-}
+float sonicGetSpeed(sonicStream stream) { return stream->speed; }
/* Set the speed of the stream. */
-void sonicSetSpeed(
- sonicStream stream,
- float speed)
-{
- stream->speed = speed;
-}
+void sonicSetSpeed(sonicStream stream, float speed) { stream->speed = speed; }
/* Get the pitch of the stream. */
-float sonicGetPitch(
- sonicStream stream)
-{
- return stream->pitch;
-}
+float sonicGetPitch(sonicStream stream) { return stream->pitch; }
/* Set the pitch of the stream. */
-void sonicSetPitch(
- sonicStream stream,
- float pitch)
-{
- stream->pitch = pitch;
-}
+void sonicSetPitch(sonicStream stream, float pitch) { stream->pitch = pitch; }
/* Get the rate of the stream. */
-float sonicGetRate(
- sonicStream stream)
-{
- return stream->rate;
-}
+float sonicGetRate(sonicStream stream) { return stream->rate; }
-/* Set the playback rate of the stream. This scales pitch and speed at the same time. */
-void sonicSetRate(
- sonicStream stream,
- float rate)
-{
- stream->rate = rate;
+/* Set the playback rate of the stream. This scales pitch and speed at the same
+ time. */
+void sonicSetRate(sonicStream stream, float rate) {
+ stream->rate = rate;
- stream->oldRatePosition = 0;
- stream->newRatePosition = 0;
+ stream->oldRatePosition = 0;
+ stream->newRatePosition = 0;
}
-/* Get the vocal chord pitch setting. */
-int sonicGetChordPitch(
- sonicStream stream)
-{
- return stream->useChordPitch;
+/* DEPRECATED. Get the vocal chord pitch setting. */
+int sonicGetChordPitch(sonicStream stream) {
+ return 0;
}
-/* Set the vocal chord mode for pitch computation. Default is off. */
-void sonicSetChordPitch(
- sonicStream stream,
- int useChordPitch)
-{
- stream->useChordPitch = useChordPitch;
+/* DEPRECATED. Set the vocal chord mode for pitch computation. Default is off. */
+void sonicSetChordPitch(sonicStream stream, int useChordPitch) {
}
/* Get the quality setting. */
-int sonicGetQuality(
- sonicStream stream)
-{
- return stream->quality;
-}
+int sonicGetQuality(sonicStream stream) { return stream->quality; }
-/* Set the "quality". Default 0 is virtually as good as 1, but very much faster. */
-void sonicSetQuality(
- sonicStream stream,
- int quality)
-{
- stream->quality = quality;
+/* Set the "quality". Default 0 is virtually as good as 1, but very much
+ faster. */
+void sonicSetQuality(sonicStream stream, int quality) {
+ stream->quality = quality;
}
/* Get the scaling factor of the stream. */
-float sonicGetVolume(
- sonicStream stream)
-{
- return stream->volume;
-}
+float sonicGetVolume(sonicStream stream) { return stream->volume; }
/* Set the scaling factor of the stream. */
-void sonicSetVolume(
- sonicStream stream,
- float volume)
-{
- stream->volume = volume;
+void sonicSetVolume(sonicStream stream, float volume) {
+ stream->volume = volume;
}
/* Free stream buffers. */
-static void freeStreamBuffers(
- sonicStream stream)
-{
- if(stream->inputBuffer != NULL) {
- free(stream->inputBuffer);
- }
- if(stream->outputBuffer != NULL) {
- free(stream->outputBuffer);
- }
- if(stream->pitchBuffer != NULL) {
- free(stream->pitchBuffer);
- }
- if(stream->downSampleBuffer != NULL) {
- free(stream->downSampleBuffer);
- }
+static void freeStreamBuffers(sonicStream stream) {
+  /* Each pointer is cleared after it is freed.  sonicSetSampleRate and
+     sonicSetNumChannels call this and then allocateStreamBuffers, which calls
+     sonicDestroyStream (and so this function again) if an allocation fails;
+     a stale pointer left behind here would then be freed a second time. */
+  if (stream->inputBuffer != NULL) {
+    sonicFree(stream->inputBuffer);
+    stream->inputBuffer = NULL;
+  }
+  if (stream->outputBuffer != NULL) {
+    sonicFree(stream->outputBuffer);
+    stream->outputBuffer = NULL;
+  }
+  if (stream->pitchBuffer != NULL) {
+    sonicFree(stream->pitchBuffer);
+    stream->pitchBuffer = NULL;
+  }
+  if (stream->downSampleBuffer != NULL) {
+    sonicFree(stream->downSampleBuffer);
+    stream->downSampleBuffer = NULL;
+  }
}
/* Destroy the sonic stream. */
-void sonicDestroyStream(
- sonicStream stream)
-{
- freeStreamBuffers(stream);
- free(stream);
+void sonicDestroyStream(sonicStream stream) {
+#ifdef SONIC_SPECTROGRAM
+ if (stream->spectrogram != NULL) {
+ sonicDestroySpectrogram(stream->spectrogram);
+ }
+#endif /* SONIC_SPECTROGRAM */
+ freeStreamBuffers(stream);
+ sonicFree(stream);
+}
+
+/* Compute the number of samples to skip to down-sample the input. */
+static int computeSkip(sonicStream stream) {
+ int skip = 1;
+ if (stream->sampleRate > SONIC_AMDF_FREQ && stream->quality == 0) {
+ skip = stream->sampleRate / SONIC_AMDF_FREQ;
+ }
+ return skip;
}
/* Allocate stream buffers. */
-static int allocateStreamBuffers(
- sonicStream stream,
- int sampleRate,
- int numChannels)
-{
- int minPeriod = sampleRate/SONIC_MAX_PITCH;
- int maxPeriod = sampleRate/SONIC_MIN_PITCH;
- int maxRequired = 2*maxPeriod;
-
- stream->inputBufferSize = maxRequired;
- stream->inputBuffer = (short *)calloc(maxRequired, sizeof(short)*numChannels);
- if(stream->inputBuffer == NULL) {
- sonicDestroyStream(stream);
- return 0;
- }
- stream->outputBufferSize = maxRequired;
- stream->outputBuffer = (short *)calloc(maxRequired, sizeof(short)*numChannels);
- if(stream->outputBuffer == NULL) {
- sonicDestroyStream(stream);
- return 0;
- }
- stream->pitchBufferSize = maxRequired;
- stream->pitchBuffer = (short *)calloc(maxRequired, sizeof(short)*numChannels);
- if(stream->pitchBuffer == NULL) {
- sonicDestroyStream(stream);
- return 0;
- }
- stream->downSampleBuffer = (short *)calloc(maxRequired, sizeof(short));
- if(stream->downSampleBuffer == NULL) {
- sonicDestroyStream(stream);
- return 0;
- }
- stream->sampleRate = sampleRate;
- stream->numChannels = numChannels;
- stream->oldRatePosition = 0;
- stream->newRatePosition = 0;
- stream->minPeriod = minPeriod;
- stream->maxPeriod = maxPeriod;
- stream->maxRequired = maxRequired;
- stream->prevPeriod = 0;
- return 1;
+static int allocateStreamBuffers(sonicStream stream, int sampleRate,
+                                 int numChannels) {
+  /* Allocates the input, output, pitch and down-sample buffers for the given
+     sample rate and channel count and records the derived period limits in
+     the stream.  Returns 1 on success.  On allocation failure the stream is
+     destroyed with sonicDestroyStream and 0 is returned. */
+  int minPeriod = sampleRate / SONIC_MAX_PITCH;
+  int maxPeriod = sampleRate / SONIC_MIN_PITCH;
+  int maxRequired = 2 * maxPeriod;
+  /* Allocate 25% more than needed so we hopefully won't grow. */
+  int paddedSize = maxRequired + (maxRequired >> 2);
+
+  stream->inputBufferSize = paddedSize;
+  stream->inputBuffer =
+      (short*)sonicCalloc(stream->inputBufferSize, sizeof(short) * numChannels);
+  if (stream->inputBuffer == NULL) {
+    sonicDestroyStream(stream);
+    return 0;
+  }
+  stream->outputBufferSize = paddedSize;
+  stream->outputBuffer =
+      (short*)sonicCalloc(stream->outputBufferSize, sizeof(short) * numChannels);
+  if (stream->outputBuffer == NULL) {
+    sonicDestroyStream(stream);
+    return 0;
+  }
+  /* The allocation has to match the recorded pitchBufferSize; allocating only
+     maxRequired elements would leave the buffer smaller than its stated
+     size. */
+  stream->pitchBufferSize = paddedSize;
+  stream->pitchBuffer =
+      (short*)sonicCalloc(stream->pitchBufferSize, sizeof(short) * numChannels);
+  if (stream->pitchBuffer == NULL) {
+    sonicDestroyStream(stream);
+    return 0;
+  }
+  /* Size the down-sample buffer for the worst case, a skip of 1.  The skip
+     returned by computeSkip depends on stream->sampleRate, which is only
+     updated further down, and on stream->quality, which sonicSetQuality can
+     change after this allocation, so neither can be relied on here. */
+  stream->downSampleBuffer = (short*)sonicCalloc(maxRequired, sizeof(short));
+  if (stream->downSampleBuffer == NULL) {
+    sonicDestroyStream(stream);
+    return 0;
+  }
+  stream->sampleRate = sampleRate;
+  stream->samplePeriod = 1.0 / sampleRate;
+  stream->numChannels = numChannels;
+  stream->oldRatePosition = 0;
+  stream->newRatePosition = 0;
+  stream->minPeriod = minPeriod;
+  stream->maxPeriod = maxPeriod;
+  stream->maxRequired = maxRequired;
+  stream->prevPeriod = 0;
+  return 1;
}
/* Create a sonic stream. Return NULL only if we are out of memory and cannot
allocate the stream. */
-sonicStream sonicCreateStream(
- int sampleRate,
- int numChannels)
-{
- sonicStream stream = (sonicStream)calloc(1, sizeof(struct sonicStreamStruct));
-
- if(stream == NULL) {
- return NULL;
- }
- if(!allocateStreamBuffers(stream, sampleRate, numChannels)) {
- return NULL;
- }
- stream->speed = 1.0f;
- stream->pitch = 1.0f;
- stream->volume = 1.0f;
- stream->rate = 1.0f;
- stream->oldRatePosition = 0;
- stream->newRatePosition = 0;
- stream->useChordPitch = 0;
- stream->quality = 0;
- return stream;
+sonicStream sonicCreateStream(int sampleRate, int numChannels) {
+ sonicStream stream = (sonicStream)sonicCalloc(
+ 1, sizeof(struct sonicStreamStruct));
+
+ if (stream == NULL) {
+ return NULL;
+ }
+ if (!allocateStreamBuffers(stream, sampleRate, numChannels)) {
+ return NULL;
+ }
+ stream->speed = 1.0f;
+ stream->pitch = 1.0f;
+ stream->volume = 1.0f;
+ stream->rate = 1.0f;
+ stream->oldRatePosition = 0;
+ stream->newRatePosition = 0;
+ stream->quality = 0;
+ return stream;
}
/* Get the sample rate of the stream. */
-int sonicGetSampleRate(
- sonicStream stream)
-{
- return stream->sampleRate;
-}
+int sonicGetSampleRate(sonicStream stream) { return stream->sampleRate; }
-/* Set the sample rate of the stream. This will cause samples buffered in the stream to
- be lost. */
-void sonicSetSampleRate(
- sonicStream stream,
- int sampleRate)
-{
- freeStreamBuffers(stream);
- allocateStreamBuffers(stream, sampleRate, stream->numChannels);
+/* Set the sample rate of the stream. This will cause samples buffered in the
+ stream to be lost. */
+void sonicSetSampleRate(sonicStream stream, int sampleRate) {
+ freeStreamBuffers(stream);
+ allocateStreamBuffers(stream, sampleRate, stream->numChannels);
}
/* Get the number of channels. */
-int sonicGetNumChannels(
- sonicStream stream)
-{
- return stream->numChannels;
-}
+int sonicGetNumChannels(sonicStream stream) { return stream->numChannels; }
-/* Set the num channels of the stream. This will cause samples buffered in the stream to
- be lost. */
-void sonicSetNumChannels(
- sonicStream stream,
- int numChannels)
-{
- freeStreamBuffers(stream);
- allocateStreamBuffers(stream, stream->sampleRate, numChannels);
+/* Set the num channels of the stream. This will cause samples buffered in the
+ stream to be lost. */
+void sonicSetNumChannels(sonicStream stream, int numChannels) {
+ freeStreamBuffers(stream);
+ allocateStreamBuffers(stream, stream->sampleRate, numChannels);
}
/* Enlarge the output buffer if needed. */
-static int enlargeOutputBufferIfNeeded(
- sonicStream stream,
- int numSamples)
-{
- if(stream->numOutputSamples + numSamples > stream->outputBufferSize) {
- stream->outputBufferSize += (stream->outputBufferSize >> 1) + numSamples;
- stream->outputBuffer = (short *)realloc(stream->outputBuffer,
- stream->outputBufferSize*sizeof(short)*stream->numChannels);
- if(stream->outputBuffer == NULL) {
- return 0;
- }
- }
- return 1;
+static int enlargeOutputBufferIfNeeded(sonicStream stream, int numSamples) {
+ int outputBufferSize = stream->outputBufferSize;
+
+ if (stream->numOutputSamples + numSamples > outputBufferSize) {
+ stream->outputBufferSize += (outputBufferSize >> 1) + numSamples;
+ stream->outputBuffer = (short*)sonicRealloc(
+ stream->outputBuffer,
+ outputBufferSize,
+ stream->outputBufferSize,
+ sizeof(short) * stream->numChannels);
+ if (stream->outputBuffer == NULL) {
+ return 0;
+ }
+ }
+ return 1;
}
/* Enlarge the input buffer if needed. */
-static int enlargeInputBufferIfNeeded(
- sonicStream stream,
- int numSamples)
-{
- if(stream->numInputSamples + numSamples > stream->inputBufferSize) {
- stream->inputBufferSize += (stream->inputBufferSize >> 1) + numSamples;
- stream->inputBuffer = (short *)realloc(stream->inputBuffer,
- stream->inputBufferSize*sizeof(short)*stream->numChannels);
- if(stream->inputBuffer == NULL) {
- return 0;
- }
- }
- return 1;
+static int enlargeInputBufferIfNeeded(sonicStream stream, int numSamples) {
+ int inputBufferSize = stream->inputBufferSize;
+
+ if (stream->numInputSamples + numSamples > inputBufferSize) {
+ stream->inputBufferSize += (inputBufferSize >> 1) + numSamples;
+ stream->inputBuffer = (short*)sonicRealloc(
+ stream->inputBuffer,
+ inputBufferSize,
+ stream->inputBufferSize,
+ sizeof(short) * stream->numChannels);
+ if (stream->inputBuffer == NULL) {
+ return 0;
+ }
+ }
+ return 1;
+}
+
+/* Update stream->numInputSamples, and update stream->inputPlayTime. Call this
+ whenever adding samples to the input buffer, to keep track of total expected
+ input play time accounting. */
+static void updateNumInputSamples(sonicStream stream, int numSamples) {
+ float speed = stream->speed / stream->pitch;
+
+ stream->numInputSamples += numSamples;
+ stream->inputPlayTime += numSamples * stream->samplePeriod / speed;
}
/* Add the input samples to the input buffer. */
-static int addFloatSamplesToInputBuffer(
- sonicStream stream,
- float *samples,
- int numSamples)
-{
- short *buffer;
- int count = numSamples*stream->numChannels;
-
- if(numSamples == 0) {
- return 1;
- }
- if(!enlargeInputBufferIfNeeded(stream, numSamples)) {
- return 0;
- }
- buffer = stream->inputBuffer + stream->numInputSamples*stream->numChannels;
- while(count--) {
- *buffer++ = (*samples++)*32767.0f;
- }
- stream->numInputSamples += numSamples;
+static int addFloatSamplesToInputBuffer(sonicStream stream, const float* samples,
+ int numSamples) {
+ short* buffer;
+ int count = numSamples * stream->numChannels;
+
+ if (numSamples == 0) {
return 1;
+ }
+ if (!enlargeInputBufferIfNeeded(stream, numSamples)) {
+ return 0;
+ }
+ buffer = stream->inputBuffer + stream->numInputSamples * stream->numChannels;
+ while (count--) {
+ *buffer++ = (*samples++) * 32767.0f;
+ }
+ updateNumInputSamples(stream, numSamples);
+ return 1;
}
/* Add the input samples to the input buffer. */
-static int addShortSamplesToInputBuffer(
- sonicStream stream,
- short *samples,
- int numSamples)
-{
- if(numSamples == 0) {
- return 1;
- }
- if(!enlargeInputBufferIfNeeded(stream, numSamples)) {
- return 0;
- }
- memcpy(stream->inputBuffer + stream->numInputSamples*stream->numChannels, samples,
- numSamples*sizeof(short)*stream->numChannels);
- stream->numInputSamples += numSamples;
+static int addShortSamplesToInputBuffer(sonicStream stream, const short* samples,
+ int numSamples) {
+ if (numSamples == 0) {
return 1;
+ }
+ if (!enlargeInputBufferIfNeeded(stream, numSamples)) {
+ return 0;
+ }
+ memcpy(stream->inputBuffer + stream->numInputSamples * stream->numChannels,
+ samples, numSamples * sizeof(short) * stream->numChannels);
+ updateNumInputSamples(stream, numSamples);
+ return 1;
}
/* Add the input samples to the input buffer. */
-static int addUnsignedCharSamplesToInputBuffer(
- sonicStream stream,
- unsigned char *samples,
- int numSamples)
-{
- short *buffer;
- int count = numSamples*stream->numChannels;
-
- if(numSamples == 0) {
- return 1;
- }
- if(!enlargeInputBufferIfNeeded(stream, numSamples)) {
- return 0;
- }
- buffer = stream->inputBuffer + stream->numInputSamples*stream->numChannels;
- while(count--) {
- *buffer++ = (*samples++ - 128) << 8;
- }
- stream->numInputSamples += numSamples;
+static int addUnsignedCharSamplesToInputBuffer(sonicStream stream,
+ const unsigned char* samples,
+ int numSamples) {
+ short* buffer;
+ int count = numSamples * stream->numChannels;
+
+ if (numSamples == 0) {
return 1;
+ }
+ if (!enlargeInputBufferIfNeeded(stream, numSamples)) {
+ return 0;
+ }
+ buffer = stream->inputBuffer + stream->numInputSamples * stream->numChannels;
+ while (count--) {
+ *buffer++ = (*samples++ - 128) << 8;
+ }
+ updateNumInputSamples(stream, numSamples);
+ return 1;
}
/* Remove input samples that we have already processed. */
-static void removeInputSamples(
- sonicStream stream,
- int position)
-{
- int remainingSamples = stream->numInputSamples - position;
-
- if(remainingSamples > 0) {
- memmove(stream->inputBuffer, stream->inputBuffer + position*stream->numChannels,
- remainingSamples*sizeof(short)*stream->numChannels);
- }
- stream->numInputSamples = remainingSamples;
+static void removeInputSamples(sonicStream stream, int position) {
+ int remainingSamples = stream->numInputSamples - position;
+
+ if (remainingSamples > 0) {
+ memmove(stream->inputBuffer,
+ stream->inputBuffer + position * stream->numChannels,
+ remainingSamples * sizeof(short) * stream->numChannels);
+ }
+ /* If we play 3/4ths of the samples, then the expected play time of the
+ remaining samples is 1/4th of the original expected play time. */
+ stream->inputPlayTime =
+ (stream->inputPlayTime * remainingSamples) / stream->numInputSamples;
+ stream->numInputSamples = remainingSamples;
}
-/* Just copy from the array to the output buffer */
-static int copyToOutput(
- sonicStream stream,
- short *samples,
- int numSamples)
-{
- if(!enlargeOutputBufferIfNeeded(stream, numSamples)) {
- return 0;
- }
- memcpy(stream->outputBuffer + stream->numOutputSamples*stream->numChannels,
- samples, numSamples*sizeof(short)*stream->numChannels);
- stream->numOutputSamples += numSamples;
- return 1;
+/* Copy from the input buffer to the output buffer, and remove the samples from
+ the input buffer. */
+static int copyInputToOutput(sonicStream stream, int numSamples) {
+ if (!enlargeOutputBufferIfNeeded(stream, numSamples)) {
+ return 0;
+ }
+ memcpy(stream->outputBuffer + stream->numOutputSamples * stream->numChannels,
+ stream->inputBuffer, numSamples * sizeof(short) * stream->numChannels);
+ stream->numOutputSamples += numSamples;
+ removeInputSamples(stream, numSamples);
+ return 1;
}
-/* Just copy from the input buffer to the output buffer. Return 0 if we fail to
- resize the output buffer. Otherwise, return numSamples */
-static int copyInputToOutput(
- sonicStream stream,
- int position)
-{
- int numSamples = stream->remainingInputToCopy;
-
- if(numSamples > stream->maxRequired) {
- numSamples = stream->maxRequired;
- }
- if(!copyToOutput(stream, stream->inputBuffer + position*stream->numChannels,
- numSamples)) {
- return 0;
- }
- stream->remainingInputToCopy -= numSamples;
- return numSamples;
+/* Copy from samples to the output buffer */
+static int copyToOutput(sonicStream stream, short* samples, int numSamples) {
+ if (!enlargeOutputBufferIfNeeded(stream, numSamples)) {
+ return 0;
+ }
+ memcpy(stream->outputBuffer + stream->numOutputSamples * stream->numChannels,
+ samples, numSamples * sizeof(short) * stream->numChannels);
+ stream->numOutputSamples += numSamples;
+ return 1;
}
/* Read data out of the stream. Sometimes no data will be available, and zero
is returned, which is not an error condition. */
-int sonicReadFloatFromStream(
- sonicStream stream,
- float *samples,
- int maxSamples)
-{
- int numSamples = stream->numOutputSamples;
- int remainingSamples = 0;
- short *buffer;
- int count;
-
- if(numSamples == 0) {
- return 0;
- }
- if(numSamples > maxSamples) {
- remainingSamples = numSamples - maxSamples;
- numSamples = maxSamples;
- }
- buffer = stream->outputBuffer;
- count = numSamples*stream->numChannels;
- while(count--) {
- *samples++ = (*buffer++)/32767.0f;
- }
- if(remainingSamples > 0) {
- memmove(stream->outputBuffer, stream->outputBuffer + numSamples*stream->numChannels,
- remainingSamples*sizeof(short)*stream->numChannels);
- }
- stream->numOutputSamples = remainingSamples;
- return numSamples;
+int sonicReadFloatFromStream(sonicStream stream, float* samples,
+ int maxSamples) {
+ int numSamples = stream->numOutputSamples;
+ int remainingSamples = 0;
+ short* buffer;
+ int count;
+
+ if (numSamples == 0) {
+ return 0;
+ }
+ if (numSamples > maxSamples) {
+ remainingSamples = numSamples - maxSamples;
+ numSamples = maxSamples;
+ }
+ buffer = stream->outputBuffer;
+ count = numSamples * stream->numChannels;
+ while (count--) {
+ *samples++ = (*buffer++) / 32767.0f;
+ }
+ if (remainingSamples > 0) {
+ memmove(stream->outputBuffer,
+ stream->outputBuffer + numSamples * stream->numChannels,
+ remainingSamples * sizeof(short) * stream->numChannels);
+ }
+ stream->numOutputSamples = remainingSamples;
+ return numSamples;
}
-/* Read short data out of the stream. Sometimes no data will be available, and zero
- is returned, which is not an error condition. */
-int sonicReadShortFromStream(
- sonicStream stream,
- short *samples,
- int maxSamples)
-{
- int numSamples = stream->numOutputSamples;
- int remainingSamples = 0;
-
- if(numSamples == 0) {
- return 0;
- }
- if(numSamples > maxSamples) {
- remainingSamples = numSamples - maxSamples;
- numSamples = maxSamples;
- }
- memcpy(samples, stream->outputBuffer, numSamples*sizeof(short)*stream->numChannels);
- if(remainingSamples > 0) {
- memmove(stream->outputBuffer, stream->outputBuffer + numSamples*stream->numChannels,
- remainingSamples*sizeof(short)*stream->numChannels);
- }
- stream->numOutputSamples = remainingSamples;
- return numSamples;
+/* Read short data out of the stream. Sometimes no data will be available, and
+ zero is returned, which is not an error condition. */
+int sonicReadShortFromStream(sonicStream stream, short* samples,
+ int maxSamples) {
+ int numSamples = stream->numOutputSamples;
+ int remainingSamples = 0;
+
+ if (numSamples == 0) {
+ return 0;
+ }
+ if (numSamples > maxSamples) {
+ remainingSamples = numSamples - maxSamples;
+ numSamples = maxSamples;
+ }
+ memcpy(samples, stream->outputBuffer,
+ numSamples * sizeof(short) * stream->numChannels);
+ if (remainingSamples > 0) {
+ memmove(stream->outputBuffer,
+ stream->outputBuffer + numSamples * stream->numChannels,
+ remainingSamples * sizeof(short) * stream->numChannels);
+ }
+ stream->numOutputSamples = remainingSamples;
+ return numSamples;
}
-/* Read unsigned char data out of the stream. Sometimes no data will be available, and zero
- is returned, which is not an error condition. */
-int sonicReadUnsignedCharFromStream(
- sonicStream stream,
- unsigned char *samples,
- int maxSamples)
-{
- int numSamples = stream->numOutputSamples;
- int remainingSamples = 0;
- short *buffer;
- int count;
-
- if(numSamples == 0) {
- return 0;
- }
- if(numSamples > maxSamples) {
- remainingSamples = numSamples - maxSamples;
- numSamples = maxSamples;
- }
- buffer = stream->outputBuffer;
- count = numSamples*stream->numChannels;
- while(count--) {
- *samples++ = (char)((*buffer++) >> 8) + 128;
- }
- if(remainingSamples > 0) {
- memmove(stream->outputBuffer, stream->outputBuffer + numSamples*stream->numChannels,
- remainingSamples*sizeof(short)*stream->numChannels);
- }
- stream->numOutputSamples = remainingSamples;
- return numSamples;
+/* Read unsigned char data out of the stream. Sometimes no data will be
+ available, and zero is returned, which is not an error condition. */
+int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char* samples,
+ int maxSamples) {
+ int numSamples = stream->numOutputSamples;
+ int remainingSamples = 0;
+ short* buffer;
+ int count;
+
+ if (numSamples == 0) {
+ return 0;
+ }
+ if (numSamples > maxSamples) {
+ remainingSamples = numSamples - maxSamples;
+ numSamples = maxSamples;
+ }
+ buffer = stream->outputBuffer;
+ count = numSamples * stream->numChannels;
+ while (count--) {
+ *samples++ = (char)((*buffer++) >> 8) + 128;
+ }
+ if (remainingSamples > 0) {
+ memmove(stream->outputBuffer,
+ stream->outputBuffer + numSamples * stream->numChannels,
+ remainingSamples * sizeof(short) * stream->numChannels);
+ }
+ stream->numOutputSamples = remainingSamples;
+ return numSamples;
}
/* Force the sonic stream to generate output using whatever data it currently
- has. No extra delay will be added to the output, but flushing in the middle of
- words could introduce distortion. */
-int sonicFlushStream(
- sonicStream stream)
-{
- int maxRequired = stream->maxRequired;
- int remainingSamples = stream->numInputSamples;
- float speed = stream->speed/stream->pitch;
- float rate = stream->rate*stream->pitch;
- int expectedOutputSamples = stream->numOutputSamples +
- (int)((remainingSamples/speed + stream->numPitchSamples)/rate + 0.5f);
-
- /* Add enough silence to flush both input and pitch buffers. */
- if(!enlargeInputBufferIfNeeded(stream, remainingSamples + 2*maxRequired)) {
- return 0;
- }
- memset(stream->inputBuffer + remainingSamples*stream->numChannels, 0,
- 2*maxRequired*sizeof(short)*stream->numChannels);
- stream->numInputSamples += 2*maxRequired;
- if(!sonicWriteShortToStream(stream, NULL, 0)) {
- return 0;
- }
- /* Throw away any extra samples we generated due to the silence we added */
- if(stream->numOutputSamples > expectedOutputSamples) {
- stream->numOutputSamples = expectedOutputSamples;
- }
- /* Empty input and pitch buffers */
- stream->numInputSamples = 0;
- stream->remainingInputToCopy = 0;
- stream->numPitchSamples = 0;
- return 1;
+ has. No extra delay will be added to the output, but flushing in the middle
+ of words could introduce distortion. */
+int sonicFlushStream(sonicStream stream) {
+ int maxRequired = stream->maxRequired;
+ int remainingSamples = stream->numInputSamples;
+ float speed = stream->speed / stream->pitch;
+ float rate = stream->rate * stream->pitch;
+ int expectedOutputSamples =
+ stream->numOutputSamples +
+ (int)((remainingSamples / speed + stream->numPitchSamples) / rate + 0.5f);
+
+ /* Add enough silence to flush both input and pitch buffers. */
+ if (!enlargeInputBufferIfNeeded(stream, remainingSamples + 2 * maxRequired)) {
+ return 0;
+ }
+ memset(stream->inputBuffer + remainingSamples * stream->numChannels, 0,
+ 2 * maxRequired * sizeof(short) * stream->numChannels);
+ stream->numInputSamples += 2 * maxRequired;
+ if (!sonicWriteShortToStream(stream, NULL, 0)) {
+ return 0;
+ }
+ /* Throw away any extra samples we generated due to the silence we added */
+ if (stream->numOutputSamples > expectedOutputSamples) {
+ stream->numOutputSamples = expectedOutputSamples;
+ }
+ /* Empty input and pitch buffers */
+ stream->numInputSamples = 0;
+ stream->inputPlayTime = 0.0f;
+ stream->timeError = 0.0f;
+ stream->numPitchSamples = 0;
+ return 1;
}
/* Return the number of samples in the output buffer */
-int sonicSamplesAvailable(
- sonicStream stream)
-{
- return stream->numOutputSamples;
+int sonicSamplesAvailable(sonicStream stream) {
+ return stream->numOutputSamples;
}
/* If skip is greater than one, average skip samples together and write them to
the down-sample buffer. If numChannels is greater than one, mix the channels
together as we down sample. */
-static void downSampleInput(
- sonicStream stream,
- short *samples,
- int skip)
-{
- int numSamples = stream->maxRequired/skip;
- int samplesPerValue = stream->numChannels*skip;
- int i, j;
- int value;
- short *downSamples = stream->downSampleBuffer;
-
- for(i = 0; i < numSamples; i++) {
- value = 0;
- for(j = 0; j < samplesPerValue; j++) {
- value += *samples++;
- }
- value /= samplesPerValue;
- *downSamples++ = value;
- }
+static void downSampleInput(sonicStream stream, short* samples, int skip) {
+ int numSamples = stream->maxRequired / skip;
+ int samplesPerValue = stream->numChannels * skip;
+ int i, j;
+ int value;
+ short* downSamples = stream->downSampleBuffer;
+
+ for (i = 0; i < numSamples; i++) {
+ value = 0;
+ for (j = 0; j < samplesPerValue; j++) {
+ value += *samples++;
+ }
+ value /= samplesPerValue;
+ *downSamples++ = value;
+ }
}
/* Find the best frequency match in the range, and given a sample skip multiple.
For now, just find the pitch of the first channel. */
-static int findPitchPeriodInRange(
- short *samples,
- int minPeriod,
- int maxPeriod,
- int *retMinDiff,
- int *retMaxDiff)
-{
- int period, bestPeriod = 0, worstPeriod = 255;
- short *s, *p, sVal, pVal;
- unsigned long diff, minDiff = 1, maxDiff = 0;
- int i;
-
- for(period = minPeriod; period <= maxPeriod; period++) {
- diff = 0;
- s = samples;
- p = samples + period;
- for(i = 0; i < period; i++) {
- sVal = *s++;
- pVal = *p++;
- diff += sVal >= pVal? (unsigned short)(sVal - pVal) :
- (unsigned short)(pVal - sVal);
- }
- /* Note that the highest number of samples we add into diff will be less
- than 256, since we skip samples. Thus, diff is a 24 bit number, and
- we can safely multiply by numSamples without overflow */
- if(diff*bestPeriod < minDiff*period) {
- minDiff = diff;
- bestPeriod = period;
- }
- if(diff*worstPeriod > maxDiff*period) {
- maxDiff = diff;
- worstPeriod = period;
- }
- }
- *retMinDiff = minDiff/bestPeriod;
- *retMaxDiff = maxDiff/worstPeriod;
- return bestPeriod;
+static int findPitchPeriodInRange(short* samples, int minPeriod, int maxPeriod,
+ int* retMinDiff, int* retMaxDiff) {
+ int period, bestPeriod = 0, worstPeriod = 255;
+ short* s;
+ short* p;
+ short sVal, pVal;
+ unsigned long diff, minDiff = 1, maxDiff = 0;
+ int i;
+
+ for (period = minPeriod; period <= maxPeriod; period++) {
+ diff = 0;
+ s = samples;
+ p = samples + period;
+ for (i = 0; i < period; i++) {
+ sVal = *s++;
+ pVal = *p++;
+ diff += sVal >= pVal ? (unsigned short)(sVal - pVal)
+ : (unsigned short)(pVal - sVal);
+ }
+ /* Note that the highest number of samples we add into diff will be less
+ than 256, since we skip samples. Thus, diff is a 24 bit number, and
+ we can safely multiply by numSamples without overflow */
+ if (bestPeriod == 0 || diff * bestPeriod < minDiff * period) {
+ minDiff = diff;
+ bestPeriod = period;
+ }
+ if (diff * worstPeriod > maxDiff * period) {
+ maxDiff = diff;
+ worstPeriod = period;
+ }
+ }
+ *retMinDiff = minDiff / bestPeriod;
+ *retMaxDiff = maxDiff / worstPeriod;
+ return bestPeriod;
}
/* At abrupt ends of voiced words, we can have pitch periods that are better
- approximated by the previous pitch period estimate. Try to detect this case. */
-static int prevPeriodBetter(
- sonicStream stream,
- int period,
- int minDiff,
- int maxDiff,
- int preferNewPeriod)
-{
- if(minDiff == 0 || stream->prevPeriod == 0) {
- return 0;
- }
- if(preferNewPeriod) {
- if(maxDiff > minDiff*3) {
- /* Got a reasonable match this period */
- return 0;
- }
- if(minDiff*2 <= stream->prevMinDiff*3) {
- /* Mismatch is not that much greater this period */
- return 0;
- }
- } else {
- if(minDiff <= stream->prevMinDiff) {
- return 0;
- }
- }
- return 1;
+ approximated by the previous pitch period estimate. Try to detect this case.
+ */
+static int prevPeriodBetter(sonicStream stream, int minDiff,
+ int maxDiff, int preferNewPeriod) {
+ if (minDiff == 0 || stream->prevPeriod == 0) {
+ return 0;
+ }
+ if (preferNewPeriod) {
+ if (maxDiff > minDiff * 3) {
+ /* Got a reasonable match this period */
+ return 0;
+ }
+ if (minDiff * 2 <= stream->prevMinDiff * 3) {
+ /* Mismatch is not that much greater this period */
+ return 0;
+ }
+ } else {
+ if (minDiff <= stream->prevMinDiff) {
+ return 0;
+ }
+ }
+ return 1;
}
/* Find the pitch period. This is a critical step, and we may have to try
- multiple ways to get a good answer. This version uses AMDF. To improve
- speed, we down sample by an integer factor get in the 11KHz range, and then
- do it again with a narrower frequency range without down sampling */
-static int findPitchPeriod(
- sonicStream stream,
- short *samples,
- int preferNewPeriod)
-{
- int minPeriod = stream->minPeriod;
- int maxPeriod = stream->maxPeriod;
- int sampleRate = stream->sampleRate;
- int minDiff, maxDiff, retPeriod;
- int skip = 1;
- int period;
-
- if(sampleRate > SONIC_AMDF_FREQ && stream->quality == 0) {
- skip = sampleRate/SONIC_AMDF_FREQ;
- }
- if(stream->numChannels == 1 && skip == 1) {
- period = findPitchPeriodInRange(samples, minPeriod, maxPeriod, &minDiff, &maxDiff);
- } else {
- downSampleInput(stream, samples, skip);
- period = findPitchPeriodInRange(stream->downSampleBuffer, minPeriod/skip,
- maxPeriod/skip, &minDiff, &maxDiff);
- if(skip != 1) {
- period *= skip;
- minPeriod = period - (skip << 2);
- maxPeriod = period + (skip << 2);
- if(minPeriod < stream->minPeriod) {
- minPeriod = stream->minPeriod;
- }
- if(maxPeriod > stream->maxPeriod) {
- maxPeriod = stream->maxPeriod;
- }
- if(stream->numChannels == 1) {
- period = findPitchPeriodInRange(samples, minPeriod, maxPeriod,
- &minDiff, &maxDiff);
- } else {
- downSampleInput(stream, samples, 1);
- period = findPitchPeriodInRange(stream->downSampleBuffer, minPeriod,
- maxPeriod, &minDiff, &maxDiff);
- }
- }
- }
- if(prevPeriodBetter(stream, period, minDiff, maxDiff, preferNewPeriod)) {
- retPeriod = stream->prevPeriod;
- } else {
- retPeriod = period;
- }
- stream->prevMinDiff = minDiff;
- stream->prevPeriod = period;
- return retPeriod;
+ multiple ways to get a good answer. This version uses Average Magnitude
+ Difference Function (AMDF). To improve speed, we down sample by an integer
+ factor get in the 11KHz range, and then do it again with a narrower
+ frequency range without down sampling */
+static int findPitchPeriod(sonicStream stream, short* samples,
+ int preferNewPeriod) {
+ int minPeriod = stream->minPeriod;
+ int maxPeriod = stream->maxPeriod;
+ int minDiff, maxDiff, retPeriod;
+ int skip = computeSkip(stream);
+ int period;
+
+ if (stream->numChannels == 1 && skip == 1) {
+ period = findPitchPeriodInRange(samples, minPeriod, maxPeriod, &minDiff,
+ &maxDiff);
+ } else {
+ downSampleInput(stream, samples, skip);
+ period = findPitchPeriodInRange(stream->downSampleBuffer, minPeriod / skip,
+ maxPeriod / skip, &minDiff, &maxDiff);
+ if (skip != 1) {
+ period *= skip;
+ minPeriod = period - (skip << 2);
+ maxPeriod = period + (skip << 2);
+ if (minPeriod < stream->minPeriod) {
+ minPeriod = stream->minPeriod;
+ }
+ if (maxPeriod > stream->maxPeriod) {
+ maxPeriod = stream->maxPeriod;
+ }
+ if (stream->numChannels == 1) {
+ period = findPitchPeriodInRange(samples, minPeriod, maxPeriod, &minDiff,
+ &maxDiff);
+ } else {
+ downSampleInput(stream, samples, 1);
+ period = findPitchPeriodInRange(stream->downSampleBuffer, minPeriod,
+ maxPeriod, &minDiff, &maxDiff);
+ }
+ }
+ }
+ if (prevPeriodBetter(stream, minDiff, maxDiff, preferNewPeriod)) {
+ retPeriod = stream->prevPeriod;
+ } else {
+ retPeriod = period;
+ }
+ stream->prevMinDiff = minDiff;
+ stream->prevPeriod = period;
+ return retPeriod;
}
/* Overlap two sound segments, ramp the volume of one down, while ramping the
other one from zero up, and add them, storing the result at the output. */
-static void overlapAdd(
- int numSamples,
- int numChannels,
- short *out,
- short *rampDown,
- short *rampUp)
-{
- short *o, *u, *d;
- int i, t;
-
- for(i = 0; i < numChannels; i++) {
- o = out + i;
- u = rampUp + i;
- d = rampDown + i;
- for(t = 0; t < numSamples; t++) {
+static void overlapAdd(int numSamples, int numChannels, short* out,
+ short* rampDown, short* rampUp) {
+ short* o;
+ short* u;
+ short* d;
+ int i, t;
+
+ for (i = 0; i < numChannels; i++) {
+ o = out + i;
+ u = rampUp + i;
+ d = rampDown + i;
+ for (t = 0; t < numSamples; t++) {
#ifdef SONIC_USE_SIN
- float ratio = sin(t*M_PI/(2*numSamples));
- *o = *d*(1.0f - ratio) + *u*ratio;
+ float ratio = sin(t * M_PI / (2 * numSamples));
+ *o = *d * (1.0f - ratio) + *u * ratio;
#else
- *o = (*d*(numSamples - t) + *u*t)/numSamples;
+ *o = (*d * (numSamples - t) + *u * t) / numSamples;
#endif
- o += numChannels;
- d += numChannels;
- u += numChannels;
- }
- }
-}
-
-/* Overlap two sound segments, ramp the volume of one down, while ramping the
- other one from zero up, and add them, storing the result at the output. */
-static void overlapAddWithSeparation(
- int numSamples,
- int numChannels,
- int separation,
- short *out,
- short *rampDown,
- short *rampUp)
-{
- short *o, *u, *d;
- int i, t;
-
- for(i = 0; i < numChannels; i++) {
- o = out + i;
- u = rampUp + i;
- d = rampDown + i;
- for(t = 0; t < numSamples + separation; t++) {
- if(t < separation) {
- *o = *d*(numSamples - t)/numSamples;
- d += numChannels;
- } else if(t < numSamples) {
- *o = (*d*(numSamples - t) + *u*(t - separation))/numSamples;
- d += numChannels;
- u += numChannels;
- } else {
- *o = *u*(t - separation)/numSamples;
- u += numChannels;
- }
- o += numChannels;
- }
+ o += numChannels;
+ d += numChannels;
+ u += numChannels;
}
+ }
}
/* Just move the new samples in the output buffer to the pitch buffer */
-static int moveNewSamplesToPitchBuffer(
- sonicStream stream,
- int originalNumOutputSamples)
-{
- int numSamples = stream->numOutputSamples - originalNumOutputSamples;
- int numChannels = stream->numChannels;
-
- if(stream->numPitchSamples + numSamples > stream->pitchBufferSize) {
- stream->pitchBufferSize += (stream->pitchBufferSize >> 1) + numSamples;
- stream->pitchBuffer = (short *)realloc(stream->pitchBuffer,
- stream->pitchBufferSize*sizeof(short)*numChannels);
- if(stream->pitchBuffer == NULL) {
- return 0;
- }
- }
- memcpy(stream->pitchBuffer + stream->numPitchSamples*numChannels,
- stream->outputBuffer + originalNumOutputSamples*numChannels,
- numSamples*sizeof(short)*numChannels);
- stream->numOutputSamples = originalNumOutputSamples;
- stream->numPitchSamples += numSamples;
- return 1;
+static int moveNewSamplesToPitchBuffer(sonicStream stream,
+ int originalNumOutputSamples) {
+ int numSamples = stream->numOutputSamples - originalNumOutputSamples;
+ int numChannels = stream->numChannels;
+
+ if (stream->numPitchSamples + numSamples > stream->pitchBufferSize) {
+ int pitchBufferSize = stream->pitchBufferSize;
+ stream->pitchBufferSize += (pitchBufferSize >> 1) + numSamples;
+ stream->pitchBuffer = (short*)sonicRealloc(
+ stream->pitchBuffer,
+ pitchBufferSize,
+ stream->pitchBufferSize,
+ sizeof(short) * numChannels);
+ }
+ memcpy(stream->pitchBuffer + stream->numPitchSamples * numChannels,
+ stream->outputBuffer + originalNumOutputSamples * numChannels,
+ numSamples * sizeof(short) * numChannels);
+ stream->numOutputSamples = originalNumOutputSamples;
+ stream->numPitchSamples += numSamples;
+ return 1;
}
/* Remove processed samples from the pitch buffer. */
-static void removePitchSamples(
- sonicStream stream,
- int numSamples)
-{
- int numChannels = stream->numChannels;
- short *source = stream->pitchBuffer + numSamples*numChannels;
-
- if(numSamples == 0) {
- return;
- }
- if(numSamples != stream->numPitchSamples) {
- memmove(stream->pitchBuffer, source, (stream->numPitchSamples -
- numSamples)*sizeof(short)*numChannels);
- }
- stream->numPitchSamples -= numSamples;
+static void removePitchSamples(sonicStream stream, int numSamples) {
+ int numChannels = stream->numChannels;
+ short* source = stream->pitchBuffer + numSamples * numChannels;
+
+ if (numSamples == 0) {
+ return;
+ }
+ if (numSamples != stream->numPitchSamples) {
+ memmove(
+ stream->pitchBuffer, source,
+ (stream->numPitchSamples - numSamples) * sizeof(short) * numChannels);
+ }
+ stream->numPitchSamples -= numSamples;
}
-/* Change the pitch. The latency this introduces could be reduced by looking at
- past samples to determine pitch, rather than future. */
-static int adjustPitch(
- sonicStream stream,
- int originalNumOutputSamples)
-{
- float pitch = stream->pitch;
- int numChannels = stream->numChannels;
- int period, newPeriod, separation;
- int position = 0;
- short *out, *rampDown, *rampUp;
-
- if(stream->numOutputSamples == originalNumOutputSamples) {
- return 1;
- }
- if(!moveNewSamplesToPitchBuffer(stream, originalNumOutputSamples)) {
- return 0;
- }
- while(stream->numPitchSamples - position >= stream->maxRequired) {
- period = findPitchPeriod(stream, stream->pitchBuffer + position*numChannels, 0);
- newPeriod = period/pitch;
- if(!enlargeOutputBufferIfNeeded(stream, newPeriod)) {
- return 0;
- }
- out = stream->outputBuffer + stream->numOutputSamples*numChannels;
- if(pitch >= 1.0f) {
- rampDown = stream->pitchBuffer + position*numChannels;
- rampUp = stream->pitchBuffer + (position + period - newPeriod)*numChannels;
- overlapAdd(newPeriod, numChannels, out, rampDown, rampUp);
- } else {
- rampDown = stream->pitchBuffer + position*numChannels;
- rampUp = stream->pitchBuffer + position*numChannels;
- separation = newPeriod - period;
- overlapAddWithSeparation(period, numChannels, separation, out, rampDown, rampUp);
- }
- stream->numOutputSamples += newPeriod;
- position += period;
- }
- removePitchSamples(stream, position);
- return 1;
+/* Approximate the sinc function times a Hann window from the sinc table. */
+static int findSincCoefficient(int i, int ratio, int width) {
+ int lobePoints = (SINC_TABLE_SIZE - 1) / SINC_FILTER_POINTS;
+ int left = i * lobePoints + (ratio * lobePoints) / width;
+ int right = left + 1;
+ int position = i * lobePoints * width + ratio * lobePoints - left * width;
+ int leftVal = sincTable[left];
+ int rightVal = sincTable[right];
+
+ return ((leftVal * (width - position) + rightVal * position) << 1) / width;
}
+/* Return 1 if value >= 0, else -1. This represents the sign of value. */
+static int getSign(int value) { return value >= 0 ? 1 : -1; }
+
/* Interpolate the new output sample. */
-static short interpolate(
- sonicStream stream,
- short *in,
- int oldSampleRate,
- int newSampleRate)
-{
- short left = *in;
- short right = in[stream->numChannels];
- int position = stream->newRatePosition*oldSampleRate;
- int leftPosition = stream->oldRatePosition*newSampleRate;
- int rightPosition = (stream->oldRatePosition + 1)*newSampleRate;
- int ratio = rightPosition - position;
- int width = rightPosition - leftPosition;
-
- return (ratio*left + (width - ratio)*right)/width;
+static short interpolate(sonicStream stream, short* in, int oldSampleRate,
+ int newSampleRate) {
+ /* Compute N-point sinc FIR-filter here. Clip rather than overflow. */
+ int i;
+ int total = 0;
+ int position = stream->newRatePosition * oldSampleRate;
+ int leftPosition = stream->oldRatePosition * newSampleRate;
+ int rightPosition = (stream->oldRatePosition + 1) * newSampleRate;
+ int ratio = rightPosition - position - 1;
+ int width = rightPosition - leftPosition;
+ int weight, value;
+ int oldSign;
+ int overflowCount = 0;
+
+ for (i = 0; i < SINC_FILTER_POINTS; i++) {
+ weight = findSincCoefficient(i, ratio, width);
+ value = in[i * stream->numChannels] * weight;
+ oldSign = getSign(total);
+ total += value;
+ if (oldSign != getSign(total) && getSign(value) == oldSign) {
+ /* We must have overflowed. This can happen with a sinc filter. */
+ overflowCount += oldSign;
+ }
+ }
+ /* It is better to clip than to wrap if there was an overflow. */
+ if (overflowCount > 0) {
+ return SHRT_MAX;
+ } else if (overflowCount < 0) {
+ return SHRT_MIN;
+ }
+ return total >> 16;
}
-/* Change the rate. */
-static int adjustRate(
- sonicStream stream,
- float rate,
- int originalNumOutputSamples)
-{
- int newSampleRate = stream->sampleRate/rate;
- int oldSampleRate = stream->sampleRate;
- int numChannels = stream->numChannels;
- int position = 0;
- short *in, *out;
- int i;
-
- /* Set these values to help with the integer math */
- while(newSampleRate > (1 << 14) || oldSampleRate > (1 << 14)) {
- newSampleRate >>= 1;
- oldSampleRate >>= 1;
- }
- if(stream->numOutputSamples == originalNumOutputSamples) {
- return 1;
- }
- if(!moveNewSamplesToPitchBuffer(stream, originalNumOutputSamples)) {
- return 0;
- }
- /* Leave at least one pitch sample in the buffer */
- for(position = 0; position < stream->numPitchSamples - 1; position++) {
- while((stream->oldRatePosition + 1)*newSampleRate >
- stream->newRatePosition*oldSampleRate) {
- if(!enlargeOutputBufferIfNeeded(stream, 1)) {
- return 0;
- }
- out = stream->outputBuffer + stream->numOutputSamples*numChannels;
- in = stream->pitchBuffer + position;
- for(i = 0; i < numChannels; i++) {
- *out++ = interpolate(stream, in, oldSampleRate, newSampleRate);
- in++;
- }
- stream->newRatePosition++;
- stream->numOutputSamples++;
- }
- stream->oldRatePosition++;
- if(stream->oldRatePosition == oldSampleRate) {
- stream->oldRatePosition = 0;
- if(stream->newRatePosition != newSampleRate) {
- fprintf(stderr,
- "Assertion failed: stream->newRatePosition != newSampleRate\n");
- exit(1);
- }
- stream->newRatePosition = 0;
- }
- }
- removePitchSamples(stream, position);
+/* Change the rate. Interpolate with a sinc FIR filter using a Hann window. */
+static int adjustRate(sonicStream stream, float rate,
+ int originalNumOutputSamples) {
+ int newSampleRate = stream->sampleRate / rate;
+ int oldSampleRate = stream->sampleRate;
+ int numChannels = stream->numChannels;
+ int position;
+ short *in, *out;
+ int i;
+ int N = SINC_FILTER_POINTS;
+
+ /* Set these values to help with the integer math */
+ while (newSampleRate > (1 << 14) || oldSampleRate > (1 << 14)) {
+ newSampleRate >>= 1;
+ oldSampleRate >>= 1;
+ }
+ if (stream->numOutputSamples == originalNumOutputSamples) {
return 1;
+ }
+ if (!moveNewSamplesToPitchBuffer(stream, originalNumOutputSamples)) {
+ return 0;
+ }
+ /* Leave at least N pitch samples in the buffer */
+ for (position = 0; position < stream->numPitchSamples - N; position++) {
+ while ((stream->oldRatePosition + 1) * newSampleRate >
+ stream->newRatePosition * oldSampleRate) {
+ if (!enlargeOutputBufferIfNeeded(stream, 1)) {
+ return 0;
+ }
+ out = stream->outputBuffer + stream->numOutputSamples * numChannels;
+ in = stream->pitchBuffer + position * numChannels;
+ for (i = 0; i < numChannels; i++) {
+ *out++ = interpolate(stream, in, oldSampleRate, newSampleRate);
+ in++;
+ }
+ stream->newRatePosition++;
+ stream->numOutputSamples++;
+ }
+ stream->oldRatePosition++;
+ if (stream->oldRatePosition == oldSampleRate) {
+ stream->oldRatePosition = 0;
+ stream->newRatePosition = 0;
+ }
+ }
+ removePitchSamples(stream, position);
+ return 1;
}
-
-/* Skip over a pitch period, and copy period/speed samples to the output */
-static int skipPitchPeriod(
- sonicStream stream,
- short *samples,
- float speed,
- int period)
-{
- long newSamples;
- int numChannels = stream->numChannels;
-
- if(speed >= 2.0f) {
- newSamples = period/(speed - 1.0f);
- } else {
- newSamples = period;
- stream->remainingInputToCopy = period*(2.0f - speed)/(speed - 1.0f);
- }
- if(!enlargeOutputBufferIfNeeded(stream, newSamples)) {
- return 0;
- }
- overlapAdd(newSamples, numChannels, stream->outputBuffer +
- stream->numOutputSamples*numChannels, samples, samples + period*numChannels);
- stream->numOutputSamples += newSamples;
- return newSamples;
+/* Skip over a pitch period. Return the number of output samples. */
+static int skipPitchPeriod(sonicStream stream, short* samples, float speed,
+ int period) {
+ long newSamples;
+ int numChannels = stream->numChannels;
+
+ if (speed >= 2.0f) {
+ /* For speeds >= 2.0, we skip over a portion of each pitch period rather
+ than dropping whole pitch periods. */
+ newSamples = period / (speed - 1.0f);
+ } else {
+ newSamples = period;
+ }
+ if (!enlargeOutputBufferIfNeeded(stream, newSamples)) {
+ return 0;
+ }
+ overlapAdd(newSamples, numChannels,
+ stream->outputBuffer + stream->numOutputSamples * numChannels,
+ samples, samples + period * numChannels);
+ stream->numOutputSamples += newSamples;
+ return newSamples;
}
/* Insert a pitch period, and determine how much input to copy directly. */
-static int insertPitchPeriod(
- sonicStream stream,
- short *samples,
- float speed,
- int period)
-{
- long newSamples;
- short *out;
- int numChannels = stream->numChannels;
-
- if(speed < 0.5f) {
- newSamples = period*speed/(1.0f - speed);
- } else {
- newSamples = period;
- stream->remainingInputToCopy = period*(2.0f*speed - 1.0f)/(1.0f - speed);
- }
- if(!enlargeOutputBufferIfNeeded(stream, period + newSamples)) {
- return 0;
- }
- out = stream->outputBuffer + stream->numOutputSamples*numChannels;
- memcpy(out, samples, period*sizeof(short)*numChannels);
- out = stream->outputBuffer + (stream->numOutputSamples + period)*numChannels;
- overlapAdd(newSamples, numChannels, out, samples + period*numChannels, samples);
- stream->numOutputSamples += period + newSamples;
- return newSamples;
+static int insertPitchPeriod(sonicStream stream, short* samples, float speed,
+ int period) {
+ long newSamples;
+ short* out;
+ int numChannels = stream->numChannels;
+
+ if (speed <= 0.5f) {
+ newSamples = period * speed / (1.0f - speed);
+ } else {
+ newSamples = period;
+ }
+ if (!enlargeOutputBufferIfNeeded(stream, period + newSamples)) {
+ return 0;
+ }
+ out = stream->outputBuffer + stream->numOutputSamples * numChannels;
+ memcpy(out, samples, period * sizeof(short) * numChannels);
+ out =
+ stream->outputBuffer + (stream->numOutputSamples + period) * numChannels;
+ overlapAdd(newSamples, numChannels, out, samples + period * numChannels,
+ samples);
+ stream->numOutputSamples += period + newSamples;
+ return newSamples;
+}
+
+/* PICOLA copies input to output until the total output samples == consumed
+ input samples * speed. */
+static int copyUnmodifiedSamples(sonicStream stream, short* samples,
+ float speed, int position, int* newSamples) {
+ int availableSamples = stream->numInputSamples - position;
+ float inputToCopyFloat =
+ 1 - stream->timeError * speed / (stream->samplePeriod * (speed - 1.0));
+
+ *newSamples = inputToCopyFloat > availableSamples ? availableSamples
+ : (int)inputToCopyFloat;
+ if (!copyToOutput(stream, samples, *newSamples)) {
+ return 0;
+ }
+ stream->timeError +=
+ *newSamples * stream->samplePeriod * (speed - 1.0) / speed;
+ return 1;
}
/* Resample as many pitch periods as we have buffered on the input. Return 0 if
- we fail to resize an input or output buffer. Also scale the output by the volume. */
-static int changeSpeed(
- sonicStream stream,
- float speed)
-{
- short *samples;
- int numSamples = stream->numInputSamples;
- int position = 0, period, newSamples;
- int maxRequired = stream->maxRequired;
-
- if(stream->numInputSamples < maxRequired) {
- return 1;
- }
- do {
- if(stream->remainingInputToCopy > 0) {
- newSamples = copyInputToOutput(stream, position);
- position += newSamples;
- } else {
- samples = stream->inputBuffer + position*stream->numChannels;
- period = findPitchPeriod(stream, samples, 1);
- if(speed > 1.0) {
- newSamples = skipPitchPeriod(stream, samples, speed, period);
- position += period + newSamples;
- } else {
- newSamples = insertPitchPeriod(stream, samples, speed, period);
- position += newSamples;
- }
- }
- if(newSamples == 0) {
- return 0; /* Failed to resize output buffer */
- }
- } while(position + maxRequired <= numSamples);
- removeInputSamples(stream, position);
+ we fail to resize an input or output buffer. */
+static int changeSpeed(sonicStream stream, float speed) {
+ short* samples;
+ int numSamples = stream->numInputSamples;
+ int position = 0, period, newSamples;
+ int maxRequired = stream->maxRequired;
+
+ if (stream->numInputSamples < maxRequired) {
return 1;
+ }
+ do {
+ samples = stream->inputBuffer + position * stream->numChannels;
+ if ((speed > 1.0f && speed < 2.0f && stream->timeError < 0.0f) ||
+ (speed < 1.0f && speed > 0.5f && stream->timeError > 0.0f)) {
+ /* Deal with the case where PICOLA is still copying input samples to
+ output unmodified. */
+ if (!copyUnmodifiedSamples(stream, samples, speed, position,
+ &newSamples)) {
+ return 0;
+ }
+ position += newSamples;
+ } else {
+ /* We are in the remaining cases, either inserting/removing a pitch period
+ for speed < 2.0X, or a portion of one for speed >= 2.0X. */
+ period = findPitchPeriod(stream, samples, 1);
+#ifdef SONIC_SPECTROGRAM
+ if (stream->spectrogram != NULL) {
+ sonicAddPitchPeriodToSpectrogram(stream->spectrogram, samples, period,
+ stream->numChannels);
+ newSamples = period;
+ position += period;
+ } else
+#endif /* SONIC_SPECTROGRAM */
+ if (speed > 1.0) {
+ newSamples = skipPitchPeriod(stream, samples, speed, period);
+ position += period + newSamples;
+ if (speed < 2.0) {
+ stream->timeError += newSamples * stream->samplePeriod -
+ (period + newSamples) * stream->inputPlayTime /
+ stream->numInputSamples;
+ }
+ } else {
+ newSamples = insertPitchPeriod(stream, samples, speed, period);
+ position += newSamples;
+ if (speed > 0.5) {
+ stream->timeError +=
+ (period + newSamples) * stream->samplePeriod -
+ newSamples * stream->inputPlayTime / stream->numInputSamples;
+ }
+ }
+ if (newSamples == 0) {
+ return 0; /* Failed to resize output buffer */
+ }
+ }
+ } while (position + maxRequired <= numSamples);
+ removeInputSamples(stream, position);
+ return 1;
}
/* Resample as many pitch periods as we have buffered on the input. Return 0 if
- we fail to resize an input or output buffer. Also scale the output by the volume. */
-static int processStreamInput(
- sonicStream stream)
-{
- int originalNumOutputSamples = stream->numOutputSamples;
- float speed = stream->speed/stream->pitch;
- float rate = stream->rate;
-
- if(!stream->useChordPitch) {
- rate *= stream->pitch;
- }
- if(speed > 1.00001 || speed < 0.99999) {
- changeSpeed(stream, speed);
- } else {
- if(!copyToOutput(stream, stream->inputBuffer, stream->numInputSamples)) {
- return 0;
- }
- stream->numInputSamples = 0;
- }
- if(stream->useChordPitch) {
- if(stream->pitch != 1.0f) {
- if(!adjustPitch(stream, originalNumOutputSamples)) {
- return 0;
- }
- }
- } else if(rate != 1.0f) {
- if(!adjustRate(stream, rate, originalNumOutputSamples)) {
- return 0;
- }
- }
- if(stream->volume != 1.0f) {
- /* Adjust output volume. */
- scaleSamples(stream->outputBuffer + originalNumOutputSamples*stream->numChannels,
- (stream->numOutputSamples - originalNumOutputSamples)*stream->numChannels,
- stream->volume);
- }
+ we fail to resize an input or output buffer. Also scale the output by the
+ volume. */
+static int processStreamInput(sonicStream stream) {
+ int originalNumOutputSamples = stream->numOutputSamples;
+ float rate = stream->rate * stream->pitch;
+ float localSpeed;
+
+ if (stream->numInputSamples == 0) {
return 1;
+ }
+ localSpeed =
+ stream->numInputSamples * stream->samplePeriod / stream->inputPlayTime;
+ if (localSpeed > 1.00001 || localSpeed < 0.99999) {
+ changeSpeed(stream, localSpeed);
+ } else {
+ if (!copyInputToOutput(stream, stream->numInputSamples)) {
+ return 0;
+ }
+ }
+ if (rate != 1.0f) {
+ if (!adjustRate(stream, rate, originalNumOutputSamples)) {
+ return 0;
+ }
+ }
+ if (stream->volume != 1.0f) {
+ /* Adjust output volume. */
+ scaleSamples(
+ stream->outputBuffer + originalNumOutputSamples * stream->numChannels,
+ (stream->numOutputSamples - originalNumOutputSamples) *
+ stream->numChannels,
+ stream->volume);
+ }
+ return 1;
}
/* Write floating point data to the input buffer and process it. */
-int sonicWriteFloatToStream(
- sonicStream stream,
- float *samples,
- int numSamples)
-{
- if(!addFloatSamplesToInputBuffer(stream, samples, numSamples)) {
- return 0;
- }
- return processStreamInput(stream);
+int sonicWriteFloatToStream(sonicStream stream, const float* samples,
+ int numSamples) {
+ if (!addFloatSamplesToInputBuffer(stream, samples, numSamples)) {
+ return 0;
+ }
+ return processStreamInput(stream);
}
/* Simple wrapper around sonicWriteFloatToStream that does the short to float
conversion for you. */
-int sonicWriteShortToStream(
- sonicStream stream,
- short *samples,
- int numSamples)
-{
- if(!addShortSamplesToInputBuffer(stream, samples, numSamples)) {
- return 0;
- }
- return processStreamInput(stream);
+int sonicWriteShortToStream(sonicStream stream, const short* samples,
+ int numSamples) {
+ if (!addShortSamplesToInputBuffer(stream, samples, numSamples)) {
+ return 0;
+ }
+ return processStreamInput(stream);
}
-/* Simple wrapper around sonicWriteFloatToStream that does the unsigned char to float
- conversion for you. */
-int sonicWriteUnsignedCharToStream(
- sonicStream stream,
- unsigned char *samples,
- int numSamples)
-{
- if(!addUnsignedCharSamplesToInputBuffer(stream, samples, numSamples)) {
- return 0;
- }
- return processStreamInput(stream);
+/* Simple wrapper around sonicWriteFloatToStream that does the unsigned char to
+ float conversion for you. */
+int sonicWriteUnsignedCharToStream(sonicStream stream, const unsigned char* samples,
+ int numSamples) {
+ if (!addUnsignedCharSamplesToInputBuffer(stream, samples, numSamples)) {
+ return 0;
+ }
+ return processStreamInput(stream);
}
-/* This is a non-stream oriented interface to just change the speed of a sound sample */
-int sonicChangeFloatSpeed(
- float *samples,
- int numSamples,
- float speed,
- float pitch,
- float rate,
- float volume,
- int useChordPitch,
- int sampleRate,
- int numChannels)
-{
- sonicStream stream = sonicCreateStream(sampleRate, numChannels);
-
- sonicSetSpeed(stream, speed);
- sonicSetPitch(stream, pitch);
- sonicSetRate(stream, rate);
- sonicSetVolume(stream, volume);
- sonicSetChordPitch(stream, useChordPitch);
- sonicWriteFloatToStream(stream, samples, numSamples);
- sonicFlushStream(stream);
- numSamples = sonicSamplesAvailable(stream);
- sonicReadFloatFromStream(stream, samples, numSamples);
- sonicDestroyStream(stream);
- return numSamples;
+/* This is a non-stream oriented interface to just change the speed of a sound
+ * sample */
+int sonicChangeFloatSpeed(float* samples, int numSamples, float speed,
+ float pitch, float rate, float volume,
+ int useChordPitch, int sampleRate, int numChannels) {
+ sonicStream stream = sonicCreateStream(sampleRate, numChannels);
+
+ sonicSetSpeed(stream, speed);
+ sonicSetPitch(stream, pitch);
+ sonicSetRate(stream, rate);
+ sonicSetVolume(stream, volume);
+ sonicWriteFloatToStream(stream, samples, numSamples);
+ sonicFlushStream(stream);
+ numSamples = sonicSamplesAvailable(stream);
+ sonicReadFloatFromStream(stream, samples, numSamples);
+ sonicDestroyStream(stream);
+ return numSamples;
}
-/* This is a non-stream oriented interface to just change the speed of a sound sample */
-int sonicChangeShortSpeed(
- short *samples,
- int numSamples,
- float speed,
- float pitch,
- float rate,
- float volume,
- int useChordPitch,
- int sampleRate,
- int numChannels)
-{
- sonicStream stream = sonicCreateStream(sampleRate, numChannels);
-
- sonicSetSpeed(stream, speed);
- sonicSetPitch(stream, pitch);
- sonicSetRate(stream, rate);
- sonicSetVolume(stream, volume);
- sonicSetChordPitch(stream, useChordPitch);
- sonicWriteShortToStream(stream, samples, numSamples);
- sonicFlushStream(stream);
- numSamples = sonicSamplesAvailable(stream);
- sonicReadShortFromStream(stream, samples, numSamples);
- sonicDestroyStream(stream);
- return numSamples;
+/* This is a non-stream oriented interface to just change the speed of a sound
+ * sample */
+int sonicChangeShortSpeed(short* samples, int numSamples, float speed,
+ float pitch, float rate, float volume,
+ int useChordPitch, int sampleRate, int numChannels) {
+ sonicStream stream = sonicCreateStream(sampleRate, numChannels);
+
+ sonicSetSpeed(stream, speed);
+ sonicSetPitch(stream, pitch);
+ sonicSetRate(stream, rate);
+ sonicSetVolume(stream, volume);
+ sonicWriteShortToStream(stream, samples, numSamples);
+ sonicFlushStream(stream);
+ numSamples = sonicSamplesAvailable(stream);
+ sonicReadShortFromStream(stream, samples, numSamples);
+ sonicDestroyStream(stream);
+ return numSamples;
}
diff --git a/sonic.h b/sonic.h
index 9b44e68..ea439b0 100644
--- a/sonic.h
+++ b/sonic.h
@@ -30,7 +30,7 @@ where t = 0 to newSamples - 1.
For speed factors < 2X, the PICOLA algorithm is used. The above
algorithm is first used to double the speed of one pitch period. Then, enough
input is directly copied from the input to the output to achieve the desired
-speed up facter, where 1.0 < speed < 2.0. The amount of data copied is derived:
+speed up factor, where 1.0 < speed < 2.0. The amount of data copied is derived:
speed = (2*period + length)/(period + length)
speed*length + speed*period = 2*period + length
@@ -47,24 +47,76 @@ For slow down factors below 0.5, no data is copied, and an algorithm
similar to high speed factors is used.
*/
-#ifdef __cplusplus
-extern "C" {
-#endif
-
/* Uncomment this to use sin-wav based overlap add which in theory can improve
sound quality slightly, at the expense of lots of floating point math. */
/* #define SONIC_USE_SIN */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef SONIC_INTERNAL
+/* The following #define's are used to change the names of the routines defined
+ * here so that a new library (sonic2) can reuse these names, and then call
+ * the original names. We do this for two reasons: 1) we don't want to change
+ * the original API, and 2) we want to add a shim, using the original names and
+ * still call these routines.
+ *
+ * Original users of this API and the libsonic library need to do nothing. The
+ * original behavior remains.
+ *
+ * A new user that adds some additional functionality above this library (a
+ * shim) should #define SONIC_INTERNAL before including this file, undefine all
+ * these symbols and call the sonicIntXXX functions directly.
+ */
+#define sonicCreateStream sonicIntCreateStream
+#define sonicDestroyStream sonicIntDestroyStream
+#define sonicWriteFloatToStream sonicIntWriteFloatToStream
+#define sonicWriteShortToStream sonicIntWriteShortToStream
+#define sonicWriteUnsignedCharToStream sonicIntWriteUnsignedCharToStream
+#define sonicReadFloatFromStream sonicIntReadFloatFromStream
+#define sonicReadShortFromStream sonicIntReadShortFromStream
+#define sonicReadUnsignedCharFromStream sonicIntReadUnsignedCharFromStream
+#define sonicFlushStream sonicIntFlushStream
+#define sonicSamplesAvailable sonicIntSamplesAvailable
+#define sonicGetSpeed sonicIntGetSpeed
+#define sonicSetSpeed sonicIntSetSpeed
+#define sonicGetPitch sonicIntGetPitch
+#define sonicSetPitch sonicIntSetPitch
+#define sonicGetRate sonicIntGetRate
+#define sonicSetRate sonicIntSetRate
+#define sonicGetVolume sonicIntGetVolume
+#define sonicSetVolume sonicIntSetVolume
+#define sonicGetQuality sonicIntGetQuality
+#define sonicSetQuality sonicIntSetQuality
+#define sonicGetSampleRate sonicIntGetSampleRate
+#define sonicSetSampleRate sonicIntSetSampleRate
+#define sonicGetNumChannels sonicIntGetNumChannels
+#define sonicGetUserData sonicIntGetUserData
+#define sonicSetUserData sonicIntSetUserData
+#define sonicSetNumChannels sonicIntSetNumChannels
+#define sonicChangeFloatSpeed sonicIntChangeFloatSpeed
+#define sonicChangeShortSpeed sonicIntChangeShortSpeed
+#define sonicEnableNonlinearSpeedup sonicIntEnableNonlinearSpeedup
+#define sonicComputeSpectrogram sonicIntComputeSpectrogram
+#define sonicGetSpectrogram sonicIntGetSpectrogram
+
+#endif /* SONIC_INTERNAL */
+
/* This specifies the range of voice pitches we try to match.
Note that if we go lower than 65, we could overflow in findPitchInRange */
+#ifndef SONIC_MIN_PITCH
#define SONIC_MIN_PITCH 65
+#endif /* SONIC_MIN_PITCH */
+#ifndef SONIC_MAX_PITCH
#define SONIC_MAX_PITCH 400
+#endif /* SONIC_MAX_PITCH */
/* These are used to down-sample some inputs to improve speed */
#define SONIC_AMDF_FREQ 4000
struct sonicStreamStruct;
-typedef struct sonicStreamStruct *sonicStream;
+typedef struct sonicStreamStruct* sonicStream;
/* For all of the following functions, numChannels is multiplied by numSamples
to determine the actual number of values read or returned. */
@@ -74,27 +126,36 @@ typedef struct sonicStreamStruct *sonicStream;
sonicStream sonicCreateStream(int sampleRate, int numChannels);
/* Destroy the sonic stream. */
void sonicDestroyStream(sonicStream stream);
+/* Attach user data to the stream. */
+void sonicSetUserData(sonicStream stream, void *userData);
+/* Retrieve user data attached to the stream. */
+void *sonicGetUserData(sonicStream stream);
/* Use this to write floating point data to be speed up or down into the stream.
- Values must be between -1 and 1. Return 0 if memory realloc failed, otherwise 1 */
-int sonicWriteFloatToStream(sonicStream stream, float *samples, int numSamples);
+ Values must be between -1 and 1. Return 0 if memory realloc failed,
+ otherwise 1 */
+int sonicWriteFloatToStream(sonicStream stream, const float* samples, int numSamples);
/* Use this to write 16-bit data to be speed up or down into the stream.
Return 0 if memory realloc failed, otherwise 1 */
-int sonicWriteShortToStream(sonicStream stream, short *samples, int numSamples);
+int sonicWriteShortToStream(sonicStream stream, const short* samples, int numSamples);
/* Use this to write 8-bit unsigned data to be speed up or down into the stream.
Return 0 if memory realloc failed, otherwise 1 */
-int sonicWriteUnsignedCharToStream(sonicStream stream, unsigned char *samples, int numSamples);
+int sonicWriteUnsignedCharToStream(sonicStream stream, const unsigned char* samples,
+ int numSamples);
/* Use this to read floating point data out of the stream. Sometimes no data
will be available, and zero is returned, which is not an error condition. */
-int sonicReadFloatFromStream(sonicStream stream, float *samples, int maxSamples);
+int sonicReadFloatFromStream(sonicStream stream, float* samples,
+ int maxSamples);
/* Use this to read 16-bit data out of the stream. Sometimes no data will
be available, and zero is returned, which is not an error condition. */
-int sonicReadShortFromStream(sonicStream stream, short *samples, int maxSamples);
-/* Use this to read 8-bit unsigned data out of the stream. Sometimes no data will
- be available, and zero is returned, which is not an error condition. */
-int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char *samples, int maxSamples);
+int sonicReadShortFromStream(sonicStream stream, short* samples,
+ int maxSamples);
+/* Use this to read 8-bit unsigned data out of the stream. Sometimes no data
+ will be available, and zero is returned, which is not an error condition. */
+int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char* samples,
+ int maxSamples);
/* Force the sonic stream to generate output using whatever data it currently
- has. No extra delay will be added to the output, but flushing in the middle of
- words could introduce distortion. */
+ has. No extra delay will be added to the output, but flushing in the middle
+ of words could introduce distortion. */
int sonicFlushStream(sonicStream stream);
/* Return the number of samples in the output buffer */
int sonicSamplesAvailable(sonicStream stream);
@@ -114,6 +175,8 @@ void sonicSetRate(sonicStream stream, float rate);
float sonicGetVolume(sonicStream stream);
/* Set the scaling factor of the stream. */
void sonicSetVolume(sonicStream stream, float volume);
+/* Chord pitch is DEPRECATED. AFAIK, it was never used by anyone. These
+ functions still exist to avoid breaking existing code. */
/* Get the chord pitch setting. */
int sonicGetChordPitch(sonicStream stream);
/* Set chord pitch mode on or off. Default is off. See the documentation
@@ -121,27 +184,102 @@ int sonicGetChordPitch(sonicStream stream);
void sonicSetChordPitch(sonicStream stream, int useChordPitch);
/* Get the quality setting. */
int sonicGetQuality(sonicStream stream);
-/* Set the "quality". Default 0 is virtually as good as 1, but very much faster. */
+/* Set the "quality". Default 0 is virtually as good as 1, but very much
+ * faster. */
void sonicSetQuality(sonicStream stream, int quality);
/* Get the sample rate of the stream. */
int sonicGetSampleRate(sonicStream stream);
-/* Set the sample rate of the stream. This will drop any samples that have not been read. */
+/* Set the sample rate of the stream. This will drop any samples that have not
+ * been read. */
void sonicSetSampleRate(sonicStream stream, int sampleRate);
/* Get the number of channels. */
int sonicGetNumChannels(sonicStream stream);
-/* Set the number of channels. This will drop any samples that have not been read. */
+/* Set the number of channels. This will drop any samples that have not been
+ * read. */
void sonicSetNumChannels(sonicStream stream, int numChannels);
/* This is a non-stream oriented interface to just change the speed of a sound
sample. It works in-place on the sample array, so there must be at least
- speed*numSamples available space in the array. Returns the new number of samples. */
-int sonicChangeFloatSpeed(float *samples, int numSamples, float speed, float pitch,
- float rate, float volume, int useChordPitch, int sampleRate, int numChannels);
+ speed*numSamples available space in the array. Returns the new number of
+ samples. */
+int sonicChangeFloatSpeed(float* samples, int numSamples, float speed,
+ float pitch, float rate, float volume,
+ int useChordPitch, int sampleRate, int numChannels);
/* This is a non-stream oriented interface to just change the speed of a sound
sample. It works in-place on the sample array, so there must be at least
- speed*numSamples available space in the array. Returns the new number of samples. */
-int sonicChangeShortSpeed(short *samples, int numSamples, float speed, float pitch,
- float rate, float volume, int useChordPitch, int sampleRate, int numChannels);
+ speed*numSamples available space in the array. Returns the new number of
+ samples. */
+int sonicChangeShortSpeed(short* samples, int numSamples, float speed,
+ float pitch, float rate, float volume,
+ int useChordPitch, int sampleRate, int numChannels);
+
+#ifdef SONIC_SPECTROGRAM
+/*
+This code generates high quality spectrograms from sound samples, using
+Time-Aliased-FFTs as described at:
+
+ https://github.com/waywardgeek/spectrogram
+
+Basically, two adjacent pitch periods are overlap-added to create a sound
+sample that accurately represents the speech sound at that moment in time.
+This set of samples is converted to a spectral line using an FFT, and the result
+is saved as a single spectral line at that moment in time. The resulting
+spectral lines vary in resolution (it is equal to the number of samples in the
+pitch period), and the spacing of spectral lines also varies (proportional to
+the number of samples in the pitch period).
+
+To generate a bitmap, linear interpolation is used to render the grayscale
+value at any particular point in time and frequency.
+*/
+
+#define SONIC_MAX_SPECTRUM_FREQ 5000
+
+struct sonicSpectrogramStruct;
+struct sonicBitmapStruct;
+typedef struct sonicSpectrogramStruct* sonicSpectrogram;
+typedef struct sonicBitmapStruct* sonicBitmap;
+
+/* sonicBitmap objects represent spectrograms as grayscale bitmaps where each
+ pixel is from 0 (black) to 255 (white). Bitmaps are rows*cols in size.
+ Rows are indexed top to bottom and columns are indexed left to right */
+struct sonicBitmapStruct {
+ unsigned char* data;
+ int numRows;
+ int numCols;
+};
+
+typedef struct sonicBitmapStruct* sonicBitmap;
+
+/* Enable computation of a spectrogram on the fly. */
+void sonicComputeSpectrogram(sonicStream stream);
+
+/* Get the spectrogram. */
+sonicSpectrogram sonicGetSpectrogram(sonicStream stream);
+
+/* Create an empty spectrogram. Called automatically if sonicComputeSpectrogram
+ has been called. */
+sonicSpectrogram sonicCreateSpectrogram(int sampleRate);
+
+/* Destroy the spectrogram. This is called automatically when calling
+ sonicDestroyStream. */
+void sonicDestroySpectrogram(sonicSpectrogram spectrogram);
+
+/* Convert the spectrogram to a bitmap. Caller must destroy bitmap when done. */
+sonicBitmap sonicConvertSpectrogramToBitmap(sonicSpectrogram spectrogram,
+ int numRows, int numCols);
+
+/* Destroy a bitmap returned by sonicConvertSpectrogramToBitmap. */
+void sonicDestroyBitmap(sonicBitmap bitmap);
+
+int sonicWritePGM(sonicBitmap bitmap, char* fileName);
+
+/* Add two pitch periods worth of samples to the spectrogram. There must be
+ 2*period samples. Time should advance one pitch period for each call to
+ this function. */
+void sonicAddPitchPeriodToSpectrogram(sonicSpectrogram spectrogram,
+ short* samples, int numSamples,
+ int numChannels);
+#endif /* SONIC_SPECTROGRAM */
-#ifdef __cplusplus
+#ifdef __cplusplus
}
#endif
diff --git a/sonic_lite.c b/sonic_lite.c
new file mode 100644
index 0000000..a3397b4
--- /dev/null
+++ b/sonic_lite.c
@@ -0,0 +1,371 @@
+/* Sonic library
+ Copyright 2010
+ Bill Cox
+ This file is part of the Sonic Library.
+
+ This file is licensed under the Apache 2.0 license.
+*/
+
+/* This file is designed for low-powered microcontrollers, minimizing memory
+ compared to the fuller sonic.c implementation. */
+
+#include "sonic_lite.h"
+
+#include <string.h>
+
+#define SONIC_MAX_PERIOD (SONIC_SAMPLE_RATE / SONIC_MIN_PITCH) /* Longest pitch period searched, in samples. */
+#define SONIC_MIN_PERIOD (SONIC_SAMPLE_RATE / SONIC_MAX_PITCH) /* Shortest pitch period searched, in samples. */
+#define SONIC_SKIP (SONIC_SAMPLE_RATE / SONIC_AMDF_FREQ) /* Down-sampling factor for the coarse pitch search. */
+#define SONIC_INPUT_BUFFER_SIZE (2 * SONIC_MAX_PERIOD + SONIC_INPUT_SAMPLES) /* Two max periods plus one write. */
+
+struct sonicStruct {
+ short inputBuffer[SONIC_INPUT_BUFFER_SIZE]; /* Samples written but not yet consumed. */
+ short outputBuffer [2 * SONIC_MAX_PERIOD]; /* Processed samples waiting to be read. */
+ short downSampleBuffer[(2 * SONIC_MAX_PERIOD) / SONIC_SKIP]; /* Scratch for downSampleInput. */
+ float speed; /* Speed-up factor; values above 1.00001 enable pitch-period skipping. */
+ float volume; /* Output gain; 1.0f leaves samples unscaled. */
+ int numInputSamples; /* Valid samples in inputBuffer. */
+ int numOutputSamples; /* Valid samples in outputBuffer. */
+ int remainingInputToCopy; /* Input samples still to be copied verbatim after a skip. */
+ int prevPeriod; /* Period found by the previous findPitchPeriod call. */
+ int prevMinDiff; /* Best-match difference from the previous findPitchPeriod call. */
+};
+
+static struct sonicStruct sonicStream; /* The single, statically allocated stream. */
+
+/* Multiply numSamples samples in place by volume.  The gain is applied as a
+   fixed-point integer with 8 fraction bits, so volume is capped at 127 to
+   keep the product from overflowing the fixed-point math.  Results are
+   clamped to the range -32767..32767. */
+static void scaleSamples(short *samples, int numSamples, float volume) {
+  /* Gain in 24-bit integer, 8-bit fraction fixed-point representation. */
+  int gain;
+  int scaled;
+  int i;
+
+  if (volume > 127.0) {
+    volume = 127.0;
+  }
+  gain = volume * 256.0f;
+  for (i = 0; i < numSamples; i++) {
+    scaled = (samples[i] * gain) >> 8;
+    if (scaled > 32767) {
+      scaled = 32767;
+    } else if (scaled < -32767) {
+      scaled = -32767;
+    }
+    samples[i] = scaled;
+  }
+}
+
+/* Store the speed factor that later processing of the stream will use. */
+void sonicSetSpeed(float speed) {
+  sonicStream.speed = speed;
+}
+
+/* Store the gain that is applied to newly generated output samples. */
+void sonicSetVolume(float volume) { sonicStream.volume = volume; }
+
+/* Reset the static stream to its initial state: unity speed and volume,
+   empty input and output buffers, and no pitch history. */
+void sonicInit(void) {
+  /* Buffer bookkeeping. */
+  sonicStream.numInputSamples = 0;
+  sonicStream.numOutputSamples = 0;
+  sonicStream.remainingInputToCopy = 0;
+  /* Pitch-detection history. */
+  sonicStream.prevPeriod = 0;
+  sonicStream.prevMinDiff = 0;
+  /* Playback parameters. */
+  sonicStream.speed = 1.0;
+  sonicStream.volume = 1.0f;
+}
+
+/* Append numSamples samples to the input buffer.  Returns 1 on success
+   (including the no-op case numSamples == 0).  Returns 0 without copying
+   anything if samples is NULL, numSamples is negative, or the samples would
+   not fit in the space left in the statically sized input buffer. */
+static int addShortSamplesToInputBuffer(short *samples,
+                                        int numSamples) {
+  /* Must stay first: sonicFlushStream passes (NULL, 0) on purpose. */
+  if (numSamples == 0) {
+    return 1;
+  }
+  if (samples == NULL || numSamples < 0 ||
+      numSamples > SONIC_INPUT_BUFFER_SIZE - sonicStream.numInputSamples) {
+    /* Refuse rather than write past the end of inputBuffer. */
+    return 0;
+  }
+  memcpy(sonicStream.inputBuffer + sonicStream.numInputSamples,
+         samples, numSamples * sizeof(short));
+  sonicStream.numInputSamples += numSamples;
+  return 1;
+}
+
+/* Drop the first position samples from the input buffer, sliding whatever
+   follows them down to the start. */
+static void removeInputSamples(int position) {
+  int left = sonicStream.numInputSamples - position;
+
+  sonicStream.numInputSamples = left;
+  if (left <= 0) {
+    return;
+  }
+  memmove(sonicStream.inputBuffer,
+          sonicStream.inputBuffer + position,
+          left * sizeof(short));
+}
+
+/* Append numSamples samples from the array to the end of the output buffer.
+   No capacity check is made here. */
+static void copyToOutput(short *samples, int numSamples) {
+  short *dest = sonicStream.outputBuffer + sonicStream.numOutputSamples;
+
+  memcpy(dest, samples, numSamples * sizeof(short));
+  sonicStream.numOutputSamples += numSamples;
+}
+
+/* Copy pending pass-through input, starting at position, to the output
+   buffer.  At most 2 * SONIC_MAX_PERIOD samples are copied per call; the
+   number copied is returned and deducted from remainingInputToCopy. */
+static int copyInputToOutput(int position) {
+  int limit = 2 * SONIC_MAX_PERIOD;
+  int count = sonicStream.remainingInputToCopy > limit
+                  ? limit
+                  : sonicStream.remainingInputToCopy;
+
+  copyToOutput(sonicStream.inputBuffer + position, count);
+  sonicStream.remainingInputToCopy -= count;
+  return count;
+}
+
+/* Move up to maxSamples samples from the output buffer into samples and
+   return how many were moved.  Zero is returned when no output is pending,
+   which is not an error condition. */
+int sonicReadShortFromStream(short *samples, int maxSamples) {
+  int available = sonicStream.numOutputSamples;
+  int taken = available > maxSamples ? maxSamples : available;
+  int left = available - taken;
+
+  if (available == 0) {
+    return 0;
+  }
+  memcpy(samples, sonicStream.outputBuffer, taken * sizeof(short));
+  if (left > 0) {
+    /* Slide the unread tail down to the start of the buffer. */
+    memmove(sonicStream.outputBuffer, sonicStream.outputBuffer + taken,
+            left * sizeof(short));
+  }
+  sonicStream.numOutputSamples = left;
+  return taken;
+}
+
+/* Force the sonic stream to generate output using whatever data it currently
+ has. No extra delay will be added to the output, but flushing in the middle
+ of words could introduce distortion. The unused tail of the input buffer is
+ zeroed and counted as input so the pending samples can be processed; any
+ output beyond the expected amount is then discarded and the input buffer is
+ emptied. */
+void sonicFlushStream(void) {
+ int maxRequired = 2 * SONIC_MAX_PERIOD;
+ int remainingSamples = sonicStream.numInputSamples;
+ float speed = sonicStream.speed;
+ int expectedOutputSamples = sonicStream.numOutputSamples + (int)((remainingSamples / speed) + 0.5f);
+
+ memset(sonicStream.inputBuffer + remainingSamples, 0,
+ sizeof(short) * (SONIC_INPUT_BUFFER_SIZE - remainingSamples));
+ sonicStream.numInputSamples += 2 * maxRequired; /* NOTE(review): this counts
+ 4 * SONIC_MAX_PERIOD padding samples, but the memset above only zeroes up
+ to SONIC_INPUT_BUFFER_SIZE (2 * SONIC_MAX_PERIOD + SONIC_INPUT_SAMPLES).
+ Confirm that processing cannot read past inputBuffer or overfill
+ outputBuffer here. */
+ sonicWriteShortToStream(NULL, 0);
+ /* Throw away any extra samples we generated due to the silence we added */
+ if (sonicStream.numOutputSamples > expectedOutputSamples) {
+ sonicStream.numOutputSamples = expectedOutputSamples;
+ }
+ /* Empty input buffer */
+ sonicStream.numInputSamples = 0;
+ sonicStream.remainingInputToCopy = 0;
+}
+
+/* Report how many processed samples are waiting in the output buffer. */
+int sonicSamplesAvailable(void) { return sonicStream.numOutputSamples; }
+
+/* Fill the down-sample buffer from samples: each output value is the integer
+   average of SONIC_SKIP consecutive input samples, covering two maximum
+   pitch periods of input. */
+static void downSampleInput(short *samples) {
+  int outCount = 2 * SONIC_MAX_PERIOD / SONIC_SKIP;
+  int outIndex, k;
+  int sum;
+
+  for (outIndex = 0; outIndex < outCount; outIndex++) {
+    sum = 0;
+    for (k = 0; k < SONIC_SKIP; k++) {
+      sum += samples[outIndex * SONIC_SKIP + k];
+    }
+    sonicStream.downSampleBuffer[outIndex] = sum / SONIC_SKIP;
+  }
+}
+
+/* Search periods minPeriod..maxPeriod (inclusive) for the one whose
+   consecutive windows differ least, using the sum of absolute sample
+   differences normalized by the period length.  The best period is returned;
+   *retMinDiff and *retMaxDiff receive the per-sample difference of the best
+   and worst matching periods. */
+static int findPitchPeriodInRange(short *samples, int minPeriod, int maxPeriod,
+                                  int* retMinDiff, int* retMaxDiff) {
+  int bestPeriod = 0, worstPeriod = 255;
+  unsigned long minDiff = 1, maxDiff = 0;
+  unsigned long diff;
+  short a, b;
+  int period, i;
+
+  for (period = minPeriod; period <= maxPeriod; period++) {
+    diff = 0;
+    for (i = 0; i < period; i++) {
+      a = samples[i];
+      b = samples[i + period];
+      if (a >= b) {
+        diff += (unsigned short)(a - b);
+      } else {
+        diff += (unsigned short)(b - a);
+      }
+    }
+    /* Note that the highest number of samples we add into diff will be less
+       than 256, since we skip samples.  Thus, diff is a 24 bit number, and
+       we can safely cross-multiply by a period without overflow.
+       Cross-multiplying compares diff/period ratios without dividing. */
+    if (bestPeriod == 0 || diff * bestPeriod < minDiff * period) {
+      minDiff = diff;
+      bestPeriod = period;
+    }
+    if (diff * worstPeriod > maxDiff * period) {
+      maxDiff = diff;
+      worstPeriod = period;
+    }
+  }
+  *retMinDiff = minDiff / bestPeriod;
+  *retMaxDiff = maxDiff / worstPeriod;
+  return bestPeriod;
+}
+
+/* At abrupt ends of voiced words, the previous pitch period estimate can be
+   a better approximation than the new one.  Returns 1 when the previous
+   period should be used instead, 0 otherwise. */
+static int prevPeriodBetter(int minDiff, int maxDiff, int preferNewPeriod) {
+  if (minDiff == 0 || sonicStream.prevPeriod == 0) {
+    return 0;
+  }
+  if (!preferNewPeriod) {
+    /* Keep the new period unless it matches worse than the previous one. */
+    return minDiff <= sonicStream.prevMinDiff ? 0 : 1;
+  }
+  if (maxDiff > minDiff * 3) {
+    /* Got a reasonable match this period */
+    return 0;
+  }
+  if (minDiff * 2 <= sonicStream.prevMinDiff * 3) {
+    /* Mismatch is not that much greater this period */
+    return 0;
+  }
+  return 1;
+}
+
+/* Find the pitch period at samples using the Average Magnitude Difference
+   Function (AMDF).  When SONIC_SKIP is greater than 1, a coarse search on
+   down-sampled input narrows the range first, then a second search over the
+   narrowed range runs on the full-rate input.  The result may be replaced by
+   the previous period when prevPeriodBetter says so; the newly measured
+   period and difference are remembered for the next call either way. */
+static int findPitchPeriod(short *samples, int preferNewPeriod) {
+  int low = SONIC_MIN_PERIOD;
+  int high = SONIC_MAX_PERIOD;
+  int minDiff, maxDiff;
+  int period, chosen;
+
+  if (SONIC_SKIP != 1) {
+    /* Coarse pass on the down-sampled signal. */
+    downSampleInput(samples);
+    period = findPitchPeriodInRange(sonicStream.downSampleBuffer,
+                                    low / SONIC_SKIP, high / SONIC_SKIP,
+                                    &minDiff, &maxDiff);
+    period *= SONIC_SKIP;
+    /* Narrow the full-rate search to a window around the coarse answer. */
+    low = period - (SONIC_SKIP << 2);
+    high = period + (SONIC_SKIP << 2);
+    if (low < SONIC_MIN_PERIOD) {
+      low = SONIC_MIN_PERIOD;
+    }
+    if (high > SONIC_MAX_PERIOD) {
+      high = SONIC_MAX_PERIOD;
+    }
+  }
+  period = findPitchPeriodInRange(samples, low, high, &minDiff, &maxDiff);
+  chosen = prevPeriodBetter(minDiff, maxDiff, preferNewPeriod)
+               ? sonicStream.prevPeriod
+               : period;
+  sonicStream.prevMinDiff = minDiff;
+  sonicStream.prevPeriod = period;
+  return chosen;
+}
+
+/* Cross-fade two segments of numSamples samples into out: rampDown is
+   weighted from full volume down towards zero while rampUp is weighted from
+   zero upwards, and the two are summed. */
+static void overlapAdd(int numSamples, short *out, short *rampDown, short *rampUp) {
+  int t;
+
+  for (t = 0; t < numSamples; t++) {
+    out[t] = (rampDown[t] * (numSamples - t) + rampUp[t] * t) / numSamples;
+  }
+}
+
+/* Skip over a pitch period by cross-fading the period at samples into the
+   one that follows it, appending the result to the output buffer.  For
+   speeds below 2 a whole period is emitted and remainingInputToCopy is set
+   so that some following input is passed through unchanged.  Returns the
+   number of samples appended. */
+static int skipPitchPeriod(short *samples, float speed, int period) {
+  long newSamples = period;
+
+  if (speed < 2.0f) {
+    sonicStream.remainingInputToCopy = period * (2.0f - speed) / (speed - 1.0f);
+  } else {
+    newSamples = period / (speed - 1.0f);
+  }
+  overlapAdd(newSamples, sonicStream.outputBuffer + sonicStream.numOutputSamples,
+             samples, samples + period);
+  sonicStream.numOutputSamples += newSamples;
+  return newSamples;
+}
+
+/* Speed up as many pitch periods as are buffered on the input.  Nothing is
+   done until at least two maximum pitch periods are available.  Consumed
+   input is removed from the input buffer afterwards. */
+static void changeSpeed(float speed) {
+  const int maxRequired = 2 * SONIC_MAX_PERIOD;
+  int total = sonicStream.numInputSamples;
+  int position = 0;
+  int period, produced;
+  short *window;
+
+  if (sonicStream.numInputSamples < maxRequired) {
+    return;
+  }
+  do {
+    if (sonicStream.remainingInputToCopy > 0) {
+      /* Pass-through input left over from the previous skip. */
+      produced = copyInputToOutput(position);
+      position += produced;
+    } else {
+      window = sonicStream.inputBuffer + position;
+      period = findPitchPeriod(window, 1);
+      produced = skipPitchPeriod(window, speed, period);
+      position += period + produced;
+    }
+  } while (position + maxRequired <= total);
+  removeInputSamples(position);
+}
+
+/* Turn buffered input into output.  Speeds above 1.00001 go through
+   changeSpeed; otherwise the input is copied to the output unchanged.
+   Samples produced by this call are then scaled by the volume, unless the
+   volume is exactly 1. */
+static void processStreamInput(void) {
+  int firstNew = sonicStream.numOutputSamples;
+  float speed = sonicStream.speed;
+
+  if (speed > 1.00001) {
+    changeSpeed(speed);
+  } else {
+    copyToOutput(sonicStream.inputBuffer, sonicStream.numInputSamples);
+    sonicStream.numInputSamples = 0;
+  }
+  if (sonicStream.volume == 1.0f) {
+    return;
+  }
+  /* Adjust output volume of the newly produced samples only. */
+  scaleSamples(sonicStream.outputBuffer + firstNew,
+               sonicStream.numOutputSamples - firstNew, sonicStream.volume);
+}
+
+/* Append numSamples 16-bit samples to the input buffer, then process
+   whatever input is buffered into output. */
+void sonicWriteShortToStream(short *samples, int numSamples) {
+  /* The status returned by the append is not used. */
+  (void)addShortSamplesToInputBuffer(samples, numSamples);
+  processStreamInput();
+}
diff --git a/sonic_lite.h b/sonic_lite.h
new file mode 100644
index 0000000..09319c4
--- /dev/null
+++ b/sonic_lite.h
@@ -0,0 +1,52 @@
+/* Sonic library
+ Copyright 2010
+ Bill Cox
+ This file is part of the Sonic Library.
+
+ This file is licensed under the Apache 2.0 license.
+*/
+
+/*
+ This is a stripped down version of sonic, to help it fit in micro-controllers.
+ Only mono speedup remains. All buffers are allocated statically.
+*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Lowest and highest pitch the pitch search covers. Buffer sizes grow as the
+ minimum pitch shrinks, so raise it to 80 if memory is tight; 65 is used here
+ on the assumption that there is room and it sounds better. */
+#define SONIC_MIN_PITCH 65
+#define SONIC_MAX_PITCH 400
+
+/* Target rate for the down-sampled pitch search, used to improve speed. The
+ down-sampling factor is SONIC_SAMPLE_RATE / SONIC_AMDF_FREQ. */
+#define SONIC_AMDF_FREQ 4000
+
+/* This is the sample frequency. You must hard-code it rather than passing it in. */
+#define SONIC_SAMPLE_RATE 8000
+
+/* This is the maximum number of samples passed to Sonic in a single write. */
+#define SONIC_INPUT_SAMPLES 80
+
+/* Initialize Sonic. Call before any other function. */
+void sonicInit(void);
+/* Write input samples to the stream. numSamples must be <= SONIC_INPUT_SAMPLES */
+void sonicWriteShortToStream(short *samples, int numSamples);
+/* Use this to read 16-bit data out of the stream. Returns the number of
+ samples copied into samples, at most maxSamples. Sometimes no data will
+ be available, and zero is returned, which is not an error condition. */
+int sonicReadShortFromStream(short *samples, int maxSamples);
+/* Force the sonic stream to generate output using whatever data it currently
+ has. No extra delay will be added to the output, but flushing in the middle
+ of words could introduce distortion. */
+void sonicFlushStream(void);
+/* Return the number of samples in the output buffer */
+int sonicSamplesAvailable(void);
+/* Set the speed of the stream. */
+void sonicSetSpeed(float speed);
+/* Set the scaling factor of the stream. */
+void sonicSetVolume(float volume);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/spectrogram.c b/spectrogram.c
new file mode 100644
index 0000000..8eef8f4
--- /dev/null
+++ b/spectrogram.c
@@ -0,0 +1,377 @@
+/* Sonic library
+ Copyright 2016
+ Bill Cox
+ This file is part of the Sonic Library.
+
+ This file is licensed under the Apache 2.0 license.
+*/
+
+#ifdef KISS_FFT
+#include <stddef.h> /* kiss_fft.h fails to load this */
+#include <kiss_fft.h>
+#else
+#include <fftw3.h>
+#endif
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "sonic.h"
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+#ifndef M_E
+#define M_E 2.7182818284590452354
+#endif
+
+struct sonicSpectrumStruct;
+typedef struct sonicSpectrumStruct* sonicSpectrum;
+
+struct sonicSpectrogramStruct {
+ sonicSpectrum* spectrums; /* Array of owned spectrums, in the order added. */
+ double minPower, maxPower; /* Smallest and largest non-DC power seen so far. */
+ int numSpectrums; /* Entries of spectrums in use. */
+ int allocatedSpectrums; /* Capacity of spectrums. */
+ int sampleRate;
+ int totalSamples; /* Sum of numSamples over all spectrums added. */
+};
+
+struct sonicSpectrumStruct {
+ sonicSpectrogram spectrogram; /* Owning spectrogram. */
+ double* power; /* numFreqs power values; index 0 (DC) is forced to 0. */
+ int numFreqs; /* Number of frequencies */
+ int numSamples; /* Samples in the pitch period this spectrum covers. */
+ int startingSample; /* Value of totalSamples when this spectrum was added. */
+};
+
+/* Debugging aid: print the spectrum's header fields followed by every power
+   value on one line. */
+static void dumpSpectrum(sonicSpectrum spectrum) {
+  const double* power = spectrum->power;
+  int numFreqs = spectrum->numFreqs;
+
+  printf("spectrum numFreqs:%d numSamples:%d startingSample:%d\n",
+         numFreqs, spectrum->numSamples, spectrum->startingSample);
+  printf(" ");
+  for (int k = 0; k < numFreqs; k++) {
+    printf(" %.1f", power[k]);
+  }
+  printf("\n");
+}
+
+/* Debugging aid: print the spectrogram's summary fields, then dump each of
+   its spectrums in order. */
+void dumpSpectrogram(sonicSpectrogram spectrogram) {
+  int count = spectrogram->numSpectrums;
+
+  printf(
+      "spectrogram minPower:%f maxPower:%f numSpectrums:%d totalSamples:%d\n",
+      spectrogram->minPower, spectrogram->maxPower, count,
+      spectrogram->totalSamples);
+  for (int k = 0; k < count; k++) {
+    dumpSpectrum(spectrogram->spectrums[k]);
+  }
+}
+
+/* Create a new, zeroed spectrum and append it to the spectrogram, doubling
+   the spectrogram's array when it is full.  Returns NULL if memory cannot be
+   allocated; in that case the spectrogram is left exactly as it was. */
+static sonicSpectrum sonicCreateSpectrum(sonicSpectrogram spectrogram) {
+  sonicSpectrum spectrum =
+      (sonicSpectrum)calloc(1, sizeof(struct sonicSpectrumStruct));
+  if (spectrum == NULL) {
+    return NULL;
+  }
+  if (spectrogram->numSpectrums == spectrogram->allocatedSpectrums) {
+    int newCapacity = spectrogram->allocatedSpectrums << 1;
+    /* Grow through a temporary so the existing array is neither lost nor
+       leaked when realloc fails. */
+    sonicSpectrum* grown = (sonicSpectrum*)realloc(
+        spectrogram->spectrums, newCapacity * sizeof(sonicSpectrum));
+    if (grown == NULL) {
+      free(spectrum);
+      return NULL;
+    }
+    spectrogram->spectrums = grown;
+    spectrogram->allocatedSpectrums = newCapacity;
+  }
+  spectrogram->spectrums[spectrogram->numSpectrums++] = spectrum;
+  spectrum->spectrogram = spectrogram;
+  return spectrum;
+}
+
+/* Free a spectrum together with its power array.  NULL is accepted and
+   ignored. */
+static void sonicDestroySpectrum(sonicSpectrum spectrum) {
+  if (spectrum != NULL) {
+    /* free(NULL) is a no-op, so power needs no separate check. */
+    free(spectrum->power);
+    free(spectrum);
+  }
+}
+
+/* Allocate an empty spectrogram for the given sample rate, with room for 32
+   spectrums to start with.  Returns NULL if memory cannot be allocated. */
+sonicSpectrogram sonicCreateSpectrogram(int sampleRate) {
+  sonicSpectrogram result =
+      (sonicSpectrogram)calloc(1, sizeof(struct sonicSpectrogramStruct));
+
+  if (result == NULL) {
+    return NULL;
+  }
+  result->sampleRate = sampleRate;
+  /* Seed the running min/max so the first power added replaces them. */
+  result->minPower = DBL_MAX;
+  result->maxPower = DBL_MIN;
+  result->allocatedSpectrums = 32;
+  result->spectrums = (sonicSpectrum*)calloc(result->allocatedSpectrums,
+                                             sizeof(sonicSpectrum));
+  if (result->spectrums == NULL) {
+    sonicDestroySpectrogram(result);
+    return NULL;
+  }
+  return result;
+}
+
+/* Free the spectrogram and every spectrum it owns.  NULL is accepted and
+   ignored. */
+void sonicDestroySpectrogram(sonicSpectrogram spectrogram) {
+  if (spectrogram == NULL) {
+    return;
+  }
+  if (spectrogram->spectrums != NULL) {
+    for (int k = 0; k < spectrogram->numSpectrums; k++) {
+      sonicDestroySpectrum(spectrogram->spectrums[k]);
+    }
+    free(spectrogram->spectrums);
+  }
+  free(spectrogram);
+}
+
+/* Wrap data in a new bitmap of numRows by numCols pixels.  On success the
+   bitmap takes ownership of data.  NULL is returned if the bitmap itself
+   cannot be allocated, in which case data is not freed here. */
+sonicBitmap sonicCreateBitmap(unsigned char* data, int numRows, int numCols) {
+  sonicBitmap result = (sonicBitmap)calloc(1, sizeof(struct sonicBitmapStruct));
+
+  if (result != NULL) {
+    result->numCols = numCols;
+    result->numRows = numRows;
+    result->data = data;
+  }
+  return result;
+}
+
+/* Free the bitmap and the pixel data it owns.  NULL is accepted and
+   ignored. */
+void sonicDestroyBitmap(sonicBitmap bitmap) {
+  if (bitmap != NULL) {
+    /* free(NULL) is a no-op, so data needs no separate check. */
+    free(bitmap->data);
+    free(bitmap);
+  }
+}
+
+/* Overlap-add two consecutive pitch periods using a raised-cosine weight and
+   write period values to ola_samples, which the caller supplies.  Multiple
+   channels are first averaged (with rounding) into one value per frame. */
+static void computeOverlapAdd(short* samples, int period, int numChannels,
+                              double* ola_samples) {
+  for (int i = 0; i < period; i++) {
+    double weight = (1.0 - cos(M_PI * i / period)) / 2.0;
+    short first, second;
+
+    if (numChannels != 1) {
+      /* Average the channels of frame i and of frame i + period. */
+      int firstBase = i * numChannels;
+      int secondBase = (i + period) * numChannels;
+      int firstSum = 0;
+      int secondSum = 0;
+      for (int c = 0; c < numChannels; c++) {
+        firstSum += samples[firstBase + c];
+        secondSum += samples[secondBase + c];
+      }
+      first = (firstSum + (numChannels >> 1)) / numChannels;
+      second = (secondSum + (numChannels >> 1)) / numChannels;
+    } else {
+      first = samples[i];
+      second = samples[i + period];
+    }
+    ola_samples[i] = weight * first + (1.0 - weight) * second;
+  }
+}
+
+#ifdef KISS_FFT
+/* Return the magnitude of a kiss_fft_cpx value: the square root of the sum
+ of the squares of its real (r) and imaginary (i) parts. */
+static double magnitude(kiss_fft_cpx c) {
+ return sqrt(c.r * c.r + c.i * c.i);
+}
+#else
+/* Return the magnitude of an fftw_complex value: the square root of the sum
+ of the squares of its real ([0]) and imaginary ([1]) parts. */
+static double magnitude(fftw_complex c) {
+ return sqrt(c[0] * c[0] + c[1] * c[1]);
+}
+#endif
+
+/* Add two pitch periods worth of samples to the spectrogram.  numSamples is
+   the length of one period, so samples must hold 2*numSamples frames of
+   numChannels interleaved values.  Time should advance one pitch period for
+   each call to this function.
+
+   The two periods are overlap-added, transformed with the configured FFT
+   library, and stored as a new spectrum whose DC power is forced to 0.  The
+   spectrogram's running min/max power is updated from the non-DC bins.
+
+   If the arguments are unusable or any allocation fails, the call returns
+   without adding a spectrum and leaves the spectrogram unchanged. */
+void sonicAddPitchPeriodToSpectrogram(sonicSpectrogram spectrogram,
+                                      short* samples, int numSamples,
+                                      int numChannels) {
+  int i;
+  int numFreqs = numSamples / 2 + 1;
+  sonicSpectrum spectrum = NULL;
+  double* in = NULL;
+  double* power = NULL;
+#ifdef KISS_FFT
+  kiss_fft_cpx* cin = NULL;
+  kiss_fft_cpx* out = NULL;
+  kiss_fft_cfg kiss_plan = NULL;
+#else
+  fftw_complex* out = NULL;
+  fftw_plan p = NULL;
+#endif
+
+  if (spectrogram == NULL || samples == NULL || numSamples <= 0 ||
+      numChannels <= 0) {
+    return;
+  }
+  /* TODO: convert to fixed-point */
+  in = calloc(numSamples, sizeof(double));
+  power = (double*)calloc(numFreqs, sizeof(double));
+  if (in == NULL || power == NULL) {
+    goto cleanup;
+  }
+  computeOverlapAdd(samples, numSamples, numChannels, in);
+#ifdef KISS_FFT
+  cin = calloc(numFreqs, sizeof(kiss_fft_cpx));
+  out = calloc(numFreqs, sizeof(kiss_fft_cpx));
+  kiss_plan = kiss_fft_alloc(numFreqs, 0, NULL, NULL);
+  if (cin == NULL || out == NULL || kiss_plan == NULL) {
+    goto cleanup;
+  }
+  for (i = 0; i < numFreqs; i++) {
+    cin[i].r = in[i];
+  }
+  kiss_fft(kiss_plan, cin, out);
+#else
+  out = calloc(numFreqs, sizeof(fftw_complex));
+  if (out == NULL) {
+    goto cleanup;
+  }
+  p = fftw_plan_dft_r2c_1d(numSamples, in, out, FFTW_ESTIMATE);
+  if (p == NULL) {
+    goto cleanup;
+  }
+  fftw_execute(p);
+  fftw_destroy_plan(p);
+#endif /* FFTW */
+  /* Register the spectrum only once nothing else can fail, so a failure
+     never leaves a half-initialized spectrum in the spectrogram. */
+  spectrum = sonicCreateSpectrum(spectrogram);
+  if (spectrum == NULL) {
+    goto cleanup;
+  }
+  spectrum->startingSample = spectrogram->totalSamples;
+  spectrogram->totalSamples += numSamples;
+  spectrum->numFreqs = numFreqs;
+  spectrum->numSamples = numSamples;
+  spectrum->power = power;
+  power = NULL; /* Ownership has moved to the spectrum. */
+  /* Set the DC power to 0. */
+  spectrum->power[0] = 0.0;
+  for (i = 1; i < numFreqs; ++i) {
+    double binPower = magnitude(out[i]) / numSamples;
+    spectrum->power[i] = binPower;
+    if (binPower > spectrogram->maxPower) {
+      spectrogram->maxPower = binPower;
+    }
+    if (binPower < spectrogram->minPower) {
+      spectrogram->minPower = binPower;
+    }
+  }
+cleanup:
+  free(power);
+  free(in);
+  free(out);
+#ifdef KISS_FFT
+  free(cin);
+  kiss_fft_free(kiss_plan);
+#endif
+}
+
+/* Linearly interpolate the power of one spectrum at the frequency that the
+   given bitmap row represents.  Row 0 is the top of the bitmap and maps to
+   SONIC_MAX_SPECTRUM_FREQ; the bottom row maps to frequency 0.  Frequencies
+   at or beyond the spectrum's last bin return the last bin's power. */
+static double interpolateSpectrum(sonicSpectrum spectrum, int row,
+                                  int numRows) {
+  int lastIndex = spectrum->numFreqs - 1;
+  double spectrumFreqSpacing =
+      (double)spectrum->spectrogram->sampleRate / spectrum->numSamples;
+  /* Divide in floating point: integer division would truncate the spacing
+     and leave the top row short of SONIC_MAX_SPECTRUM_FREQ. */
+  double rowFreqSpacing =
+      numRows > 1 ? (double)SONIC_MAX_SPECTRUM_FREQ / (numRows - 1) : 0.0;
+  /* Flip the row so that we show lowest frequency on the bottom. */
+  row = numRows - row - 1;
+  double targetFreq = row * rowFreqSpacing;
+  int bottomIndex = targetFreq / spectrumFreqSpacing;
+  if (bottomIndex >= lastIndex) {
+    /* There is no bin above to interpolate towards; reading
+       power[bottomIndex + 1] would run past the end of the array. */
+    return spectrum->power[lastIndex];
+  }
+  double bottomPower = spectrum->power[bottomIndex];
+  double topPower = spectrum->power[bottomIndex + 1];
+  double position =
+      (targetFreq - bottomIndex * spectrumFreqSpacing) / spectrumFreqSpacing;
+  return (1.0 - position) * bottomPower + position * topPower;
+}
+
+/* Linearly interpolate, in time, between two adjacent spectrums at the given
+   bitmap row.  colTime is a sample position; the weight is how far it lies
+   into the left spectrum.  The process exits with status 1 if the right
+   spectrum does not start where the left one ends. */
+static double interpolateSpectrogram(sonicSpectrum leftSpectrum,
+                                     sonicSpectrum rightSpectrum, int row,
+                                     int numRows, int colTime) {
+  int leftStart = leftSpectrum->startingSample;
+  int leftLength = leftSpectrum->numSamples;
+  double leftPower = interpolateSpectrum(leftSpectrum, row, numRows);
+  double rightPower = interpolateSpectrum(rightSpectrum, row, numRows);
+
+  if (rightSpectrum->startingSample != leftStart + leftLength) {
+    fprintf(stderr, "Invalid sample spacing\n");
+    exit(1);
+  }
+  double position = (double)(colTime - leftStart) / leftLength;
+  return (1.0 - position) * leftPower + position * rightPower;
+}
+
+/* Add one column of data to the output bitmap data.  Each row's power is
+   interpolated between spectrum and nextSpectrum at colTime, clamped to the
+   spectrogram's [minPower, maxPower] range, mapped through a compressive
+   curve to 0..255, and stored inverted (255 - value) at
+   data[row * numCols + col]. */
+static void addBitmapCol(unsigned char* data, int col, int numCols, int numRows,
+                         sonicSpectrogram spectrogram, sonicSpectrum spectrum,
+                         sonicSpectrum nextSpectrum, int colTime) {
+  double minPower = spectrogram->minPower;
+  double maxPower = spectrogram->maxPower;
+  double range = maxPower - minPower;
+  int row;
+  for (row = 0; row < numRows; row++) {
+    double power =
+        interpolateSpectrogram(spectrum, nextSpectrum, row, numRows, colTime);
+    int value = 0;
+    /* The DC bin is forced to 0 without updating minPower, so interpolated
+       power can fall below the recorded minimum.  Clamp it: a negative
+       argument below would turn into NaN. */
+    if (power < minPower) {
+      power = minPower;
+    } else if (power > maxPower) {
+      power = maxPower;
+    }
+    if (range > 0.0) {
+      /* Use log scale such that log(min) = 0, and log(max) = 255. */
+      value = 256.0 *
+              sqrt(sqrt(log((M_E - 1.0) * (power - minPower) / range + 1.0)));
+    }
+    if (value >= 256) {
+      value = 255;
+    }
+    data[row * numCols + col] = 255 - value;
+  }
+}
+
+/* Convert the spectrogram to a bitmap.  The returned bitmap must be destroyed
+   by the caller.  Its data is rows*cols in size.  The pixels are written top
+   row to bottom, and each row is left to right.  So, the pixel in the 5th row
+   from the top, in the 18th column from the left in a 32x128 array would be
+   in position 128*4 + 18.
+
+   NULL is returned if memory cannot be allocated, if the spectrogram holds
+   fewer than two spectrums (two are needed to interpolate between), if
+   numRows is less than 2 (row spacing is computed from numRows - 1), or if
+   numCols is less than 1. */
+sonicBitmap sonicConvertSpectrogramToBitmap(sonicSpectrogram spectrogram,
+                                            int numRows, int numCols) {
+  /* dumpSpectrogram(spectrogram); */
+  if (spectrogram == NULL || spectrogram->numSpectrums < 2 || numRows < 2 ||
+      numCols < 1) {
+    return NULL;
+  }
+  /* Passing the dimensions separately lets calloc detect size overflow. */
+  unsigned char* data =
+      (unsigned char*)calloc((size_t)numRows, (size_t)numCols);
+  if (data == NULL) {
+    return NULL;
+  }
+  int xSpectrum = 0; /* xSpectrum is index of nextSpectrum */
+  sonicSpectrum spectrum = spectrogram->spectrums[xSpectrum++];
+  sonicSpectrum nextSpectrum = spectrogram->spectrums[xSpectrum];
+  int totalTime =
+      spectrogram->spectrums[spectrogram->numSpectrums - 1]->startingSample;
+  int col;
+  for (col = 0; col < numCols; col++) {
+    /* A single column sits at time 0; otherwise spread the columns evenly
+       over the total time. */
+    double colTime =
+        numCols > 1 ? (double)totalTime * col / (numCols - 1) : 0.0;
+    while (xSpectrum + 1 < spectrogram->numSpectrums &&
+           colTime >= nextSpectrum->startingSample) {
+      spectrum = nextSpectrum;
+      nextSpectrum = spectrogram->spectrums[++xSpectrum];
+    }
+    addBitmapCol(data, col, numCols, numRows, spectrogram, spectrum,
+                 nextSpectrum, colTime);
+  }
+  sonicBitmap bitmap = sonicCreateBitmap(data, numRows, numCols);
+  if (bitmap == NULL) {
+    /* Ownership of data was never transferred, so release it here. */
+    free(data);
+  }
+  return bitmap;
+}
+
+/* Write a PGM image file, which is 8-bit grayscale and looks like:
+ P2
+ # CREATOR: libsonic
+ 640 400
+ 255
+ ...
+   Each pixel is written on its own line as 255 minus the stored byte.
+   Returns 1 on success and 0 if the file cannot be opened, a write fails,
+   or closing the file (which flushes buffered output) fails. */
+int sonicWritePGM(sonicBitmap bitmap, char* fileName) {
+  printf("Writing PGM to %s\n", fileName);
+  FILE* file = fopen(fileName, "w");
+  if (file == NULL) {
+    return 0;
+  }
+  int ok = fprintf(file, "P2\n# CREATOR: libsonic\n%d %d\n255\n",
+                   bitmap->numCols, bitmap->numRows) >= 0;
+  int numPixels = bitmap->numRows * bitmap->numCols;
+  unsigned char* p = bitmap->data;
+  int i;
+  for (i = 0; ok && i < numPixels; i++) {
+    ok = fprintf(file, "%d\n", 255 - *p++) >= 0;
+  }
+  /* Buffered data is only written out at close, so a failing fclose means
+     the file is incomplete. */
+  if (fclose(file) != 0) {
+    ok = 0;
+  }
+  return ok;
+}
+
+#ifdef MAIN
+/* Placeholder entry point so the file can be built standalone with -DMAIN. */
+int main(void) {
+  return 0;
+}
+#endif
diff --git a/wave.c b/wave.c
index 05bbf1f..ace469a 100644
--- a/wave.c
+++ b/wave.c
@@ -9,365 +9,338 @@
/*
This file supports read/write wave files.
*/
+#include "wave.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include "wave.h"
#define WAVE_BUF_LEN 4096
 struct waveFileStruct {
- int numChannels;
- int sampleRate;
- FILE *soundFile;
- int bytesWritten; /* The number of bytes written so far, including header */
- int failed;
- int isInput;
+ int numChannels;
+ int sampleRate;
+ FILE* soundFile; /* Stream that all reads and writes go through. */
+ int bytesWritten; /* The number of bytes written so far, including header */
+ int failed; /* Set to 1 after an I/O or format error; later I/O is skipped. */
+ int isInput; /* NOTE(review): presumably nonzero for files opened for reading - confirm. */
 };
/* Write a string to a file. */
-static void writeBytes(
- waveFile file,
- void *bytes,
- int length)
-{
- size_t bytesWritten;
-
- if(file->failed) {
- return;
- }
- bytesWritten = fwrite(bytes, sizeof(char), length, file->soundFile);
- if(bytesWritten != length) {
- fprintf(stderr, "Unable to write to output file");
- file->failed = 1;
- }
- file->bytesWritten += bytesWritten;
+static void writeBytes(waveFile file, void* bytes, int length) {
+  size_t numWritten;
+
+  /* Once the file is marked failed, every later write is skipped. */
+  if (file->failed) {
+    return;
+  }
+  numWritten = fwrite(bytes, sizeof(char), length, file->soundFile);
+  /* Count whatever was written, even on a short write. */
+  file->bytesWritten += numWritten;
+  if (numWritten != length) {
+    fprintf(stderr, "Unable to write to output file");
+    file->failed = 1;
+  }
 }
/* Write a string to a file. */
-static void writeString(
- waveFile file,
- char *string)
-{
- writeBytes(file, string, strlen(string));
+static void writeString(waveFile file, char* string) {
+  /* The terminating NUL is not written. */
+  int length = strlen(string);
+
+  writeBytes(file, string, length);
 }
/* Write an integer to a file in little endian order. */
-static void writeInt(
- waveFile file,
- int value)
-{
- char bytes[4];
- int i;
-
- for(i = 0; i < 4; i++) {
- bytes[i] = value;
- value >>= 8;
- }
- writeBytes(file, bytes, 4);
+static void writeInt(waveFile file, int value) {
+  char bytes[4];
+  int i = 0;
+
+  /* Emit the least significant byte first. */
+  while (i < 4) {
+    bytes[i++] = value;
+    value >>= 8;
+  }
+  writeBytes(file, bytes, 4);
 }
/* Write a short integer to a file in little endian order. */
-static void writeShort(
- waveFile file,
- short value)
-{
- char bytes[2];
- int i;
-
- for(i = 0; i < 2; i++) {
- bytes[i] = value;
- value >>= 8;
- }
- writeBytes(file, bytes, 2);
+static void writeShort(waveFile file, short value) {
+  char bytes[2];
+
+  /* Low byte first, then high byte. */
+  bytes[0] = value;
+  bytes[1] = value >> 8;
+  writeBytes(file, bytes, 2);
 }
/* Read bytes from the input file. Return the number of bytes actually read. */
-static int readBytes(
- waveFile file,
- void *bytes,
- int length)
-{
- if(file->failed) {
- return 0;
- }
- return fread(bytes, sizeof(char), length, file->soundFile);
+static int readBytes(waveFile file, void* bytes, int length) {
+  /* A file already marked failed yields no data. */
+  if (!file->failed) {
+    return fread(bytes, sizeof(char), length, file->soundFile);
+  }
+  return 0;
 }
/* Read an exact number of bytes from the input file. */
-static void readExactBytes(
- waveFile file,
- void *bytes,
- int length)
-{
- int numRead;
-
- if(file->failed) {
- return;
- }
- numRead = fread(bytes, sizeof(char), length, file->soundFile);
- if(numRead != length) {
- fprintf(stderr, "Failed to read requested bytes from input file\n");
- file->failed = 1;
- }
+static void readExactBytes(waveFile file, void* bytes, int length) {
+  if (file->failed) {
+    return;
+  }
+  /* A short read marks the file as failed. */
+  if ((int)fread(bytes, sizeof(char), length, file->soundFile) != length) {
+    fprintf(stderr, "Failed to read requested bytes from input file\n");
+    file->failed = 1;
+  }
 }
/* Read an integer from the input file */
-static int readInt(
- waveFile file)
-{
- unsigned char bytes[4];
- int value = 0, i;
-
- readExactBytes(file, bytes, 4);
- for(i = 3; i >= 0; i--) {
- value <<= 8;
- value |= bytes[i];
- }
- return value;
+static int readInt(waveFile file) {
+  /* Zero-filled so a failed read yields 0 instead of indeterminate bytes. */
+  unsigned char bytes[4] = {0};
+  /* Assemble in an unsigned value: shifting a set bit into the sign bit of
+     a signed int is undefined. */
+  unsigned int value = 0;
+  int i;
+
+  readExactBytes(file, bytes, 4);
+  /* Bytes are stored least significant first. */
+  for (i = 3; i >= 0; i--) {
+    value = (value << 8) | bytes[i];
+  }
+  return (int)value;
 }
/* Read a short from the input file */
-static int readShort(
- waveFile file)
-{
- unsigned char bytes[2];
- int value = 0, i;
-
- readExactBytes(file, bytes, 2);
- for(i = 1; i >= 0; i--) {
- value <<= 8;
- value |= bytes[i];
- }
- return value;
+static int readShort(waveFile file) {
+  /* Zero-filled so a failed read yields 0 instead of indeterminate bytes. */
+  unsigned char bytes[2] = {0};
+  int value = 0, i;
+
+  readExactBytes(file, bytes, 2);
+  /* Bytes are stored least significant first; the result is 0..65535. */
+  for (i = 1; i >= 0; i--) {
+    value <<= 8;
+    value |= bytes[i];
+  }
+  return value;
 }
/* Read a string from the input and compare it to an expected string. */
-static void expectString(
- waveFile file,
- char *expectedString)
-{
- char buf[11]; /* Be sure that we never call with a longer string */
- int length = strlen(expectedString);
-
- if(length > 10) {
- fprintf(stderr, "Internal error: expected string too long\n");
- file->failed = 1;
- } else {
- readExactBytes(file, buf, length);
- buf[length] = '\0';
- if(strcmp(expectedString, buf)) {
- fprintf(stderr, "Unsupported wave file format\n");
- file->failed = 1;
- }
+static void expectString(waveFile file, char* expectedString) {
+  /* Zero-filled so the buffer is always a valid string, even after a failed
+     read.  Be sure that we never call with a longer string. */
+  char buf[11] = {0};
+  int length = strlen(expectedString);
+
+  if (length > 10) {
+    fprintf(stderr, "Internal error: expected string too long\n");
+    file->failed = 1;
+  } else {
+    readExactBytes(file, buf, length);
+    buf[length] = '\0';
+    /* Skip the comparison when the read itself failed: that error has
+       already been reported and buf holds nothing meaningful. */
+    if (!file->failed && strcmp(expectedString, buf)) {
+      fprintf(stderr, "Unsupported wave file format: Expected '%s', got '%s'\n",
+              expectedString, buf);
+      file->failed = 1;
     }
+  }
 }
/* Write the header of the wave file. */
-static void writeHeader(
- waveFile file,
- int sampleRate)
-{
- /* write the wav file per the wav file format */
- writeString(file, "RIFF"); /* 00 - RIFF */
- /* We have to fseek and overwrite this later when we close the file because */
- /* we don't know how big it is until then. */
- writeInt(file, 36 /* + dataLength */); /* 04 - how big is the rest of this file? */
- writeString(file, "WAVE"); /* 08 - WAVE */
- writeString(file, "fmt "); /* 12 - fmt */
- writeInt(file, 16); /* 16 - size of this chunk */
- writeShort(file, 1); /* 20 - what is the audio format? 1 for PCM = Pulse Code Modulation */
- writeShort(file, 1); /* 22 - mono or stereo? 1 or 2? (or 5 or ???) */
- writeInt(file, sampleRate); /* 24 - samples per second (numbers per second) */
- writeInt(file, sampleRate * 2); /* 28 - bytes per second */
- writeShort(file, 2); /* 32 - # of bytes in one sample, for all channels */
- writeShort(file, 16); /* 34 - how many bits in a sample(number)? usually 16 or 24 */
- writeString(file, "data"); /* 36 - data */
- writeInt(file, 0); /* 40 - how big is this data chunk */
+static void writeHeader(waveFile file, int sampleRate, int numChannels) {
+ /* write the wav file per the wav file format */
+ writeString(file, "RIFF"); /* 00 - RIFF */
+ /* We have to fseek and overwrite this later when we close the file because */
+ /* we don't know how big it is until then. */
+ writeInt(file,
+ 36 /* + dataLength */); /* 04 - how big is the rest of this file? */
+ writeString(file, "WAVE"); /* 08 - WAVE */
+ writeString(file, "fmt "); /* 12 - fmt */
+ writeInt(file, 16); /* 16 - size of this chunk */
+ writeShort(
+ file,
+ 1); /* 20 - what is the audio format? 1 for PCM = Pulse Code Modulation */
+ writeShort(file,
+ numChannels); /* 22 - mono or stereo? 1 or 2? (or 5 or ???) */
+ writeInt(file, sampleRate); /* 24 - samples per second (numbers per second) */
+ writeInt(file, sampleRate * 2); /* 28 - bytes per second */
+ writeShort(file, 2); /* 32 - # of bytes in one sample, for all channels */
+ writeShort(
+ file, 16); /* 34 - how many bits in a sample(number)? usually 16 or 24 */
+ writeString(file, "data"); /* 36 - data */
+ writeInt(file, 0); /* 40 - how big is this data chunk */
}
/* Read the header of the wave file. */
-static int readHeader(
- waveFile file)
-{
- int data;
-
- expectString(file, "RIFF");
- data = readInt(file); /* 04 - how big is the rest of this file? */
- expectString(file, "WAVE"); /* 08 - WAVE */
- expectString(file, "fmt "); /* 12 - fmt */
- int chunkSize = readInt(file); /* 16 or 18 - size of this chunk */
- if(chunkSize != 16 && chunkSize != 18) {
- fprintf(stderr, "Only basic wave files are supported\n");
- return 0;
- }
- data = readShort(file); /* 20 - what is the audio format? 1 for PCM = Pulse Code Modulation */
- if(data != 1) {
- fprintf(stderr, "Only PCM wave files are supported\n");
- return 0;
+static int readHeader(waveFile file) {
+ int data;
+
+ expectString(file, "RIFF");
+ data = readInt(file); /* 04 - how big is the rest of this file? */
+ expectString(file, "WAVE"); /* 08 - WAVE */
+ expectString(file, "fmt "); /* 12 - fmt */
+ int chunkSize = readInt(file); /* 16 or 18 - size of this chunk */
+ if (chunkSize != 16 && chunkSize != 18) {
+ fprintf(stderr, "Only basic wave files are supported\n");
+ return 0;
+ }
+ data = readShort(file); /* 20 - what is the audio format? 1 for PCM = Pulse
+ Code Modulation */
+ if (data != 1) {
+ fprintf(stderr, "Only PCM wave files are supported\n");
+ return 0;
+ }
+ file->numChannels =
+ readShort(file); /* 22 - mono or stereo? 1 or 2? (or 5 or ???) */
+ file->sampleRate =
+ readInt(file); /* 24 - samples per second (numbers per second) */
+ readInt(file); /* 28 - bytes per second */
+ readShort(file); /* 32 - # of bytes in one sample, for all channels */
+ data = readShort(
+ file); /* 34 - how many bits in a sample(number)? usually 16 or 24 */
+ if (data != 16) {
+ fprintf(stderr, "Only 16 bit PCM wave files are supported\n");
+ return 0;
+ }
+ if (chunkSize == 18) { /* ffmpeg writes 18, and so has 2 extra bytes here */
+ data = readShort(file);
+ }
+
+ /* Read and discard chunks until we find the "data" chunk or fail */
+ char chunk[5];
+ chunk[4] = 0;
+
+ while (1) {
+ readExactBytes(file, chunk, 4); /* chunk id */
+ int size = readInt(file); /* how big is this data chunk */
+ if (strcmp(chunk, "data") == 0) {
+ return 1;
}
- file->numChannels = readShort(file); /* 22 - mono or stereo? 1 or 2? (or 5 or ???) */
- file->sampleRate = readInt(file); /* 24 - samples per second (numbers per second) */
- readInt(file); /* 28 - bytes per second */
- readShort(file); /* 32 - # of bytes in one sample, for all channels */
- data = readShort(file); /* 34 - how many bits in a sample(number)? usually 16 or 24 */
- if(data != 16) {
- fprintf(stderr, "Only 16 bit PCM wave files are supported\n");
- return 0;
+ if (fseek(file->soundFile, size, SEEK_CUR) != 0) {
+ fprintf(stderr, "Failed to seek on input file.\n");
+ return 0;
}
- if (chunkSize == 18) { /* ffmpeg writes 18, and so has 2 extra bytes here */
- data = readShort(file);
- }
- expectString(file, "data"); /* 36 - data */
- readInt(file); /* 40 - how big is this data chunk */
- return 1;
+ }
}
/* Close the input or output file and free the waveFile. */
-static void closeFile(
- waveFile file)
-{
- FILE *soundFile = file->soundFile;
-
- if(soundFile != NULL) {
- fclose(soundFile);
- file->soundFile = NULL;
- }
- free(file);
+static void closeFile(waveFile file) {
+ FILE* soundFile = file->soundFile;
+
+ if (soundFile != NULL) {
+ fclose(soundFile);
+ file->soundFile = NULL;
+ }
+ free(file);
}
-/* Open a 16-bit little-endian wav file for reading. It may be mono or stereo. */
-waveFile openInputWaveFile(
- char *fileName,
- int *sampleRate,
- int *numChannels)
-{
- waveFile file;
- FILE *soundFile = fopen(fileName, "rb");
-
- if(soundFile == NULL) {
- fprintf(stderr, "Unable to open wave file %s for reading\n", fileName);
- return NULL;
- }
- file = (waveFile)calloc(1, sizeof(struct waveFileStruct));
- file->soundFile = soundFile;
- file->isInput = 1;
- if(!readHeader(file)) {
- closeFile(file);
- return NULL;
- }
- *sampleRate = file->sampleRate;
- *numChannels = file->numChannels;
- return file;
+/* Open a 16-bit little-endian wav file for reading. It may be mono or stereo.
+ */
+waveFile openInputWaveFile(const char* fileName, int* sampleRate, int* numChannels) {
+ waveFile file;
+ FILE* soundFile = fopen(fileName, "rb");
+
+ if (soundFile == NULL) {
+ fprintf(stderr, "Unable to open wave file %s for reading\n", fileName);
+ return NULL;
+ }
+ file = (waveFile)calloc(1, sizeof(struct waveFileStruct));
+ file->soundFile = soundFile;
+ file->isInput = 1;
+ if (!readHeader(file)) {
+ closeFile(file);
+ return NULL;
+ }
+ *sampleRate = file->sampleRate;
+ *numChannels = file->numChannels;
+ return file;
}
-/* Open a 16-bit little-endian wav file for writing. It may be mono or stereo. */
-waveFile openOutputWaveFile(
- char *fileName,
- int sampleRate,
- int numChannels)
-{
- waveFile file;
- FILE *soundFile = fopen(fileName, "wb");
-
- if(soundFile == NULL) {
- fprintf(stderr, "Unable to open wave file %s for writing\n", fileName);
- return NULL;
- }
- file = (waveFile)calloc(1, sizeof(struct waveFileStruct));
- file->soundFile = soundFile;
- file->sampleRate = sampleRate;
- file->numChannels = numChannels;
- writeHeader(file, sampleRate);
- if(file->failed) {
- closeFile(file);
- return NULL;
- }
- return file;
+/* Open a 16-bit little-endian wav file for writing. It may be mono or stereo.
+ */
+waveFile openOutputWaveFile(const char* fileName, int sampleRate, int numChannels) {
+ waveFile file;
+ FILE* soundFile = fopen(fileName, "wb");
+
+ if (soundFile == NULL) {
+ fprintf(stderr, "Unable to open wave file %s for writing\n", fileName);
+ return NULL;
+ }
+ file = (waveFile)calloc(1, sizeof(struct waveFileStruct));
+ file->soundFile = soundFile;
+ file->sampleRate = sampleRate;
+ file->numChannels = numChannels;
+ writeHeader(file, sampleRate, numChannels);
+ if (file->failed) {
+ closeFile(file);
+ return NULL;
+ }
+ return file;
}
/* Close the sound file. */
-int closeWaveFile(
- waveFile file)
-{
- FILE *soundFile = file->soundFile;
- int passed = 1;
-
- if(!file->isInput) {
- if(fseek(soundFile, 4, SEEK_SET) != 0) {
- fprintf(stderr, "Failed to seek on input file.\n");
- passed = 0;
- } else {
- /* Now update the file to have the correct size. */
- writeInt(file, file->bytesWritten - 8);
- if(file->failed) {
- fprintf(stderr, "Failed to write wave file size.\n");
- passed = 0;
- }
- if(fseek(soundFile, 40, SEEK_SET) != 0) {
- fprintf(stderr, "Failed to seek on input file.\n");
- passed = 0;
- } else {
- /* Now update the file to have the correct size. */
- writeInt(file, file->bytesWritten - 48);
- if(file->failed) {
- fprintf(stderr, "Failed to write wave file size.\n");
- passed = 0;
- }
- }
+int closeWaveFile(waveFile file) {
+ FILE* soundFile = file->soundFile;
+ int passed = 1;
+
+ if (!file->isInput) {
+ if (fseek(soundFile, 4, SEEK_SET) != 0) {
+ fprintf(stderr, "Failed to seek on input file.\n");
+ passed = 0;
+ } else {
+ /* Now update the file to have the correct size. */
+ writeInt(file, file->bytesWritten - 8);
+ if (file->failed) {
+ fprintf(stderr, "Failed to write wave file size.\n");
+ passed = 0;
+ }
+ if (fseek(soundFile, 40, SEEK_SET) != 0) {
+ fprintf(stderr, "Failed to seek on input file.\n");
+ passed = 0;
+ } else {
+ /* Now update the file to have the correct size. */
+ writeInt(file, file->bytesWritten - 48);
+ if (file->failed) {
+ fprintf(stderr, "Failed to write wave file size.\n");
+ passed = 0;
}
+ }
}
- closeFile(file);
- return passed;
+ }
+ closeFile(file);
+ return passed;
}
-/* Read from the wave file. Return the number of samples read. */
-int readFromWaveFile(
- waveFile file,
- short *buffer,
- int maxSamples)
-{
- int i, bytesRead, samplesRead;
- int bytePos = 0;
- unsigned char bytes[WAVE_BUF_LEN];
- short sample;
-
- if(maxSamples*file->numChannels*2 > WAVE_BUF_LEN) {
- maxSamples = WAVE_BUF_LEN/(file->numChannels*2);
- }
- bytesRead = readBytes(file, bytes, maxSamples*file->numChannels*2);
- samplesRead = bytesRead/(file->numChannels*2);
- for(i = 0; i < samplesRead*file->numChannels; i++) {
- sample = bytes[bytePos++];
- sample |= (unsigned int)bytes[bytePos++] << 8;
- *buffer++ = sample;
- }
- return samplesRead;
+/* Read from the wave file. Return the number of samples read.
+ numSamples and maxSamples are the number of **multi-channel** samples */
+int readFromWaveFile(waveFile file, short* buffer, int maxSamples) {
+ int i, bytesRead, samplesRead;
+ int bytePos = 0;
+ unsigned char bytes[WAVE_BUF_LEN];
+ short sample;
+
+ if (maxSamples * file->numChannels * 2 > WAVE_BUF_LEN) {
+ maxSamples = WAVE_BUF_LEN / (file->numChannels * 2);
+ }
+ bytesRead = readBytes(file, bytes, maxSamples * file->numChannels * 2);
+ samplesRead = bytesRead / (file->numChannels * 2);
+ for (i = 0; i < samplesRead * file->numChannels; i++) {
+ sample = bytes[bytePos++];
+ sample |= (unsigned int)bytes[bytePos++] << 8;
+ *buffer++ = sample;
+ }
+ return samplesRead;
}
/* Write to the wave file. */
-int writeToWaveFile(
- waveFile file,
- short *buffer,
- int numSamples)
-{
- int i;
- int bytePos = 0;
- unsigned char bytes[WAVE_BUF_LEN];
- short sample;
- int total = numSamples*file->numChannels;
-
- for(i = 0; i < total; i++) {
- if(bytePos == WAVE_BUF_LEN) {
- writeBytes(file, bytes, bytePos);
- bytePos = 0;
- }
- sample = buffer[i];
- bytes[bytePos++] = sample;
- bytes[bytePos++] = sample >> 8;
- }
- if(bytePos != 0) {
- writeBytes(file, bytes, bytePos);
+int writeToWaveFile(waveFile file, short* buffer, int numSamples) {
+ int i;
+ int bytePos = 0;
+ unsigned char bytes[WAVE_BUF_LEN];
+ short sample;
+ int total = numSamples * file->numChannels;
+
+ for (i = 0; i < total; i++) {
+ if (bytePos == WAVE_BUF_LEN) {
+ writeBytes(file, bytes, bytePos);
+ bytePos = 0;
}
- return file->failed;
+ sample = buffer[i];
+ bytes[bytePos++] = sample;
+ bytes[bytePos++] = sample >> 8;
+ }
+ if (bytePos != 0) {
+ writeBytes(file, bytes, bytePos);
+ }
+ return file->failed;
}
diff --git a/wave.h b/wave.h
index aad45c5..d2de547 100644
--- a/wave.h
+++ b/wave.h
@@ -7,10 +7,10 @@
/* Support for reading and writing wave files. */
-typedef struct waveFileStruct *waveFile;
+typedef struct waveFileStruct* waveFile;
-waveFile openInputWaveFile(char *fileName, int *sampleRate, int *numChannels);
-waveFile openOutputWaveFile(char *fileName, int sampleRate, int numChannels);
+waveFile openInputWaveFile(const char* fileName, int* sampleRate, int* numChannels);
+waveFile openOutputWaveFile(const char* fileName, int sampleRate, int numChannels);
int closeWaveFile(waveFile file);
-int readFromWaveFile(waveFile file, short *buffer, int maxSamples);
-int writeToWaveFile(waveFile file, short *buffer, int numSamples);
+int readFromWaveFile(waveFile file, short* buffer, int maxSamples);
+int writeToWaveFile(waveFile file, short* buffer, int numSamples);