Clean-up and soft pinning

interactiveaudiolab · Dec 9, 2022 · c53e703 · c53e703
1 parent 2b3a8ca
commit c53e703
Show file tree

Hide file tree

Showing 35 changed files with 110 additions and 415 deletions.
diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@
 
 </div>
 
-Training, evaluation, and inference of neural pitch and periodicity estimators in PyTorch.
+Training, evaluation, and inference of neural pitch and periodicity estimators in PyTorch. Includes the original code for the paper "Cross-domain Neural Pitch and Periodicity Estimation".
 
 
 ## Table of contents
@@ -186,13 +186,6 @@ Evaluate a model. `<checkpoint>` is the checkpoint file to evaluate and `<gpu>`
 is the GPU index.
 
 
-### Analyze
-
-`python -m penne.evaluate.analyze`
-
-Aggregate model evaluations to produce tables of results.
-
-
 ### Plot
 
 ```
@@ -247,16 +240,16 @@ function of the voiced/unvoiced threshold.
 ## Citation
 
 ### IEEE
-M. Morrison, C. Hseih, N. Pruyne, and B. Pardo, "Cross-domain Neural Pitch and Periodicity Estimation," Submitted to <conference>, <month> 2023.
+M. Morrison, C. Hsieh, N. Pruyne, and B. Pardo, "Cross-domain Neural Pitch and Periodicity Estimation," IEEE Transactions on Speech and Audio Processing, <month> 2023.
 
 
 ### BibTeX
 
 ```
 @inproceedings{morrison2023cross,
     title={Cross-domain Neural Pitch and Periodicity Estimation},
-    author={Morrison, Max and Hseih, Caedon and Pruyne, Nathan and Pardo, Bryan},
-    booktitle={Submitted to TODO},
+    author={Morrison, Max and Hsieh, Caedon and Pruyne, Nathan and Pardo, Bryan},
+    booktitle={IEEE Transactions on Speech and Audio Processing},
     month={TODO},
     year={2023}
 }
diff --git a/config/dio-16k.py b/config/dio-16k.py
diff --git a/config/dio.py b/config/dio.py
@@ -1,4 +1,10 @@
 CONFIG = 'dio'
 
+# Distance between adjacent frames
+HOPSIZE = 160  # samples
+
 # The pitch estimation method to use
 METHOD = 'dio'
+
+# Audio sample rate
+SAMPLE_RATE = 16000  # Hz
diff --git a/config/fcnf0++-onnx.py b/config/fcnf0++-onnx.py
diff --git a/config/fcnf0++-weighted.py b/config/fcnf0++-weighted.py
@@ -2,3 +2,6 @@
 
 # The decoder to use for postprocessing
 DECODER = 'weighted'
+
+# The size of the window used for locally normal pitch decoding
+LOCAL_PITCH_WINDOW_SIZE = 19
diff --git a/config/fcnf0++-window-11.py b/config/fcnf0++-window-11.py
diff --git a/config/fcnf0++-window-13.py b/config/fcnf0++-window-13.py
diff --git a/config/fcnf0++-window-15.py b/config/fcnf0++-window-15.py
diff --git a/config/fcnf0++-window-17.py b/config/fcnf0++-window-17.py
diff --git a/config/fcnf0++-window-19.py b/config/fcnf0++-window-19.py
diff --git a/config/fcnf0++-window-21.py b/config/fcnf0++-window-21.py
diff --git a/config/fcnf0++-window-23.py b/config/fcnf0++-window-23.py
diff --git a/config/fcnf0++-window-25.py b/config/fcnf0++-window-25.py
diff --git a/config/fcnf0++-window-27.py b/config/fcnf0++-window-27.py
diff --git a/config/fcnf0++-window-3.py b/config/fcnf0++-window-3.py
diff --git a/config/fcnf0++-window-7.py b/config/fcnf0++-window-7.py
diff --git a/config/fcnf0++-window-9.py b/config/fcnf0++-window-9.py
diff --git a/config/fcnf0++.py b/config/fcnf0++.py
@@ -1 +0,0 @@
-CONFIG = 'fcnf0++'

diff --git a/config/pyin-16k.py b/config/pyin-16k.py
diff --git a/config/pyin-viterbi-16k.py b/config/pyin-viterbi-16k.py
diff --git a/config/pyin-viterbi.py b/config/pyin-viterbi.py
@@ -3,5 +3,11 @@
 # The decoder to use for postprocessing
 DECODER = 'viterbi'
 
+# Distance between adjacent frames
+HOPSIZE = 160  # samples
+
 # The pitch estimation method to use
 METHOD = 'pyin'
+
+# Audio sample rate
+SAMPLE_RATE = 16000  # Hz
diff --git a/config/pyin.py b/config/pyin.py
@@ -1,4 +1,10 @@
 CONFIG = 'pyin'
 
+# Distance between adjacent frames
+HOPSIZE = 160  # samples
+
 # The pitch estimation method to use
 METHOD = 'pyin'
+
+# Audio sample rate
+SAMPLE_RATE = 16000  # Hz
diff --git a/penne/__init__.py b/penne/__init__.py
@@ -4,9 +4,6 @@
 # - Yapecs passwords
 # - Soft version pinning
 
-# Readme
-# - API
-
 # Paper
 # - TSAP template
 # - Pitch posteriorgram figure (title figure; rhapsody in blue)
@@ -49,7 +46,6 @@
 from . import dsp
 from . import evaluate
 from . import load
-from . import onnx
 from . import partition
 from . import periodicity
 from . import plot

diff --git a/penne/checkpoint.py b/penne/checkpoint.py
@@ -22,20 +22,13 @@ def latest_path(directory, regex='*.pt'):
     return files[-1]
 
 
-def load(checkpoint_path, model=None, optimizer=None):
+def load(checkpoint_path, model, optimizer=None):
     """Load model checkpoint from file"""
     # Load checkpoint
     checkpoint_dict = torch.load(checkpoint_path, map_location='cpu')
 
-    if penne.ONNX and model is None:
-
-        # Replace model with ONNX model
-        model = penne.onnx.model(checkpoint_path.with_suffix('.onnx'))
-
-    else:
-
-        # Restore model weights
-        model.load_state_dict(checkpoint_dict['model'])
+    # Restore model weights
+    model.load_state_dict(checkpoint_dict['model'])
 
     # Restore optimizer
     if optimizer is not None:

diff --git a/penne/config/defaults.py b/penne/config/defaults.py
@@ -108,9 +108,6 @@
 # Number of batches to use for validation
 LOG_STEPS = 64
 
-# Whether to use ONNX for CPU inference
-ONNX = False
-
 # Method to use for periodicity extraction
 PERIODICITY = 'max'
 
@@ -141,9 +138,6 @@
 # Batch size
 BATCH_SIZE = 128
 
-# Weight applied to positive examples in binary cross-entropy loss
-BCE_POSITIVE_WEIGHT = 1.
-
 # Whether to stop training when validation loss stops improving
 EARLY_STOPPING = False
 

diff --git a/penne/core.py b/penne/core.py
@@ -216,38 +216,24 @@ def infer(frames, checkpoint=penne.DEFAULT_CHECKPOINT):
         ):
 
             # Initialize model
-            if penne.ONNX and frames.device.type == 'cpu':
-                model = None
-            else:
-                model = penne.Model()
+            model = penne.Model()
 
             # Load from disk
             infer.model, *_ = penne.checkpoint.load(checkpoint, model)
             infer.checkpoint = checkpoint
             infer.device_type = frames.device.type
 
             # Move model to correct device (no-op if devices are the same)
-            if not penne.ONNX or frames.device.type == 'cuda':
-                infer.model = infer.model.to(frames.device)
+            infer.model = infer.model.to(frames.device)
 
     # Time inference
     with penne.time.timer('infer'):
 
-        if penne.ONNX and frames.device.type == 'cpu':
+        # Prepare model for inference
+        with inference_context(infer.model):
 
             # Infer
-            logits = infer.model.run(
-                None,
-                {infer.model.get_inputs()[0].name: frames.numpy()})[0]
-            logits = torch.from_numpy(logits)
-
-        else:
-
-            # Prepare model for inference
-            with inference_context(infer.model):
-
-                # Infer
-                logits = infer.model(frames)
+            logits = infer.model(frames)
 
         # If we're benchmarking, make sure inference finishes within timer
         if penne.BENCHMARK and logits.device.type == 'cuda':

diff --git a/penne/evaluate/__init__.py b/penne/evaluate/__init__.py
@@ -1,4 +1,3 @@
 from . import metrics
-from .analyze import analyze
 from .core import *
 from .metrics import Metrics, PitchMetrics
diff --git a/penne/evaluate/analyze/__init__.py b/penne/evaluate/analyze/__init__.py
diff --git a/penne/evaluate/analyze/__main__.py b/penne/evaluate/analyze/__main__.py