Transition table clarification and fixed typos

dave-vsdevs · Nov 10, 2014 · b4d5ed9 · b4d5ed9
1 parent 66b3414
commit b4d5ed9
Show file tree

Hide file tree

Showing 3 changed files with 127 additions and 33 deletions.
diff --git a/src/SamTabs.h b/src/SamTabs.h
@@ -130,7 +130,7 @@ VOWELS
 13   |  AX     | 10100100 |
 14   |  IX     | 10100100 |
 
-DIPTHONGS
+DIPHTHONGS
 48   |  EY     | 10110100 |
 49   |  AY     | 10110100 |
 50   |  OY     | 10110100 |

diff --git a/src/render.c b/src/render.c
@@ -438,40 +438,134 @@ do
 // -------------------
 //pos47694:
 
-
 // CREATE TRANSITIONS
 //
-// Linear transitions are now created to smoothly connect each
-// phoeneme. This transition is spread between the ending frames
-// of the old phoneme (outBlendLength), and the beginning frames 
-// of the new phoneme (inBlendLength).
+// Linear transitions are now created to smoothly connect the
+// end of one sustained portion of a phoneme to the following
+// phoneme. 
+//
+// To do this, three tables are used:
+//
+//  Table         Purpose
+//  =========     ==================================================
+//  blendRank     Determines which phoneme's blend values are used.
+//
+//  blendOut      The number of frames at the end of the phoneme that
+//                will be used to transition to the following phoneme.
+//
+//  blendIn       The number of frames of the following phoneme that
+//                will be used to transition into that phoneme.
+//
+// In creating a transition between two phonemes, the phoneme
+// with the HIGHEST rank is used. Phonemes are ranked on how much
+// their identity is based on their transitions. For example, 
+// vowels and diphthongs are identified by their sustained portion,
+// rather than the transitions, so they are given low values. In contrast,
+// stop consonants (P, B, T, K) and glides (Y, L) are almost entirely
+// defined by their transitions, and are given high rank values.
+//
+// Here are the rankings used by SAM:
+//
+//     Rank    Type                         Phonemes
+//     2       All vowels                   IY, IH, etc.
+//     5       Diphthong endings            YX, WX, ER
+//     8       Terminal liquid consonants   LX, WX, YX, N, NX
+//     9       Liquid consonants            L, RX, W
+//     10      Glide                        R, OH
+//     11      Glide                        WH
+//     18      Voiceless fricatives         S, SH, F, TH
+//     20      Voiced fricatives            Z, ZH, V, DH
+//     23      Plosives, stop consonants    P, T, K, KX, DX, CH
+//     26      Stop consonants              J, GX, B, D, G
+//     27-29   Stop consonants (internal)   **
+//     30      Unvoiced consonants          /H, /X and Q*
+//     160     Nasal                        M
 //
 // To determine how many frames to use, the two phonemes are 
 // compared using the blendRank[] table. The phoneme with the 
-// smaller score is used. In case of a tie, a blend of each is used:
+// higher rank is selected. In case of a tie, a blend of each is used:
 //
 //      if blendRank[phoneme1] == blendRank[phoneme2]
 //          // use lengths from each phoneme
 //          outBlendFrames = outBlendLength[phoneme1]
 //          inBlendFrames = outBlendLength[phoneme2]
-//      else if blendRank[phoneme1] < blendRank[phoneme2]
+//      else if blendRank[phoneme1] > blendRank[phoneme2]
 //          // use lengths from first phoneme
 //          outBlendFrames = outBlendLength[phoneme1]
 //          inBlendFrames = inBlendLength[phoneme1]
 //      else
 //          // use lengths from the second phoneme
-//          // note that in and out are swapped around!
+//          // note that in and out are SWAPPED!
 //          outBlendFrames = inBlendLength[phoneme2]
 //          inBlendFrames = outBlendLength[phoneme2]
 //
-//  Blend lengths can't be less than zero.
+// Blend lengths can't be less than zero.
+//
+// Transitions are assumed to be symmetrical, so if the transition
+// values for the second phoneme are used, the inBlendLength and 
+// outBlendLength values are SWAPPED.
 //
 // For most of the parameters, SAM interpolates over the range of the last
 // outBlendFrames-1 and the first inBlendFrames.
 //
 // The exception to this is the Pitch[] parameter, which interpolates the
-// pitch from the center of the current phoneme to the center of the next
+// pitch from the CENTER of the current phoneme to the CENTER of the next
 // phoneme.
+//
+// Here are two examples. First, consider the word "SUN" (S AH N):
+//
+//    Phoneme   Duration    BlendWeight    OutBlendFrames    InBlendFrames
+//    S         2           18             1                 3
+//    AH        8           2              4                 4
+//    N         7           8              1                 2
+//
+// The formant transitions for the output frames are calculated as follows:
+//
+//     flags ampl1 freq1 ampl2 freq2 ampl3 freq3 pitch
+//    ------------------------------------------------
+// S
+//    241     0     6     0    73     0    99    61   Use S (weight 18) for transition instead of AH (weight 2)
+//    241     0     6     0    73     0    99    61   <-- (OutBlendFrames-1) = (1-1) = 0 frames
+// AH
+//      0     2    10     2    66     0    96    59 * <-- InBlendFrames = 3 frames
+//      0     4    14     3    59     0    93    57 *
+//      0     8    18     5    52     0    90    55 *
+//      0    15    22     9    44     1    87    53
+//      0    15    22     9    44     1    87    53   
+//      0    15    22     9    44     1    87    53   Use N (weight 8) for transition instead of AH (weight 2).
+//      0    15    22     9    44     1    87    53   Since N is second phoneme, reverse the IN and OUT values.
+//      0    11    17     8    47     1    98    56 * <-- (InBlendFrames-1) = (2-1) = 1 frames
+// N
+//      0     8    12     6    50     1   109    58 * <-- OutBlendFrames = 1
+//      0     5     6     5    54     0   121    61
+//      0     5     6     5    54     0   121    61
+//      0     5     6     5    54     0   121    61
+//      0     5     6     5    54     0   121    61
+//      0     5     6     5    54     0   121    61
+//      0     5     6     5    54     0   121    61
+//
+// Now, consider the reverse "NUS" (N AH S):
+//
+//     flags ampl1 freq1 ampl2 freq2 ampl3 freq3 pitch
+//    ------------------------------------------------
+// N
+//     0     5     6     5    54     0   121    61
+//     0     5     6     5    54     0   121    61
+//     0     5     6     5    54     0   121    61
+//     0     5     6     5    54     0   121    61
+//     0     5     6     5    54     0   121    61   
+//     0     5     6     5    54     0   121    61   Use N (weight 8) for transition instead of AH (weight 2)
+//     0     5     6     5    54     0   121    61   <-- (OutBlendFrames-1) = (1-1) = 0 frames
+// AH
+//     0     8    11     6    51     0   110    59 * <-- InBlendFrames = 2
+//     0    11    16     8    48     0    99    56 *
+//     0    15    22     9    44     1    87    53   Use S (weight 18) for transition instead of AH (weight 2)
+//     0    15    22     9    44     1    87    53   Since S is second phoneme, reverse the IN and OUT values.
+//     0     9    18     5    51     1    90    55 * <-- (InBlendFrames-1) = (3-1) = 2
+//     0     4    14     3    58     1    93    57 *
+// S
+//   241     2    10     2    65     1    96    59 * <-- OutBlendFrames = 1
+//   241     0     6     0    73     0    99    61
 
 	A = 0;
 	mem44 = 0;

diff --git a/src/sam.c b/src/sam.c
@@ -279,7 +279,7 @@ void InsertBreath()
 //  In those cases, the stress value+1 from the following phoneme is copied.
 //
 // For example, the word LOITER is represented as LOY5TER, with a stress
-// of 5 on the dipthong OY. This routine will copy the stress value of 6 (5+1)
+// of 5 on the diphthong OY. This routine will copy the stress value of 6 (5+1)
 // to the L that precedes it.
 
 
@@ -587,8 +587,8 @@ void Code41240()
 
 // Rewrites the phonemes using the following rules:
 //
-//       <DIPTHONG ENDING WITH WX> -> <DIPTHONG ENDING WITH WX> WX
-//       <DIPTHONG NOT ENDING WITH WX> -> <DIPTHONG NOT ENDING WITH WX> YX
+//       <DIPHTHONG ENDING WITH WX> -> <DIPHTHONG ENDING WITH WX> WX
+//       <DIPHTHONG NOT ENDING WITH WX> -> <DIPHTHONG NOT ENDING WITH WX> YX
 //       UL -> AX L
 //       UM -> AX M
 //       <STRESSED VOWEL> <SILENCE> <STRESSED VOWEL> -> <STRESSED VOWEL> <SILENCE> Q <VOWEL>
@@ -597,8 +597,8 @@ void Code41240()
 //       <VOWEL> R -> <VOWEL> RX
 //       <VOWEL> L -> <VOWEL> LX
 //       G S -> G Z
-//       K <VOWEL OR DIPTHONG NOT ENDING WITH IY> -> KX <VOWEL OR DIPTHONG NOT ENDING WITH IY>
-//       G <VOWEL OR DIPTHONG NOT ENDING WITH IY> -> GX <VOWEL OR DIPTHONG NOT ENDING WITH IY>
+//       K <VOWEL OR DIPHTHONG NOT ENDING WITH IY> -> KX <VOWEL OR DIPHTHONG NOT ENDING WITH IY>
+//       G <VOWEL OR DIPHTHONG NOT ENDING WITH IY> -> GX <VOWEL OR DIPHTHONG NOT ENDING WITH IY>
 //       S P -> S B
 //       S T -> S D
 //       S K -> S G
@@ -644,15 +644,15 @@ void Parser2()
 		Y = A;
 
 // RULE: 
-//       <DIPTHONG ENDING WITH WX> -> <DIPTHONG ENDING WITH WX> WX
-//       <DIPTHONG NOT ENDING WITH WX> -> <DIPTHONG NOT ENDING WITH WX> YX
+//       <DIPHTHONG ENDING WITH WX> -> <DIPHTHONG ENDING WITH WX> WX
+//       <DIPHTHONG NOT ENDING WITH WX> -> <DIPHTHONG NOT ENDING WITH WX> YX
 // Example: OIL, COW
 
 
-// Check for DIPTHONG
+// Check for DIPHTHONG
 		if ((flags[A] & 16) == 0) goto pos41457;
 
-// Not a dipthong. Get the stress
+// Is a diphthong. Get the stress
 		mem58 = stress[pos];
 
 // End in IY sound?
@@ -663,8 +663,8 @@ void Parser2()
 		//pos41443:
 // Insert a WX or YX following, copying the stress
 
-		if (debug) if (A==20) printf("RULE: insert WX following dipthong NOT ending in IY sound\n");
-		if (debug) if (A==21) printf("RULE: insert YX following dipthong ending in IY sound\n");
+		if (debug) if (A==20) printf("RULE: insert WX following diphtong NOT ending in IY sound\n");
+		if (debug) if (A==21) printf("RULE: insert YX following diphtong ending in IY sound\n");
 		Insert(pos+1, A, mem59, mem58);
 		X = pos;
 // Jump to ???
@@ -870,7 +870,7 @@ void Parser2()
 		}
 
 // RULE:
-//             K <VOWEL OR DIPTHONG NOT ENDING WITH IY> -> KX <VOWEL OR DIPTHONG NOT ENDING WITH IY>
+//             K <VOWEL OR DIPHTHONG NOT ENDING WITH IY> -> KX <VOWEL OR DIPHTHONG NOT ENDING WITH IY>
 // Example: COW
 
 // Is current phoneme K?
@@ -882,17 +882,17 @@ void Parser2()
 			if (Y == 255) phonemeindex[pos] = 75; // ML : prevents an index out of bounds problem		
 			else
 			{
-// VOWELS AND DIPTHONGS ENDING WITH IY SOUND flag set?
+// VOWELS AND DIPHTHONGS ENDING WITH IY SOUND flag set?
 				A = flags[Y] & 32;
-				if (debug) if (A==0) printf("RULE: K <VOWEL OR DIPTHONG NOT ENDING WITH IY> -> KX <VOWEL OR DIPTHONG NOT ENDING WITH IY>\n");
+				if (debug) if (A==0) printf("RULE: K <VOWEL OR DIPHTONG NOT ENDING WITH IY> -> KX <VOWEL OR DIPHTONG NOT ENDING WITH IY>\n");
 // Replace with KX
 				if (A == 0) phonemeindex[pos] = 75;  // 'KX'
 			}
 		}
 		else
 
 // RULE:
-//             G <VOWEL OR DIPTHONG NOT ENDING WITH IY> -> GX <VOWEL OR DIPTHONG NOT ENDING WITH IY>
+//             G <VOWEL OR DIPHTHONG NOT ENDING WITH IY> -> GX <VOWEL OR DIPHTHONG NOT ENDING WITH IY>
 // Example: GO
 
 
@@ -908,10 +908,10 @@ void Parser2()
 				pos++; continue;
 			}
 			else
-// If dipthong ending with YX, move continue processing next phoneme
+// If diphthong ending with IY, continue processing the next phoneme
 			if ((flags[index] & 32) != 0) {pos++; continue;}
 // replace G with GX and continue processing next phoneme
-			if (debug) printf("RULE: G <VOWEL OR DIPTHONG NOT ENDING WITH IY> -> GX <VOWEL OR DIPTHONG NOT ENDING WITH IY>\n");
+			if (debug) printf("RULE: G <VOWEL OR DIPHTONG NOT ENDING WITH IY> -> GX <VOWEL OR DIPHTONG NOT ENDING WITH IY>\n");
 			phonemeindex[pos] = 63; // 'GX'
 			pos++;
 			continue;
@@ -1056,7 +1056,7 @@ void Parser2()
 //         <VOWEL> <UNVOICED CONSONANT> - increase vowel by 1/2 + 1
 //         <NASAL> <STOP CONSONANT> - set nasal = 5, consonant = 6
 //         <VOICED STOP CONSONANT> {optional silence} <STOP CONSONANT> - shorten both to 1/2 + 1
-//         <LIQUID CONSONANT> <DIPTHONG> - decrease by 2
+//         <LIQUID CONSONANT> <DIPHTHONG> - decrease by 2
 
 
 //void Code48619()
@@ -1382,8 +1382,8 @@ if (debug) printf("phoneme %d (%c%c) length %d\n", debugX-1, signInputTable1[pho
 
         // WH, R*, L*, W*, Y*, Q*, Z*, ZH, V*, DH, J*, **, 
 
-        // RULE: <VOICED NON-VOWEL> <DIPTHONG>
-        //       Decrease <DIPTHONG> by 2
+        // RULE: <VOICED NON-VOWEL> <DIPHTHONG>
+        //       Decrease <DIPHTHONG> by 2
 
         // liquid consonant?
         if ((flags2[index] & 16) != 0)
@@ -1395,9 +1395,9 @@ if (debug) printf("phoneme %d (%c%c) length %d\n", debugX-1, signInputTable1[pho
 
             // prior phoneme a stop consonant?
             if((flags[index] & 2) != 0)
-                             // Rule: <LIQUID CONSONANT> <DIPTHONG>
+                             // Rule: <LIQUID CONSONANT> <DIPHTHONG>
 
-if (debug) printf("RULE: <LIQUID CONSONANT> <DIPTHONG> - decrease by 2\n");
+if (debug) printf("RULE: <LIQUID CONSONANT> <DIPHTONG> - decrease by 2\n");
 if (debug) printf("PRE\n");
 if (debug) printf("phoneme %d (%c%c) length %d\n", X, signInputTable1[phonemeindex[X]], signInputTable2[phonemeindex[X]], phonemeLength[X]);