Skip to content

Commit

Permalink
Transition table clarification and fixed typos
Browse files Browse the repository at this point in the history
  • Loading branch information
s-macke committed Nov 10, 2014
1 parent 66b3414 commit b4d5ed9
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 33 deletions.
2 changes: 1 addition & 1 deletion src/SamTabs.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ VOWELS
13 | AX | 10100100 |
14 | IX | 10100100 |
DIPTHONGS
DIPHTHONGS
48 | EY | 10110100 |
49 | AY | 10110100 |
50 | OY | 10110100 |
Expand Down
114 changes: 104 additions & 10 deletions src/render.c
Original file line number Diff line number Diff line change
Expand Up @@ -438,40 +438,134 @@ do
// -------------------
//pos47694:


// CREATE TRANSITIONS
//
// Linear transitions are now created to smoothly connect each
// phoeneme. This transition is spread between the ending frames
// of the old phoneme (outBlendLength), and the beginning frames
// of the new phoneme (inBlendLength).
// Linear transitions are now created to smoothly connect the
// end of one sustained portion of a phoneme to the following
// phoneme.
//
// To do this, three tables are used:
//
// Table Purpose
// ========= ==================================================
// blendRank Determines which phoneme's blend values are used.
//
// blendOut The number of frames at the end of the phoneme that
// will be used to transition to the following phoneme.
//
// blendIn The number of frames of the following phoneme that
// will be used to transition into that phoneme.
//
// In creating a transition between two phonemes, the phoneme
// with the HIGHEST rank is used. Phonemes are ranked on how much
// their identity is based on their transitions. For example,
// vowels and diphthongs are identified by their sustained portion,
// rather than the transitions, so they are given low values. In contrast,
// stop consonants (P, B, T, K) and glides (Y, L) are almost entirely
// defined by their transitions, and are given high rank values.
//
// Here are the rankings used by SAM:
//
// Rank Type Phonemes
// 2 All vowels IY, IH, etc.
// 5 Diphthong endings YX, WX, ER
// 8 Terminal liquid consonants LX, WX, YX, N, NX
// 9 Liquid consonants L, RX, W
// 10 Glide R, OH
// 11 Glide WH
// 18 Voiceless fricatives S, SH, F, TH
// 20 Voiced fricatives Z, ZH, V, DH
// 23 Plosives, stop consonants P, T, K, KX, DX, CH
// 26 Stop consonants J, GX, B, D, G
// 27-29 Stop consonants (internal) **
// 30 Unvoiced consonants /H, /X and Q*
// 160 Nasal M
//
// To determine how many frames to use, the two phonemes are
// compared using the blendRank[] table. The phoneme with the
// smaller score is used. In case of a tie, a blend of each is used:
// higher rank is selected. In case of a tie, a blend of each is used:
//
// if blendRank[phoneme1] == blendRank[phoneme2]
// // use lengths from each phoneme
// outBlendFrames = outBlend[phoneme1]
// inBlendFrames = outBlend[phoneme2]
// else if blendRank[phoneme1] < blendRank[phoneme2]
// else if blendRank[phoneme1] > blendRank[phoneme2]
// // use lengths from first phoneme
// outBlendFrames = outBlendLength[phoneme1]
// inBlendFrames = inBlendLength[phoneme1]
// else
// // use lengths from the second phoneme
// // note that in and out are swapped around!
// // note that in and out are SWAPPED!
// outBlendFrames = inBlendLength[phoneme2]
// inBlendFrames = outBlendLength[phoneme2]
//
// Blend lengths can't be less than zero.
// Blend lengths can't be less than zero.
//
// Transitions are assumed to be symmetrical, so if the transition
// values for the second phoneme are used, the inBlendLength and
// outBlendLength values are SWAPPED.
//
// For most of the parameters, SAM interpolates over the range of the last
// outBlendFrames-1 and the first inBlendFrames.
//
// The exception to this is the Pitch[] parameter, which interpolates the
// pitch from the center of the current phoneme to the center of the next
// pitch from the CENTER of the current phoneme to the CENTER of the next
// phoneme.
//
// Here are two examples. First, consider the word "SUN" (S AH N)
//
// Phoneme Duration BlendWeight OutBlendFrames InBlendFrames
// S 2 18 1 3
// AH 8 2 4 4
// N 7 8 1 2
//
// The formant transitions for the output frames are calculated as follows:
//
// flags ampl1 freq1 ampl2 freq2 ampl3 freq3 pitch
// ------------------------------------------------
// S
// 241 0 6 0 73 0 99 61 Use S (weight 18) for transition instead of AH (weight 2)
// 241 0 6 0 73 0 99 61 <-- (OutBlendFrames-1) = (1-1) = 0 frames
// AH
// 0 2 10 2 66 0 96 59 * <-- InBlendFrames = 3 frames
// 0 4 14 3 59 0 93 57 *
// 0 8 18 5 52 0 90 55 *
// 0 15 22 9 44 1 87 53
// 0 15 22 9 44 1 87 53
// 0 15 22 9 44 1 87 53 Use N (weight 8) for transition instead of AH (weight 2).
// 0 15 22 9 44 1 87 53 Since N is second phoneme, reverse the IN and OUT values.
// 0 11 17 8 47 1 98 56 * <-- (InBlendFrames-1) = (2-1) = 1 frame
// N
// 0 8 12 6 50 1 109 58 * <-- OutBlendFrames = 1
// 0 5 6 5 54 0 121 61
// 0 5 6 5 54 0 121 61
// 0 5 6 5 54 0 121 61
// 0 5 6 5 54 0 121 61
// 0 5 6 5 54 0 121 61
// 0 5 6 5 54 0 121 61
//
// Now, consider the reverse "NUS" (N AH S):
//
// flags ampl1 freq1 ampl2 freq2 ampl3 freq3 pitch
// ------------------------------------------------
// N
// 0 5 6 5 54 0 121 61
// 0 5 6 5 54 0 121 61
// 0 5 6 5 54 0 121 61
// 0 5 6 5 54 0 121 61
// 0 5 6 5 54 0 121 61
// 0 5 6 5 54 0 121 61 Use N (weight 8) for transition instead of AH (weight 2)
// 0 5 6 5 54 0 121 61 <-- (OutBlendFrames-1) = (1-1) = 0 frames
// AH
// 0 8 11 6 51 0 110 59 * <-- InBlendFrames = 2
// 0 11 16 8 48 0 99 56 *
// 0 15 22 9 44 1 87 53 Use S (weight 18) for transition instead of AH (weight 2)
// 0 15 22 9 44 1 87 53 Since S is second phoneme, reverse the IN and OUT values.
// 0 9 18 5 51 1 90 55 * <-- (InBlendFrames-1) = (3-1) = 2
// 0 4 14 3 58 1 93 57 *
// S
// 241 2 10 2 65 1 96 59 * <-- OutBlendFrames = 1
// 241 0 6 0 73 0 99 61

A = 0;
mem44 = 0;
Expand Down
44 changes: 22 additions & 22 deletions src/sam.c
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ void InsertBreath()
// In those cases, the stress value+1 from the following phoneme is copied.
//
// For example, the word LOITER is represented as LOY5TER, with a stress
// of 5 on the dipthong OY. This routine will copy the stress value of 6 (5+1)
// of 5 on the diphthong OY. This routine will copy the stress value of 6 (5+1)
// to the L that precedes it.


Expand Down Expand Up @@ -587,8 +587,8 @@ void Code41240()

// Rewrites the phonemes using the following rules:
//
// <DIPTHONG ENDING WITH WX> -> <DIPTHONG ENDING WITH WX> WX
// <DIPTHONG NOT ENDING WITH WX> -> <DIPTHONG NOT ENDING WITH WX> YX
// <DIPHTHONG ENDING WITH WX> -> <DIPHTHONG ENDING WITH WX> WX
// <DIPHTHONG NOT ENDING WITH WX> -> <DIPHTHONG NOT ENDING WITH WX> YX
// UL -> AX L
// UM -> AX M
// <STRESSED VOWEL> <SILENCE> <STRESSED VOWEL> -> <STRESSED VOWEL> <SILENCE> Q <VOWEL>
Expand All @@ -597,8 +597,8 @@ void Code41240()
// <VOWEL> R -> <VOWEL> RX
// <VOWEL> L -> <VOWEL> LX
// G S -> G Z
// K <VOWEL OR DIPTHONG NOT ENDING WITH IY> -> KX <VOWEL OR DIPTHONG NOT ENDING WITH IY>
// G <VOWEL OR DIPTHONG NOT ENDING WITH IY> -> GX <VOWEL OR DIPTHONG NOT ENDING WITH IY>
// K <VOWEL OR DIPHTHONG NOT ENDING WITH IY> -> KX <VOWEL OR DIPHTHONG NOT ENDING WITH IY>
// G <VOWEL OR DIPHTHONG NOT ENDING WITH IY> -> GX <VOWEL OR DIPHTHONG NOT ENDING WITH IY>
// S P -> S B
// S T -> S D
// S K -> S G
Expand Down Expand Up @@ -644,15 +644,15 @@ void Parser2()
Y = A;

// RULE:
// <DIPTHONG ENDING WITH WX> -> <DIPTHONG ENDING WITH WX> WX
// <DIPTHONG NOT ENDING WITH WX> -> <DIPTHONG NOT ENDING WITH WX> YX
// <DIPHTHONG ENDING WITH WX> -> <DIPHTHONG ENDING WITH WX> WX
// <DIPHTHONG NOT ENDING WITH WX> -> <DIPHTHONG NOT ENDING WITH WX> YX
// Example: OIL, COW


// Check for DIPTHONG
// Check for DIPHTHONG
if ((flags[A] & 16) == 0) goto pos41457;

// Not a dipthong. Get the stress
// It is a diphthong (non-diphthongs jumped away above). Get the stress
mem58 = stress[pos];

// End in IY sound?
Expand All @@ -663,8 +663,8 @@ void Parser2()
//pos41443:
// Insert a WX or YX following, copying the stress

if (debug) if (A==20) printf("RULE: insert WX following dipthong NOT ending in IY sound\n");
if (debug) if (A==21) printf("RULE: insert YX following dipthong ending in IY sound\n");
if (debug) if (A==20) printf("RULE: insert WX following diphtong NOT ending in IY sound\n");
if (debug) if (A==21) printf("RULE: insert YX following diphtong ending in IY sound\n");
Insert(pos+1, A, mem59, mem58);
X = pos;
// Jump to ???
Expand Down Expand Up @@ -870,7 +870,7 @@ void Parser2()
}

// RULE:
// K <VOWEL OR DIPTHONG NOT ENDING WITH IY> -> KX <VOWEL OR DIPTHONG NOT ENDING WITH IY>
// K <VOWEL OR DIPHTHONG NOT ENDING WITH IY> -> KX <VOWEL OR DIPHTHONG NOT ENDING WITH IY>
// Example: COW

// Is current phoneme K?
Expand All @@ -882,17 +882,17 @@ void Parser2()
if (Y == 255) phonemeindex[pos] = 75; // ML : prevents an index out of bounds problem
else
{
// VOWELS AND DIPTHONGS ENDING WITH IY SOUND flag set?
// VOWELS AND DIPHTHONGS ENDING WITH IY SOUND flag set?
A = flags[Y] & 32;
if (debug) if (A==0) printf("RULE: K <VOWEL OR DIPTHONG NOT ENDING WITH IY> -> KX <VOWEL OR DIPTHONG NOT ENDING WITH IY>\n");
if (debug) if (A==0) printf("RULE: K <VOWEL OR DIPHTONG NOT ENDING WITH IY> -> KX <VOWEL OR DIPHTONG NOT ENDING WITH IY>\n");
// Replace with KX
if (A == 0) phonemeindex[pos] = 75; // 'KX'
}
}
else

// RULE:
// G <VOWEL OR DIPTHONG NOT ENDING WITH IY> -> GX <VOWEL OR DIPTHONG NOT ENDING WITH IY>
// G <VOWEL OR DIPHTHONG NOT ENDING WITH IY> -> GX <VOWEL OR DIPHTHONG NOT ENDING WITH IY>
// Example: GO


Expand All @@ -908,10 +908,10 @@ void Parser2()
pos++; continue;
}
else
// If dipthong ending with YX, move continue processing next phoneme
// If diphthong ending with YX, continue processing with the next phoneme
if ((flags[index] & 32) != 0) {pos++; continue;}
// replace G with GX and continue processing next phoneme
if (debug) printf("RULE: G <VOWEL OR DIPTHONG NOT ENDING WITH IY> -> GX <VOWEL OR DIPTHONG NOT ENDING WITH IY>\n");
if (debug) printf("RULE: G <VOWEL OR DIPHTONG NOT ENDING WITH IY> -> GX <VOWEL OR DIPHTONG NOT ENDING WITH IY>\n");
phonemeindex[pos] = 63; // 'GX'
pos++;
continue;
Expand Down Expand Up @@ -1056,7 +1056,7 @@ void Parser2()
// <VOWEL> <UNVOICED CONSONANT> - increase vowel by 1/2 + 1
// <NASAL> <STOP CONSONANT> - set nasal = 5, consonant = 6
// <VOICED STOP CONSONANT> {optional silence} <STOP CONSONANT> - shorten both to 1/2 + 1
// <LIQUID CONSONANT> <DIPTHONG> - decrease by 2
// <LIQUID CONSONANT> <DIPHTHONG> - decrease by 2


//void Code48619()
Expand Down Expand Up @@ -1382,8 +1382,8 @@ if (debug) printf("phoneme %d (%c%c) length %d\n", debugX-1, signInputTable1[pho

// WH, R*, L*, W*, Y*, Q*, Z*, ZH, V*, DH, J*, **,

// RULE: <VOICED NON-VOWEL> <DIPTHONG>
// Decrease <DIPTHONG> by 2
// RULE: <VOICED NON-VOWEL> <DIPHTHONG>
// Decrease <DIPHTHONG> by 2

// liquid consonant?
if ((flags2[index] & 16) != 0)
Expand All @@ -1395,9 +1395,9 @@ if (debug) printf("phoneme %d (%c%c) length %d\n", debugX-1, signInputTable1[pho

// prior phoneme a stop consonant?
if((flags[index] & 2) != 0)
// Rule: <LIQUID CONSONANT> <DIPTHONG>
// Rule: <LIQUID CONSONANT> <DIPHTHONG>

if (debug) printf("RULE: <LIQUID CONSONANT> <DIPTHONG> - decrease by 2\n");
if (debug) printf("RULE: <LIQUID CONSONANT> <DIPHTONG> - decrease by 2\n");
if (debug) printf("PRE\n");
if (debug) printf("phoneme %d (%c%c) length %d\n", X, signInputTable1[phonemeindex[X]], signInputTable2[phonemeindex[X]], phonemeLength[X]);

Expand Down

0 comments on commit b4d5ed9

Please sign in to comment.