diff --git a/src/assemble.cpp b/src/assemble.cpp index e7f7801..a5ca587 100644 --- a/src/assemble.cpp +++ b/src/assemble.cpp @@ -32,6 +32,7 @@ #include "objectcode.h" #include "asmexception.h" #include "sourcecode.h" +#include "stringutils.h" using namespace std; @@ -165,7 +166,7 @@ int LineParser::GetInstructionAndAdvanceColumn(bool requireDistinctOpcodes) bool bMatch = true; for ( unsigned int j = 0; j < len; j++ ) { - if ( token[ j ] != toupper( m_line[ m_column + j ] ) ) + if ( token[ j ] != Ascii::ToUpper( m_line[ m_column + j ] ) ) { bMatch = false; break; @@ -480,7 +481,7 @@ void LineParser::HandleAssembler( int instruction ) // see if it's accumulator mode - if ( m_column < m_line.length() && toupper( m_line[ m_column ] ) == 'A' && HasAddressingMode( instruction, ACC ) ) + if ( m_column < m_line.length() && Ascii::ToUpper( m_line[ m_column ] ) == 'A' && HasAddressingMode( instruction, ACC ) ) { // might be... but only if the next character is a separator or whitespace // otherwise, we must assume a label beginning with A @@ -591,7 +592,7 @@ void LineParser::HandleAssembler( int instruction ) throw AsmException_SyntaxError_BadIndirect( m_line, m_column ); } - if ( toupper( m_line[ m_column ] ) != 'Y' ) + if ( Ascii::ToUpper( m_line[ m_column ] ) != 'Y' ) { // We were expecting an Y throw AsmException_SyntaxError_BadIndirect( m_line, m_column ); @@ -648,7 +649,7 @@ void LineParser::HandleAssembler( int instruction ) throw AsmException_SyntaxError_BadIndirect( m_line, m_column ); } - if ( toupper( m_line[ m_column ] ) != 'X' ) + if ( Ascii::ToUpper( m_line[ m_column ] ) != 'X' ) { // We were expecting an X throw AsmException_SyntaxError_BadIndirect( m_line, m_column ); @@ -826,7 +827,7 @@ void LineParser::HandleAssembler( int instruction ) throw AsmException_SyntaxError_BadAbsolute( m_line, m_column ); } - if ( m_column < m_line.length() && toupper( m_line[ m_column ] ) == 'X' ) + if ( m_column < m_line.length() && Ascii::ToUpper( m_line[ m_column ] ) == 'X' ) 
{ m_column++; @@ -857,7 +858,7 @@ void LineParser::HandleAssembler( int instruction ) } } - if ( m_column < m_line.length() && toupper( m_line[ m_column ] ) == 'Y' ) + if ( m_column < m_line.length() && Ascii::ToUpper( m_line[ m_column ] ) == 'Y' ) { m_column++; diff --git a/src/commands.cpp b/src/commands.cpp index 08fbece..bb3bd0f 100644 --- a/src/commands.cpp +++ b/src/commands.cpp @@ -123,7 +123,7 @@ int LineParser::GetTokenAndAdvanceColumn() bool bMatch = true; for ( unsigned int j = 0; j < len; j++ ) { - if ( token[ j ] != toupper( m_line[ m_column + j ] ) ) + if ( token[ j ] != Ascii::ToUpper( m_line[ m_column + j ] ) ) { bMatch = false; break; @@ -493,7 +493,7 @@ void LineParser::HandleDefineLabel() } } - if ( ( m_column < m_line.length() ) && ( isalpha( m_line[ m_column ] ) || m_line[ m_column ] == '_' ) ) + if ( ( m_column < m_line.length() ) && ( Ascii::IsAlpha( m_line[ m_column ] ) || m_line[ m_column ] == '_' ) ) { // Symbol starts with a valid character @@ -1252,7 +1252,7 @@ void LineParser::HandleFor() // first look for the variable name - if ( !isalpha( m_line[ m_column ] ) && m_line[ m_column ] != '_' ) + if ( !Ascii::IsAlpha( m_line[ m_column ] ) && m_line[ m_column ] != '_' ) { throw AsmException_SyntaxError_InvalidSymbolName( m_line, m_column ); } @@ -1677,7 +1677,7 @@ void LineParser::HandleMacro() string macroName; - if ( isalpha( m_line[ m_column ] ) || m_line[ m_column ] == '_' ) + if ( Ascii::IsAlpha( m_line[ m_column ] ) || m_line[ m_column ] == '_' ) { macroName = GetSymbolName(); @@ -1713,7 +1713,7 @@ void LineParser::HandleMacro() throw AsmException_SyntaxError_MissingComma( m_line, m_column ); } } - else if ( isalpha( m_line[ m_column ] ) || m_line[ m_column ] == '_' ) + else if ( Ascii::IsAlpha( m_line[ m_column ] ) || m_line[ m_column ] == '_' ) { string param = GetSymbolName(); diff --git a/src/discimage.cpp b/src/discimage.cpp index 7fff2e4..7680cbf 100644 --- a/src/discimage.cpp +++ b/src/discimage.cpp @@ -26,6 +26,7 @@ #include 
"discimage.h" #include "asmexception.h" #include "globaldata.h" +#include "stringutils.h" using namespace std; @@ -208,14 +209,14 @@ void DiscImage::AddFile( const char* pName, const unsigned char* pAddr, int load for ( size_t j = 0; j < 7; j++ ) { - if ( toupper( pPaddedName[ j ] ) != toupper( m_aCatalog[ i + j ] ) ) + if ( Ascii::ToUpper( pPaddedName[ j ] ) != Ascii::ToUpper( m_aCatalog[ i + j ] ) ) { bTheSame = false; break; } } - if ( bTheSame && ( toupper( m_aCatalog[ i + 7 ] & 0x7F ) ) == toupper( dirName ) ) + if ( bTheSame && ( Ascii::ToUpper( m_aCatalog[ i + 7 ] & 0x7F ) ) == Ascii::ToUpper( dirName ) ) { // File already exists throw AsmException_FileError_FileExists( m_outputFilename ); diff --git a/src/expression.cpp b/src/expression.cpp index d519303..8aa339a 100644 --- a/src/expression.cpp +++ b/src/expression.cpp @@ -204,7 +204,7 @@ Value LineParser::GetValue() } value = String(text.data(), text.size()); } - else if ( m_column < m_line.length() && ( isalpha( m_line[ m_column ] ) || m_line[ m_column ] == '_' ) ) + else if ( m_column < m_line.length() && ( Ascii::IsAlpha( m_line[ m_column ] ) || m_line[ m_column ] == '_' ) ) { // get a symbol @@ -284,7 +284,7 @@ Value LineParser::EvaluateExpression( bool bAllowOneMismatchedCloseBracket ) bool bMatch = true; for ( unsigned int j = 0; j < len; j++ ) { - if ( ( m_column + j >= m_line.length() ) || ( token[ j ] != toupper( m_line[ m_column + j ] ) ) ) + if ( ( m_column + j >= m_line.length() ) || ( token[ j ] != Ascii::ToUpper( m_line[ m_column + j ] ) ) ) { bMatch = false; break; @@ -400,7 +400,7 @@ Value LineParser::EvaluateExpression( bool bAllowOneMismatchedCloseBracket ) bool bMatch = true; for ( unsigned int j = 0; j < len; j++ ) { - if ( ( m_column + j >= m_line.length() ) || ( token[ j ] != toupper( m_line[ m_column + j ] ) ) ) + if ( ( m_column + j >= m_line.length() ) || ( token[ j ] != Ascii::ToUpper( m_line[ m_column + j ] ) ) ) { bMatch = false; break; diff --git a/src/lineparser.cpp 
b/src/lineparser.cpp index a43bb3f..59a7859 100644 --- a/src/lineparser.cpp +++ b/src/lineparser.cpp @@ -96,15 +96,15 @@ void LineParser::Process( string line ) bool bIsSymbolAssignment = false; - if ( isalpha( m_line[ m_column ] ) || m_line[ m_column ] == '_' ) + if ( Ascii::IsAlpha( m_line[ m_column ] ) || m_line[ m_column ] == '_' ) { do { m_column++; } while ( m_column < m_line.length() && - ( isalpha( m_line[ m_column ] ) || - isdigit( m_line[ m_column ] ) || + ( Ascii::IsAlpha( m_line[ m_column ] ) || + Ascii::IsDigit( m_line[ m_column ] ) || m_line[ m_column ] == '_' || m_line[ m_column ] == '%' || m_line[ m_column ] == '$' ) && @@ -216,7 +216,7 @@ void LineParser::Process( string line ) // Check macro matches - if ( isalpha( m_line[ m_column ] ) || m_line[ m_column ] == '_' ) + if ( Ascii::IsAlpha( m_line[ m_column ] ) || m_line[ m_column ] == '_' ) { string macroName = GetSymbolName(); const Macro* macro = MacroTable::Instance().Get( macroName ); @@ -547,7 +547,7 @@ bool LineParser::AdvanceAndCheckEndOfSubStatement(bool includeComma) /*************************************************************************************************/ string LineParser::GetSymbolName() { - assert( isalpha( m_line[ m_column ] ) || m_line[ m_column ] == '_' ); + assert( Ascii::IsAlpha( m_line[ m_column ] ) || m_line[ m_column ] == '_' ); string symbolName; @@ -556,8 +556,8 @@ string LineParser::GetSymbolName() symbolName += m_line[ m_column++ ]; } while ( m_column < m_line.length() && - ( isalpha( m_line[ m_column ] ) || - isdigit( m_line[ m_column ] ) || + ( Ascii::IsAlpha( m_line[ m_column ] ) || + Ascii::IsDigit( m_line[ m_column ] ) || m_line[ m_column ] == '_' || m_line[ m_column ] == '%' || m_line[ m_column ] == '$' ) && diff --git a/src/literals.cpp b/src/literals.cpp index 113a676..1f67847 100644 --- a/src/literals.cpp +++ b/src/literals.cpp @@ -25,12 +25,7 @@ #include "asmexception.h" #include "literals.h" - -// Don't use isdigit because compilers can't agree on its 
locale dependence -static bool is_decimal_digit(char c) -{ - return '0' <= c && c <= '9'; -} +#include "stringutils.h" static int hex_digit_value(char c) { @@ -119,7 +114,7 @@ static bool CopyDigitsSkippingUnderscores(const std::string& line, size_t& index } size_t start_index = index; - while ( index < line.length() && (is_decimal_digit(line[index]) || line[index] == '_') ) + while ( index < line.length() && (Ascii::IsDigit(line[index]) || line[index] == '_') ) { if (line[index] == '_') { @@ -165,7 +160,7 @@ bool Literals::ParseNumeric(const std::string& line, size_t& index, double& resu return false; } - if ( is_decimal_digit(line[index]) || line[index] == '.' || line[index] == '-' ) + if ( Ascii::IsDigit(line[index]) || line[index] == '.' || line[index] == '-' ) { // Copy the number without underscores to this buffer std::string buffer; @@ -198,7 +193,7 @@ bool Literals::ParseNumeric(const std::string& line, size_t& index, double& resu // Copy exponent if it's followed by a sign or digit if ( index + 1 < line.length() && ( line[index] == 'e' || line[index] == 'E' ) && - ( line[index + 1] == '+' || line[index + 1] == '-' || is_decimal_digit(line[index + 1]) ) ) + ( line[index + 1] == '+' || line[index + 1] == '-' || Ascii::IsDigit(line[index + 1]) ) ) { buffer.push_back('e'); index++; diff --git a/src/stringutils.h b/src/stringutils.h index 62fcaa1..f952964 100644 --- a/src/stringutils.h +++ b/src/stringutils.h @@ -34,4 +34,47 @@ namespace StringUtils } +// Built-in char functions like isdigit, toupper, etc. are locale dependent and quite slow. 
+struct Ascii  // ASCII-only character tests and case conversion that never consult the C locale
+{
+	static bool IsAlpha(char c)  // true only for 'A'-'Z' and 'a'-'z'
+	{
+		unsigned int uc = static_cast<unsigned char>(c);  // widen via unsigned char so negative chars do not sign-extend
+		return (uc | 0x20) - 'a' < 26;  // set bit 5 to fold case; unsigned wrap-around makes one compare check both bounds
+	}
+
+	static bool IsDigit(char c)  // true only for '0'-'9'
+	{
+		unsigned int uc = static_cast<unsigned char>(c);  // widen via unsigned char so negative chars do not sign-extend
+		return (uc - '0') < 10;  // values below '0' wrap to large unsigned numbers and fail the test
+	}
+
+	static char ToLower(char c)  // maps 'A'-'Z' to 'a'-'z'; every other value is returned unchanged
+	{
+		unsigned int uc = static_cast<unsigned char>(c);  // widen via unsigned char so negative chars do not sign-extend
+		if (uc - 'A' < 26)  // single unsigned range check for 'A'-'Z'
+		{
+			return c | 0x20;  // setting bit 5 turns an upper-case ASCII letter into lower case
+		}
+		else
+		{
+			return c;
+		}
+	}
+
+	static char ToUpper(char c)  // maps 'a'-'z' to 'A'-'Z'; every other value is returned unchanged
+	{
+		unsigned int uc = static_cast<unsigned char>(c);  // widen via unsigned char so negative chars do not sign-extend
+		if (uc - 'a' < 26)  // single unsigned range check for 'a'-'z'
+		{
+			return c & 0xDF;  // clearing bit 5 turns a lower-case ASCII letter into upper case
+		}
+		else
+		{
+			return c;
+		}
+	}
+};
+
+
 #endif // STRINGUTILS_H_
diff --git a/src/symboltable.cpp b/src/symboltable.cpp
index 85211f0..67cb30b 100644
--- a/src/symboltable.cpp
+++ b/src/symboltable.cpp
@@ -32,6 +32,7 @@
 #include "constants.h"
 #include "asmexception.h"
 #include "literals.h"
+#include "stringutils.h"
 
 using namespace std;
 
@@ -189,8 +190,8 @@ bool SymbolTable::AddCommandLineSymbol( const std::string& expr )
 	}
 	for ( std::string::size_type i = 0; i < symbol.length(); ++i )
 	{
-		bool valid = ( isalpha( symbol[ i ] ) || ( symbol[ i ] == '_' ) );
-		valid = valid || ( ( i > 0 ) && isdigit( symbol[ i ] ) );
+		bool valid = ( Ascii::IsAlpha( symbol[ i ] ) || ( symbol[ i ] == '_' ) );
+		valid = valid || ( ( i > 0 ) && Ascii::IsDigit( symbol[ i ] ) );
 		if ( !valid )
 		{
 			return false;
diff --git a/src/value.h b/src/value.h
index ce3e1fa..80891e5 100644
--- a/src/value.h
+++ b/src/value.h
@@ -27,6 +27,8 @@
 #include
 #include
 
+#include "stringutils.h"
+
 // A simple immutable string buffer with a length and a reference count.
 // This doesn't have constructors, etc. so it can be stuffed into a union.
struct StringHeader @@ -133,7 +135,7 @@ struct StringHeader char* pdata = StringBuffer(result); for (unsigned int i = 0; i != length; ++i) { - *pdata = ascii_upper(*pdata); + *pdata = Ascii::ToUpper(*pdata); ++pdata; } } @@ -149,7 +151,7 @@ struct StringHeader char* pdata = StringBuffer(result); for (unsigned int i = 0; i != length; ++i) { - *pdata = ascii_lower(*pdata); + *pdata = Ascii::ToLower(*pdata); ++pdata; } } @@ -191,24 +193,6 @@ struct StringHeader return 1; } - static char ascii_lower(char c) - { - if (('A' <= c) && (c <= 'Z')) - { - c += 'a' - 'A'; - } - return c; - } - - static char ascii_upper(char c) - { - if (('a' <= c) && (c <= 'z')) - { - c -= 'a' - 'A'; - } - return c; - } - }; // A simple immutable string.