Mercurial > hg > orthanc-stone
diff OrthancStone/Sources/Toolbox/GenericToolbox.h @ 1748:b6a6ad64192a
FastParseVector : manually written code to parse strings like 3.1315\-1.2e12\2344.5\123 into boost::numeric::ublas::vector<double> + tests
author | Benjamin Golinvaux <bgo@osimis.io> |
---|---|
date | Mon, 22 Feb 2021 14:55:13 +0100 |
parents | 9ac2a65d4172 |
children | 3889ae96d2e9 |
line wrap: on
line diff
--- a/OrthancStone/Sources/Toolbox/GenericToolbox.h Fri Feb 12 11:09:07 2021 +0100 +++ b/OrthancStone/Sources/Toolbox/GenericToolbox.h Mon Feb 22 14:55:13 2021 +0100 @@ -23,6 +23,9 @@ #include <Compatibility.h> #include <OrthancException.h> +#include <Logging.h> + +#include "LinearAlgebra.h" #include <boost/shared_ptr.hpp> @@ -31,6 +34,7 @@ #include <math.h> #include <memory> +#include <vector> namespace OrthancStone { @@ -38,27 +42,30 @@ { /** Fast floating point string validation. - No trimming applied, so the input must match regex + No trimming applied, so the input must match regex /^[-]?[0-9]*\.?[0-9]*([eE][-+]?[0-9]+)?$/ The following are allowed as edge cases: "" and "-" + + The parsing always stops if encountering either 0 or the stopChar + */ - inline bool LegitDoubleString(const char* text) + inline bool LegitDoubleString(const char* text, char stopChar = 0) { const char* p = text; - if(*p == '-') + if (*p == '-') p++; size_t period = 0; - while(*p != 0) + while ((*p != 0) && (*p != stopChar) && (*p != ' ') && (*p != '\t')) { if (*p >= '0' && *p <= '9') ++p; - else if(*p == '.') + else if (*p == '.') { - if(period > 0) + if (period > 0) return false; else period++; - ++p; + ++p; } else if (*p == 'e' || *p == 'E') { @@ -70,30 +77,51 @@ return false; // these must be the last in the string - while(*p >= '0' && *p <= '9') + while (*p >= '0' && *p <= '9') ++p; - return (*p == 0); + // after that, there can only be spaces + while ((*p != 0) && (*p != stopChar)) + { + if ((*p != ' ') && (*p != '\t')) + return false; + ++p; + } + + return ((*p == 0) || (*p == stopChar)); } else { return false; } } + + // we only accept trailing whitespace + while ((*p != 0) && (*p != stopChar)) + { + if( (*p != ' ') && (*p != '\t')) + return false; + ++p; + } return true; } + + /** Fast integer string validation. 
No trimming applied, so the input must match regex /^-?[0-9]*$/ The following are allowed as edge cases: "" and "-" + + The parsing always stops if encountering either 0 or the stopChar + */ - inline bool LegitIntegerString(const char* text) + inline bool LegitIntegerString(const char* text, char stopChar = 0) { const char* p = text; if (*p == '-') p++; - while (*p != 0) + while ((*p != 0) && (*p != stopChar)) { if (*p >= '0' && *p <= '9') ++p; @@ -103,41 +131,106 @@ return true; } - /* - Fast string --> double conversion. - Must pass the LegitDoubleString test - String to doubles with at most 18 digits - */ - inline bool StringToDouble(double& r, const char* text) + static const double FRAC_FACTORS[] = { - if(!LegitDoubleString(text)) - return false; + 1.0, + 0.1, + 0.01, + 0.001, + 0.0001, + 0.00001, + 0.000001, + 0.0000001, + 0.00000001, + 0.000000001, + 0.0000000001, + 0.00000000001, + 0.000000000001, + 0.0000000000001, + 0.00000000000001, + 0.000000000000001, + 0.0000000000000001, + 0.00000000000000001, + 0.000000000000000001, + 0.0000000000000000001 + }; + static const size_t FRAC_FACTORS_LEN = sizeof(FRAC_FACTORS) / sizeof(double); + + /** + Technical version of StringToDouble, meant for parsing bigger strings in-place. + + Only works for dot decimal numbers without digit separation + + The parsing stops when encountering EITHER \x00 or stopChar. + + Instead of filling r and returning true if number is legit, it fills r then + returns the number of parsed characters (NOT including the end character (which + can be zero, since an empty string is an allowed edge case) BUT including the trailing + spaces), or -1 if a parsing error occurred. + + Please note that if stopChar is a number, a minus sign, the decimal separator + or the letters e and E, the behavior is UNDEFINED!!! 
+ + In order to allow the containing string not to be space-stripped: + - Spaces and tabs are ignored if they occur before the scientific notation e or E letter + - Spaces and tabs are ignored between the end of the number and the \x00 or stopChar + - Spaces and tabs cause errors anywhere else + + It is up to the caller to detect whether a successful parsing has reached the + terminator (\x00) or stopChar. - static const double FRAC_FACTORS[] = - { - 1.0, - 0.1, - 0.01, - 0.001, - 0.0001, - 0.00001, - 0.000001, - 0.0000001, - 0.00000001, - 0.000000001, - 0.0000000001, - 0.00000000001, - 0.000000000001, - 0.0000000000001, - 0.00000000000001, - 0.000000000000001, - 0.0000000000000001, - 0.00000000000000001, - 0.000000000000000001, - 0.0000000000000000001 - }; - const size_t FRAC_FACTORS_LEN = sizeof(FRAC_FACTORS)/sizeof(double); + In case of an error returned, in a parsing scenario where multiple numbers + are to be read in a bigger surrounding string, it is up to the caller to + recover from the error by advancing the read pointer to the next character, + if desirable. + + Example: + ------ + const char* s = "0.0/.123/3/12.5//-43.1"; + + int size; + double r; + const char* p = s; + + size = StringToDoubleEx(r, p, '/'); + // --> + // r = 0 and size = 3 + + p += size + 1; // gobble the separator + size = StringToDoubleEx(r, p, '/'); + // --> + // r = 0.123 and size = 4 + + p += size + 1; // gobble the separator + size = StringToDoubleEx(r, p, '/'); + // --> + // r = 3.0 and size = 1 + + p += size + 1; // gobble the separator + size = StringToDoubleEx(r, p, '/'); + // --> + // r = 12.5 and size = 4 + + p += size + 1; // gobble the separator + size = StringToDoubleEx(r, p, '/'); + // --> + // r = 0 and size = 0 + + p += size + 1; // gobble the separator + size = StringToDoubleEx(r, p, '/'); + // --> + // r = -43.1 and size = 5 + + p += size; + if(*p == 0) + ...stop parsing! 
+ */ + + inline int32_t StringToDoubleEx(double& r, const char* text, char stopChar = 0) + { + if (!LegitDoubleString(text,stopChar)) + return -1; r = 0.0; double neg = 1.0; @@ -151,7 +244,7 @@ // 12345.67890 while (*p >= '0' && *p <= '9') { - r = (r*10.0) + (*p - '0'); // 1 12 123 123 12345 + r = (r * 10.0) + (*p - '0'); // 1 12 123 123 12345 ++p; } if (*p == '.') @@ -171,12 +264,13 @@ // skip the remaining numbers until we reach not-a-digit (either the // end of the string OR the scientific notation symbol) - while ((*p >= '0' && *p <= '9')) + // spaces are skipped in this phase here + while ((*p >= '0' && *p <= '9') || *p == ' ' || *p == '\t') ++p; - if (*p == 0 ) + if ( (*p == 0) || (*p == stopChar)) { - return true; + return static_cast<int32_t>(p - text); } else if ((*p == 'e') || (*p == 'E')) { @@ -202,7 +296,7 @@ else { // only a sign char or a number is allowed - return false; + return -1; } // now p points to the absolute value of the exponent double exp = 0; @@ -216,16 +310,42 @@ double scFac = ::pow(10.0, exp); r *= scFac; - // only allowed symbol here is EOS - return (*p == 0); + // skip the trailing spaces + while (*p == ' ' || *p == '\t') + ++p; + + // only allowed symbol here is EOS or stopChar + if ((*p == 0) || (*p == stopChar)) + return static_cast<int32_t>(p - text); + else + return -1; } else { // not allowed - return false; + return -1; } } + /** + Fast string --> double conversion. + Must pass the LegitDoubleString test + + String to doubles with at most 18 digits + + Returns true if okay and false if failed. 
+ + The end-of-substring is character \x00 + */ + inline bool StringToDouble(double& r, const char* text) + { + int32_t size = StringToDoubleEx(r, text, 0); + return (size != -1); + } + + /** + See main overload + */ inline bool StringToDouble(double& r, const std::string& text) { return StringToDouble(r, text.c_str()); @@ -289,28 +409,166 @@ Same as GetRgbValuesFromString */ bool GetRgbaValuesFromString(uint8_t& red, - uint8_t& green, - uint8_t& blue, - uint8_t& alpha, - const char* text); + uint8_t& green, + uint8_t& blue, + uint8_t& alpha, + const char* text); /** Same as GetRgbValuesFromString */ - inline bool GetRgbaValuesFromString(uint8_t& red, - uint8_t& green, - uint8_t& blue, - uint8_t& alpha, - const std::string& text) + inline bool GetRgbaValuesFromString(uint8_t& red, + uint8_t& green, + uint8_t& blue, + uint8_t& alpha, + const std::string& text) { return GetRgbaValuesFromString(red, green, blue, alpha, text.c_str()); } - + /** - This method could have been called StripSpacesAndChangeToLower but we might want to + This method could have been called StripSpacesAndChangeToLower but we might want to add some UUID validation to the argument */ void NormalizeUuid(std::string& uuid); + + + inline void FastTokenizeString(std::vector<std::string>& result, + const std::string& value, + char separator) + { + size_t countSeparators = 0; + + for (size_t i = 0; i < value.size(); i++) + { + if (value[i] == separator) + { + countSeparators++; + } + } + + result.clear(); + result.reserve(countSeparators + 1); + + std::string currentItem; + + for (size_t i = 0; i < value.size(); i++) + { + if (value[i] == separator) + { + result.push_back(currentItem); + currentItem.clear(); + } + else + { + currentItem.push_back(value[i]); + } + } + + result.push_back(currentItem); + } + + + inline std::string FastStripSpaces(const std::string& source) + { + size_t first = 0; + + while (first < source.length() && + isspace(source[first])) + { + first++; + } + + if (first == 
source.length()) + { + // String containing only spaces + return ""; + } + + size_t last = source.length(); + while (last > first && + isspace(source[last - 1])) + { + last--; + } + + assert(first <= last); + return source.substr(first, last - first); + } + + /** + Return the raw numbers of occurrences of `separator` in s (starting at s up to \x00) + */ + inline size_t GetCharCount(const char* s, const char separator) + { + const char* p = s; + size_t sepCount = 0; + + while (*p != 0) + { + if(*p == separator) + sepCount++; + ++p; + } + return sepCount; + } + + inline bool FastParseVector(Vector& target, const std::string& value) + { + const char* s = value.c_str(); + const char SEP = '\\'; + + size_t sepCount = GetCharCount(s, SEP); + + size_t itemCount = sepCount + 1; + target.resize(itemCount); + + while (*s == ' ' || *s == '\t') + ++s; + + const char* p = s; + + double r; + for (size_t i = 0; i < itemCount; i++) + { + int32_t numberCharCount = StringToDoubleEx(r, p, SEP); + if (numberCharCount == -1) + { + LOG(ERROR) << "Parsing error for vector \"" << value << "\". Current position (0-based) = " << i; + return false; + } + p += numberCharCount; + if (*p == 0) + { + // if we are at the end of the string, it means we have processed the last character + // let's check this. this is a small price to pay for a useful check + if (i != (itemCount - 1)) + { + LOG(ERROR) << "Parsing error for vector \"" << value << "\". Reached end of the string without consuming the right # of items! Current position (0-based) = " << i; + return false; + } + } + else + { + if (*p != SEP) + { + LOG(ERROR) << "Parsing error for vector \"" << value << "\". Character past end of number Reached end of the string without consuming the right # of items! Current position (0-based) = " << i << " and r = " << r; + return false; + } + if (i == (itemCount - 1)) + { + LOG(ERROR) << "Parsing error for vector \"" << value << "\". Reached end of the vector too soon. 
Current position (0-based) = " << i << " and r = " << r; + return false; + } + // advance to next number + p += 1; + } + target[i] = r; + } + return true; + } + + } }