Mercurial > hg > orthanc-stone
comparison OrthancStone/Sources/Toolbox/GenericToolbox.h @ 1748:b6a6ad64192a
FastParseVector : manually written code to parse strings like 3.1315\-1.2e12\2344.5\123 into boost::numeric::ublas::vector<double> + tests
author | Benjamin Golinvaux <bgo@osimis.io> |
---|---|
date | Mon, 22 Feb 2021 14:55:13 +0100 |
parents | 9ac2a65d4172 |
children | 3889ae96d2e9 |
comparison
equal
deleted
inserted
replaced
1747:730604db88b8 | 1748:b6a6ad64192a |
---|---|
21 | 21 |
22 #pragma once | 22 #pragma once |
23 | 23 |
24 #include <Compatibility.h> | 24 #include <Compatibility.h> |
25 #include <OrthancException.h> | 25 #include <OrthancException.h> |
26 #include <Logging.h> | |
27 | |
28 #include "LinearAlgebra.h" | |
26 | 29 |
27 #include <boost/shared_ptr.hpp> | 30 #include <boost/shared_ptr.hpp> |
28 | 31 |
29 #include <string> | 32 #include <string> |
30 #include <stdint.h> | 33 #include <stdint.h> |
31 #include <math.h> | 34 #include <math.h> |
32 | 35 |
33 #include <memory> | 36 #include <memory> |
37 #include <vector> | |
34 | 38 |
35 namespace OrthancStone | 39 namespace OrthancStone |
36 { | 40 { |
37 namespace GenericToolbox | 41 namespace GenericToolbox |
38 { | 42 { |
39 /** | 43 /** |
40 Fast floating point string validation. | 44 Fast floating point string validation. |
41 No trimming applied, so the input must match regex | 45 No trimming applied, so the input must match regex |
42 /^[-]?[0-9]*\.?[0-9]*([eE][-+]?[0-9]+)?$/ | 46 /^[-]?[0-9]*\.?[0-9]*([eE][-+]?[0-9]+)?$/ |
43 The following are allowed as edge cases: "" and "-" | 47 The following are allowed as edge cases: "" and "-" |
44 */ | 48 |
45 inline bool LegitDoubleString(const char* text) | 49 The parsing always stops if encountering either 0 or the stopChar |
50 | |
51 */ | |
52 inline bool LegitDoubleString(const char* text, char stopChar = 0) | |
46 { | 53 { |
47 const char* p = text; | 54 const char* p = text; |
48 if(*p == '-') | 55 if (*p == '-') |
49 p++; | 56 p++; |
50 size_t period = 0; | 57 size_t period = 0; |
51 while(*p != 0) | 58 while ((*p != 0) && (*p != stopChar) && (*p != ' ') && (*p != '\t')) |
52 { | 59 { |
53 if (*p >= '0' && *p <= '9') | 60 if (*p >= '0' && *p <= '9') |
54 ++p; | 61 ++p; |
55 else if(*p == '.') | 62 else if (*p == '.') |
56 { | 63 { |
57 if(period > 0) | 64 if (period > 0) |
58 return false; | 65 return false; |
59 else | 66 else |
60 period++; | 67 period++; |
61 ++p; | 68 ++p; |
62 } | 69 } |
63 else if (*p == 'e' || *p == 'E') | 70 else if (*p == 'e' || *p == 'E') |
64 { | 71 { |
65 ++p; | 72 ++p; |
66 if (*p == '-' || *p == '+') | 73 if (*p == '-' || *p == '+') |
68 // "e+"/"E+" "e-"/"E-" or "e"/"E" must be followed by a number | 75 // "e+"/"E+" "e-"/"E-" or "e"/"E" must be followed by a number |
69 if (!(*p >= '0' && *p <= '9')) | 76 if (!(*p >= '0' && *p <= '9')) |
70 return false; | 77 return false; |
71 | 78 |
72 // these must be the last in the string | 79 // these must be the last in the string |
73 while(*p >= '0' && *p <= '9') | 80 while (*p >= '0' && *p <= '9') |
74 ++p; | 81 ++p; |
75 | 82 |
76 return (*p == 0); | 83 // after that, there can only be spaces |
84 while ((*p != 0) && (*p != stopChar)) | |
85 { | |
86 if ((*p != ' ') && (*p != '\t')) | |
87 return false; | |
88 ++p; | |
89 } | |
90 | |
91 return ((*p == 0) || (*p == stopChar)); | |
77 } | 92 } |
78 else | 93 else |
79 { | 94 { |
80 return false; | 95 return false; |
81 } | 96 } |
82 } | 97 } |
98 | |
99 // we only accept trailing whitespace | |
100 while ((*p != 0) && (*p != stopChar)) | |
101 { | |
102 if( (*p != ' ') && (*p != '\t')) | |
103 return false; | |
104 ++p; | |
105 } | |
83 return true; | 106 return true; |
84 } | 107 } |
108 | |
109 | |
85 | 110 |
/**
Fast integer string validation.

No trimming is applied: the characters located before the end marker must
match the regex /^-?[0-9]*$/. As a consequence, the edge cases "" and "-"
are accepted.

The scan ends at the first \x00 or at the first occurrence of stopChar,
whichever comes first.
*/
inline bool LegitIntegerString(const char* text, char stopChar = 0)
{
  const char* cursor = text;

  // a single optional leading minus sign
  if (*cursor == '-')
  {
    ++cursor;
  }

  // everything up to the end marker must be a decimal digit
  for (; (*cursor != 0) && (*cursor != stopChar); ++cursor)
  {
    const bool isDigit = (*cursor >= '0') && (*cursor <= '9');
    if (!isDigit)
    {
      return false;
    }
  }

  return true;
}
105 | 133 |
106 /* | 134 |
107 Fast string --> double conversion. | 135 static const double FRAC_FACTORS[] = |
108 Must pass the LegitDoubleString test | 136 { |
109 | 137 1.0, |
110 String to doubles with at most 18 digits | 138 0.1, |
111 */ | 139 0.01, |
112 inline bool StringToDouble(double& r, const char* text) | 140 0.001, |
113 { | 141 0.0001, |
114 if(!LegitDoubleString(text)) | 142 0.00001, |
115 return false; | 143 0.000001, |
116 | 144 0.0000001, |
117 static const double FRAC_FACTORS[] = | 145 0.00000001, |
118 { | 146 0.000000001, |
119 1.0, | 147 0.0000000001, |
120 0.1, | 148 0.00000000001, |
121 0.01, | 149 0.000000000001, |
122 0.001, | 150 0.0000000000001, |
123 0.0001, | 151 0.00000000000001, |
124 0.00001, | 152 0.000000000000001, |
125 0.000001, | 153 0.0000000000000001, |
126 0.0000001, | 154 0.00000000000000001, |
127 0.00000001, | 155 0.000000000000000001, |
128 0.000000001, | 156 0.0000000000000000001 |
129 0.0000000001, | 157 }; |
130 0.00000000001, | 158 static const size_t FRAC_FACTORS_LEN = sizeof(FRAC_FACTORS) / sizeof(double); |
131 0.000000000001, | 159 |
132 0.0000000000001, | 160 /** |
133 0.00000000000001, | 161 Technical version of StringToDouble, meant for parsing bigger strings in-place. |
134 0.000000000000001, | 162 |
135 0.0000000000000001, | 163 Only works for dot decimal numbers without digit separation |
136 0.00000000000000001, | 164 |
137 0.000000000000000001, | 165 The parsing stops when encountering EITHER \x00 or stopChar. |
138 0.0000000000000000001 | 166 |
139 }; | 167 Instead of filling r and returning true if number is legit, it fills r then |
140 const size_t FRAC_FACTORS_LEN = sizeof(FRAC_FACTORS)/sizeof(double); | 168 returns the number of parsed characters (NOT including the end character (which |
169 can be zero, since an empty string is an allowed edge case) BUT including the trailing | |
170 spaces), or -1 if a parsing error occurred. | |
171 | |
172 Please note that if stopChar is a number, a minus sign, the decimal separator | |
173 or the letters e and E, the behavior is UNDEFINED!!! | |
174 | |
175 In order to allow the containing string not to be space-stripped: | |
176 - Spaces and tabs are ignored if they occur before the scientific notation e or E letter | |
177 - Spaces and tabs are ignored between the end of the number and the \x00 or stopChar | |
178 - Spaces and tabs cause errors anywhere else | |
179 | |
180 It is up to the caller to detect whether a successful parsing has reached the | |
181 terminator (\x00) or stopChar. | |
182 | |
183 In case of an error returned, in a parsing scenario where multiple numbers | |
184 are to be read in a bigger surrounding string, it is up to the caller to | |
185 recover from the error by advancing the read pointer to the next character, | |
186 if desirable. | |
187 | |
188 Example: | |
189 ------ | |
190 const char* s = "0.0/.123/3/12.5//-43.1"; | |
191 | |
192 int size; | |
193 double r; | |
194 const char* p = s; | |
195 | |
196 size = StringToDoubleEx(r, p, '/'); | |
197 // --> | |
198 // r = 0 and size = 3 | |
199 | |
200 p += size + 1; // gobble the separator | |
201 size = StringToDoubleEx(r, p, '/'); | |
202 // --> | |
203 // r = 0.123 and size = 4 | |
204 | |
205 p += size + 1; // gobble the separator | |
206 size = StringToDoubleEx(r, p, '/'); | |
207 // --> | |
208 // r = 3.0 and size = 1 | |
209 | |
210 p += size + 1; // gobble the separator | |
211 size = StringToDoubleEx(r, p, '/'); | |
212 // --> | |
213 // r = 12.5 and size = 4 | |
214 | |
215 p += size + 1; // gobble the separator | |
216 size = StringToDoubleEx(r, p, '/'); | |
217 // --> | |
218 // r = 0 and size = 0 | |
219 | |
220 p += size + 1; // gobble the separator | |
221 size = StringToDoubleEx(r, p, '/'); | |
222 // --> | |
223 // r = -43.1 and size = 5 | |
224 | |
225 p += size; | |
226 if (*p == 0) | |
227 ...stop parsing! | |
228 */ | |
229 | |
230 inline int32_t StringToDoubleEx(double& r, const char* text, char stopChar = 0) | |
231 { | |
232 if (!LegitDoubleString(text,stopChar)) | |
233 return -1; | |
141 | 234 |
142 r = 0.0; | 235 r = 0.0; |
143 double neg = 1.0; | 236 double neg = 1.0; |
144 const char* p = text; | 237 const char* p = text; |
145 | 238 |
149 ++p; | 242 ++p; |
150 } | 243 } |
151 // 12345.67890 | 244 // 12345.67890 |
152 while (*p >= '0' && *p <= '9') | 245 while (*p >= '0' && *p <= '9') |
153 { | 246 { |
154 r = (r*10.0) + (*p - '0'); // 1 12 123 123 12345 | 247 r = (r * 10.0) + (*p - '0'); // 1 12 123 123 12345 |
155 ++p; | 248 ++p; |
156 } | 249 } |
157 if (*p == '.') | 250 if (*p == '.') |
158 { | 251 { |
159 double f = 0.0; | 252 double f = 0.0; |
169 } | 262 } |
170 r *= neg; | 263 r *= neg; |
171 | 264 |
172 // skip the remaining numbers until we reach not-a-digit (either the | 265 // skip the remaining numbers until we reach not-a-digit (either the |
173 // end of the string OR the scientific notation symbol) | 266 // end of the string OR the scientific notation symbol) |
174 while ((*p >= '0' && *p <= '9')) | 267 // spaces are skipped in this phase here |
175 ++p; | 268 while ((*p >= '0' && *p <= '9') || *p == ' ' || *p == '\t') |
176 | 269 ++p; |
177 if (*p == 0 ) | 270 |
178 { | 271 if ( (*p == 0) || (*p == stopChar)) |
179 return true; | 272 { |
273 return static_cast<int32_t>(p - text); | |
180 } | 274 } |
181 else if ((*p == 'e') || (*p == 'E')) | 275 else if ((*p == 'e') || (*p == 'E')) |
182 { | 276 { |
183 // process the scientific notation | 277 // process the scientific notation |
184 double sign; // no init is safe (read below) | 278 double sign; // no init is safe (read below) |
200 sign = 1.0; | 294 sign = 1.0; |
201 } | 295 } |
202 else | 296 else |
203 { | 297 { |
204 // only a sign char or a number is allowed | 298 // only a sign char or a number is allowed |
205 return false; | 299 return -1; |
206 } | 300 } |
207 // now p points to the absolute value of the exponent | 301 // now p points to the absolute value of the exponent |
208 double exp = 0; | 302 double exp = 0; |
209 while (*p >= '0' && *p <= '9') | 303 while (*p >= '0' && *p <= '9') |
210 { | 304 { |
214 // now we have our exponent. put a sign on it. | 308 // now we have our exponent. put a sign on it. |
215 exp *= sign; | 309 exp *= sign; |
216 double scFac = ::pow(10.0, exp); | 310 double scFac = ::pow(10.0, exp); |
217 r *= scFac; | 311 r *= scFac; |
218 | 312 |
219 // only allowed symbol here is EOS | 313 // skip the trailing spaces |
220 return (*p == 0); | 314 while (*p == ' ' || *p == '\t') |
315 ++p; | |
316 | |
317 // only allowed symbol here is EOS or stopChar | |
318 if ((*p == 0) || (*p == stopChar)) | |
319 return static_cast<int32_t>(p - text); | |
320 else | |
321 return -1; | |
221 } | 322 } |
222 else | 323 else |
223 { | 324 { |
224 // not allowed | 325 // not allowed |
225 return false; | 326 return -1; |
226 } | 327 } |
227 } | 328 } |
228 | 329 |
330 /** | |
331 Fast string --> double conversion. | |
332 Must pass the LegitDoubleString test | |
333 | |
334 String to doubles with at most 18 digits | |
335 | |
336 Returns true if okay and false if failed. | |
337 | |
338 The end-of-substring is character \x00 | |
339 */ | |
340 inline bool StringToDouble(double& r, const char* text) | |
341 { | |
342 int32_t size = StringToDoubleEx(r, text, 0); | |
343 return (size != -1); | |
344 } | |
345 | |
/**
Convenience overload for std::string: the zero-terminated content of
"text" is forwarded to the "const char*" overload, whose result is
returned unchanged.
*/
inline bool StringToDouble(double& r, const std::string& text)
{
  const char* const raw = text.c_str();
  return StringToDouble(r, raw);
}
233 | 353 |
287 | 407 |
288 /** | 408 /** |
289 Same as GetRgbValuesFromString | 409 Same as GetRgbValuesFromString |
290 */ | 410 */ |
291 bool GetRgbaValuesFromString(uint8_t& red, | 411 bool GetRgbaValuesFromString(uint8_t& red, |
292 uint8_t& green, | 412 uint8_t& green, |
293 uint8_t& blue, | 413 uint8_t& blue, |
294 uint8_t& alpha, | 414 uint8_t& alpha, |
295 const char* text); | 415 const char* text); |
296 | 416 |
/**
Convenience overload for std::string: the zero-terminated content of
"text" is forwarded to the "const char*" overload of
GetRgbaValuesFromString, whose result is returned unchanged.
*/
inline bool GetRgbaValuesFromString(uint8_t& red,
                                    uint8_t& green,
                                    uint8_t& blue,
                                    uint8_t& alpha,
                                    const std::string& text)
{
  const char* const raw = text.c_str();
  return GetRgbaValuesFromString(red, green, blue, alpha, raw);
}
308 | 428 |
309 | 429 |
310 /** | 430 /** |
311 This method could have been called StripSpacesAndChangeToLower but we might want to | 431 This method could have been called StripSpacesAndChangeToLower but we might want to |
312 add some UUID validation to the argument | 432 add some UUID validation to the argument |
313 */ | 433 */ |
314 void NormalizeUuid(std::string& uuid); | 434 void NormalizeUuid(std::string& uuid); |
435 | |
436 | |
/**
Splits "value" at every occurrence of "separator" and stores the pieces,
in order, into "result" (whose previous content is discarded).

Empty tokens are kept: N separators always produce N + 1 items, so that an
empty input yields one single empty string.

NB: "value" must not be an element of "result", since "result" is cleared
before "value" is read.
*/
inline void FastTokenizeString(std::vector<std::string>& result,
                               const std::string& value,
                               char separator)
{
  // First pass: count the separators so that "result" is allocated once
  size_t countSeparators = 0;

  for (size_t i = 0; i < value.size(); i++)
  {
    if (value[i] == separator)
    {
      countSeparators++;
    }
  }

  result.clear();
  result.reserve(countSeparators + 1);

  // Second pass: slice the tokens directly out of "value" instead of
  // rebuilding each of them one character at a time
  std::string::size_type tokenStart = 0;

  for (;;)
  {
    const std::string::size_type separatorPos = value.find(separator, tokenStart);

    if (separatorPos == std::string::npos)
    {
      // Last token: everything that remains (possibly nothing)
      result.push_back(value.substr(tokenStart));
      break;
    }

    result.push_back(value.substr(tokenStart, separatorPos - tokenStart));
    tokenStart = separatorPos + 1;
  }
}
471 | |
472 | |
/**
Returns a copy of "source" without its leading and trailing whitespace
characters, as classified by isspace(). Whitespace located inside the
string is left untouched. An empty string, or a string that only contains
whitespace, yields an empty string.
*/
inline std::string FastStripSpaces(const std::string& source)
{
  const size_t length = source.length();

  // NB: isspace() is only defined for values representable as
  // "unsigned char" (or EOF). Plain "char" may be signed, in which case
  // non-ASCII bytes are negative: hence the casts below.
  size_t first = 0;

  while (first < length &&
         isspace(static_cast<unsigned char>(source[first])))
  {
    first++;
  }

  if (first == length)
  {
    // String containing only spaces (or empty string)
    return "";
  }

  size_t last = length;
  while (last > first &&
         isspace(static_cast<unsigned char>(source[last - 1])))
  {
    last--;
  }

  assert(first <= last);
  return source.substr(first, last - first);
}
499 | |
/**
Returns the number of occurrences of "separator" in the zero-terminated
string "s". The scan starts at "s" and ends at the first \x00, which is
itself never compared against "separator".
*/
inline size_t GetCharCount(const char* s, const char separator)
{
  size_t occurrences = 0;

  for (const char* cursor = s; *cursor != 0; ++cursor)
  {
    if (*cursor == separator)
    {
      occurrences++;
    }
  }

  return occurrences;
}
516 | |
517 inline bool FastParseVector(Vector& target, const std::string& value) | |
518 { | |
519 const char* s = value.c_str(); | |
520 const char SEP = '\\'; | |
521 | |
522 size_t sepCount = GetCharCount(s, SEP); | |
523 | |
524 size_t itemCount = sepCount + 1; | |
525 target.resize(itemCount); | |
526 | |
527 while (*s == ' ' || *s == '\t') | |
528 ++s; | |
529 | |
530 const char* p = s; | |
531 | |
532 double r; | |
533 for (size_t i = 0; i < itemCount; i++) | |
534 { | |
535 int32_t numberCharCount = StringToDoubleEx(r, p, SEP); | |
536 if (numberCharCount == -1) | |
537 { | |
538 LOG(ERROR) << "Parsing error for vector \"" << value << "\". Current position (0-based) = " << i; | |
539 return false; | |
540 } | |
541 p += numberCharCount; | |
542 if (*p == 0) | |
543 { | |
544 // if we are at the end of the string, it means we have processed the last character | |
545 // let's check this. this is a small price to pay for a useful check | |
546 if (i != (itemCount - 1)) | |
547 { | |
548 LOG(ERROR) << "Parsing error for vector \"" << value << "\". Reached end of the string without consuming the right # of items! Current position (0-based) = " << i; | |
549 return false; | |
550 } | |
551 } | |
552 else | |
553 { | |
554 if (*p != SEP) | |
555 { | |
556 LOG(ERROR) << "Parsing error for vector \"" << value << "\". Character past end of number Reached end of the string without consuming the right # of items! Current position (0-based) = " << i << " and r = " << r; | |
557 return false; | |
558 } | |
559 if (i == (itemCount - 1)) | |
560 { | |
561 LOG(ERROR) << "Parsing error for vector \"" << value << "\". Reached end of the vector too soon. Current position (0-based) = " << i << " and r = " << r; | |
562 return false; | |
563 } | |
564 // advance to next number | |
565 p += 1; | |
566 } | |
567 target[i] = r; | |
568 } | |
569 return true; | |
570 } | |
571 | |
572 | |
315 } | 573 } |
316 } | 574 } |