changeset 5284:56cd3444a2d8

NormalizeIdentifier: suggestion for future implementations
author Alain Mazy <am@osimis.io>
date Mon, 24 Apr 2023 18:04:07 +0200
parents 57c0fba5e5c7
children bb07c12e3472
files OrthancServer/Sources/ServerToolbox.cpp
diffstat 1 files changed, 27 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/OrthancServer/Sources/ServerToolbox.cpp	Fri Apr 21 10:35:07 2023 +0200
+++ b/OrthancServer/Sources/ServerToolbox.cpp	Mon Apr 24 18:04:07 2023 +0200
@@ -211,6 +211,13 @@
       std::string t;
       t.reserve(value.size());
 
+#if 0
+      // This version solves an indexing issue (https://discourse.orthanc-server.org/t/postgress-index-effectively-disabled-when-searching-for-greek-names/3371)
+      // and seems functional: I could run the integration tests with both SQLite and PG, plus the DicomWeb tests with PG.
+      // However, it cannot go into production because NormalizeIdentifier is used both at ingest time and at search time;
+      // therefore, if we change it while we have an already populated DB, the searches won't work anymore and, on very large
+      // systems, running the Housekeeper to rebuild the indexes might take months...
+      // We keep it here because it might be handy once we refactor the DicomIdentifier searches in the future.
       for (size_t i = 0; i < value.size(); i++)
       {
         if (value[i] == '%' ||
@@ -218,7 +225,7 @@
         {
           t.push_back(' ');  // These characters might break wildcard queries in SQL
         }
-        else if (isascii(value[i]) &&
+        else if (//isascii(value[i]) &&
                  !iscntrl(value[i]) &&
                  (!isspace(value[i]) || value[i] == ' '))
         {
@@ -226,7 +233,25 @@
         }
       }
 
-      Toolbox::ToUpperCase(t);
+      //Toolbox::ToUpperCase(t);
+      t = Toolbox::ToUpperCaseWithAccents(t);
+#else
+      for (size_t i = 0; i < value.size(); i++)
+      {
+        if (value[i] == '%' ||
+            value[i] == '_')
+        {
+          t.push_back(' ');  // These characters might break wildcard queries in SQL
+        }
+        else if (!iscntrl(value[i]) &&
+                 (!isspace(value[i]) || value[i] == ' '))
+        {
+          t.push_back(value[i]);
+        }
+      }
+
+      t = Toolbox::ToUpperCaseWithAccents(t);
+#endif
 
       return Toolbox::StripSpaces(t);
     }