changeset 6568:4e3a85a11878 case-sensitive-find tip

improved non latin searches wrt CaseSensitive & Since
author Alain Mazy <am@orthanc.team>
date Tue, 20 Jan 2026 10:36:06 +0100
parents bb56a9cccf75
children
files NEWS OrthancFramework/Sources/Toolbox.cpp OrthancFramework/Sources/Toolbox.h OrthancFramework/UnitTestsSources/FrameworkTests.cpp OrthancServer/Sources/Database/MainDicomTagsRegistry.cpp OrthancServer/Sources/Database/SQLiteDatabaseWrapper.cpp OrthancServer/Sources/Database/SQLiteDatabaseWrapper.h OrthancServer/Sources/OrthancRestApi/OrthancRestResources.cpp OrthancServer/Sources/Search/ISqlLookupFormatter.cpp
diffstat 9 files changed, 111 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- a/NEWS	Tue Jan 20 09:19:56 2026 +0100
+++ b/NEWS	Tue Jan 20 10:36:06 2026 +0100
@@ -14,10 +14,13 @@
 * Fixed a security issue where one could hijack HTTP headers in the response
   through the `filename` argument of "/.../file" or "/.../archive" routes.
   (credits: Pavel Kohout, Aisle Research. www.aisle.com)
-* In tools/find, filtering against "LabelsConstraint": "None" with an empty "Labels" list
-  now returns all resources that do not have any labels attached instead of returning all resources.
-  This applies to the default SQLite DB and will apply to the next PostgreSQL plugin (v 10.1)
-
+* In tools/find:
+  - Filtering against "LabelsConstraint": "None" with an empty "Labels" list
+    now returns all resources that do not have any labels attached instead of returning all resources.
+    This applies to the default SQLite DB and will apply to the next PostgreSQL plugin (v 10.1)
+  - Improved behavior with respect to non-Latin searches and `CaseSensitive` searches, which can
+    now be combined with `Since`.
+  WIP: this needs to be validated from a performance point of view!
 
 Maintenance
 -----------
--- a/OrthancFramework/Sources/Toolbox.cpp	Tue Jan 20 09:19:56 2026 +0100
+++ b/OrthancFramework/Sources/Toolbox.cpp	Tue Jan 20 10:36:06 2026 +0100
@@ -1950,8 +1950,7 @@
 #endif
   }
 
-
-  std::string Toolbox::ToUpperCaseWithAccents(const std::string& source)
+  static std::string ChangeCaseWithAccents(const std::string& source, bool toLowerCase)
   {
     bool error = (globalLocale_.get() == NULL);
 
@@ -2000,9 +1999,29 @@
      **/
 
     std::wstring w = boost::locale::conv::utf_to_utf<wchar_t>(source, boost::locale::conv::skip);
-    w = boost::algorithm::to_upper_copy<std::wstring>(w, *globalLocale_);
+    if (toLowerCase)
+    {
+      w = boost::algorithm::to_lower_copy<std::wstring>(w, *globalLocale_);
+    }
+    else
+    {
+      w = boost::algorithm::to_upper_copy<std::wstring>(w, *globalLocale_);
+    }
     return boost::locale::conv::utf_to_utf<char>(w, boost::locale::conv::skip);
   }
+
+
+  std::string Toolbox::ToUpperCaseWithAccents(const std::string& source)
+  {
+    return ChangeCaseWithAccents(source, false);  // toLowerCase == false: convert to upper case
+  }
+
+
+  std::string Toolbox::ToLowerCaseWithAccents(const std::string& source)
+  {
+    return ChangeCaseWithAccents(source, true);  // toLowerCase == true: convert to lower case
+  }
+
 #endif
 
 
--- a/OrthancFramework/Sources/Toolbox.h	Tue Jan 20 09:19:56 2026 +0100
+++ b/OrthancFramework/Sources/Toolbox.h	Tue Jan 20 10:36:06 2026 +0100
@@ -355,6 +355,8 @@
     static void FinalizeGlobalLocale();
 
     static std::string ToUpperCaseWithAccents(const std::string& source);
+
+    static std::string ToLowerCaseWithAccents(const std::string& source);
 #endif
 
     static void InitializeOpenSsl();
--- a/OrthancFramework/UnitTestsSources/FrameworkTests.cpp	Tue Jan 20 09:19:56 2026 +0100
+++ b/OrthancFramework/UnitTestsSources/FrameworkTests.cpp	Tue Jan 20 10:36:06 2026 +0100
@@ -487,6 +487,15 @@
   s = "CoU";
   Toolbox::ToLowerCase(s);
   ASSERT_EQ("cou", s);
+
+#if ORTHANC_ENABLE_LOCALE == 1
+  s = "éсйігдйомевінрие";
+  s = Toolbox::ToUpperCaseWithAccents(s);
+  ASSERT_EQ("ÉСЙІГДЙОМЕВІНРИЕ", s);
+
+  s = Toolbox::ToLowerCaseWithAccents(s);
+  ASSERT_EQ("éсйігдйомевінрие", s);
+#endif
 }
 
 
--- a/OrthancServer/Sources/Database/MainDicomTagsRegistry.cpp	Tue Jan 20 09:19:56 2026 +0100
+++ b/OrthancServer/Sources/Database/MainDicomTagsRegistry.cpp	Tue Jan 20 10:36:06 2026 +0100
@@ -129,19 +129,31 @@
         }
 
         bool isEquivalentConstraint;
-        
-        // DicomIdentifiers are stored UPPERCASE -> as soon as a case senstive search happens, it is currently not possible to perform it in DB only
+        std::unique_ptr<DatabaseDicomTagConstraint> dbConstraint;
+
+        // DicomIdentifiers are stored UPPERCASE -> as soon as a case-sensitive search happens,
+        // it is currently not possible to perform it in DB only on the Identifiers table,
+        // but it can be performed on the MainDicomTags table -> transform the constraint
         if (type == DicomTagType_Identifier && source.GetConstraint(i).IsCaseSensitive())
         {
-          canBeFullyPerformedInDb = false;
+          dbConstraint.reset(source.GetConstraint(i).ConvertToDatabaseConstraint(isEquivalentConstraint, level, DicomTagType_Main));
+          canBeFullyPerformedInDb = true;
         }
-
-        target.AddConstraint(source.GetConstraint(i).ConvertToDatabaseConstraint(isEquivalentConstraint, level, type));
+        else
+        {
+          dbConstraint.reset(source.GetConstraint(i).ConvertToDatabaseConstraint(isEquivalentConstraint, level, type));
+          if (!isEquivalentConstraint && type == DicomTagType_Identifier)
+          {
+            dbConstraint.reset(source.GetConstraint(i).ConvertToDatabaseConstraint(isEquivalentConstraint, level, DicomTagType_Main));
+          }
+        }
 
         if (!isEquivalentConstraint)
         {
           isEquivalentLookup = false;
         }
+
+        target.AddConstraint(dbConstraint.release());
       }
       else
       {
--- a/OrthancServer/Sources/Database/SQLiteDatabaseWrapper.cpp	Tue Jan 20 09:19:56 2026 +0100
+++ b/OrthancServer/Sources/Database/SQLiteDatabaseWrapper.cpp	Tue Jan 20 10:36:06 2026 +0100
@@ -226,6 +226,38 @@
   };
 
 
+  // SQLite scalar function "lower_with_accents(x)": lower-cases its single
+  // text argument through Toolbox::ToLowerCaseWithAccents(), so that it can
+  // be used in SQL comparisons where SQLite's built-in lower() is ASCII-only.
+  class SQLiteDatabaseWrapper::LowerWithAccents : public SQLite::IScalarFunction
+  {
+  public:
+    LowerWithAccents()
+    {
+    }
+
+    // Name under which the function is exposed to SQL statements
+    virtual const char* GetName() const ORTHANC_OVERRIDE
+    {
+      return "lower_with_accents";
+    }
+
+    // The function takes exactly one argument
+    virtual unsigned int GetCardinality() const ORTHANC_OVERRIDE
+    {
+      return 1;
+    }
+
+    // NOTE(review): confirm how GetStringValue() behaves on a SQL NULL argument
+    virtual void Compute(SQLite::FunctionContext& context) ORTHANC_OVERRIDE
+    {
+      std::string source = context.GetStringValue(0);
+      std::string modified = Toolbox::ToLowerCaseWithAccents(source);
+
+      context.SetStringResult(modified);
+    }
+  };
+
   class SQLiteDatabaseWrapper::TransactionBase :
     public SQLiteDatabaseWrapper::UnitTestsTransaction,
     public Compatibility::ICreateInstance,
@@ -2769,7 +2801,7 @@
       signalRemainingAncestor_ = dynamic_cast<SignalRemainingAncestor*>(db_.Register(new SignalRemainingAncestor));
       db_.Register(new SignalFileDeleted(*this));
       db_.Register(new SignalResourceDeleted(*this));
-    
+      db_.Register(new LowerWithAccents());
       db_.Execute("PRAGMA ENCODING=\"UTF-8\";");
 
       // Performance tuning of SQLite with PRAGMAs
--- a/OrthancServer/Sources/Database/SQLiteDatabaseWrapper.h	Tue Jan 20 09:19:56 2026 +0100
+++ b/OrthancServer/Sources/Database/SQLiteDatabaseWrapper.h	Tue Jan 20 10:36:06 2026 +0100
@@ -43,6 +43,7 @@
     class SignalFileDeleted;
     class SignalResourceDeleted;
     class SignalRemainingAncestor;
+    class LowerWithAccents;
     class ReadOnlyTransaction;
     class ReadWriteTransaction;
     class LookupFormatter;
--- a/OrthancServer/Sources/OrthancRestApi/OrthancRestResources.cpp	Tue Jan 20 09:19:56 2026 +0100
+++ b/OrthancServer/Sources/OrthancRestApi/OrthancRestResources.cpp	Tue Jan 20 10:36:06 2026 +0100
@@ -3335,18 +3335,18 @@
         {
           caseSensitive = request[KEY_CASE_SENSITIVE].asBool();
 
-          if (requestType == FindType_Count && caseSensitive)
-          {
-            /**
-             * Explanation: "/tools/find" uses "lookup_->IsMatch(tags)" in "ResourceFinder::Execute()"
-             * to apply case sensitiveness (as the database stores tags with PN VR in lower case).
-             * But, the purpose of "/tools/count-resources" is to speed up the counting the number of
-             * matching resources: Calling "lookup_->IsMatch(tags)" would require gathering the main
-             * DICOM tags, which would lead to no speedup wrt. "/tools/find".
-             **/
-            throw OrthancException(ErrorCode_ParameterOutOfRange, "Setting \"" + std::string(KEY_CASE_SENSITIVE) +
-                                   "\" to \"true\" is not supported by /tools/count-resources");
-          }
+          // if (requestType == FindType_Count && caseSensitive)
+          // {
+          //   /**
+          //    * Explanation: "/tools/find" uses "lookup_->IsMatch(tags)" in "ResourceFinder::Execute()"
+          //    * to apply case sensitiveness (as the database stores tags with PN VR in lower case).
+          //    * But, the purpose of "/tools/count-resources" is to speed up the counting the number of
+          //    * matching resources: Calling "lookup_->IsMatch(tags)" would require gathering the main
+          //    * DICOM tags, which would lead to no speedup wrt. "/tools/find".
+          //    **/
+          //   throw OrthancException(ErrorCode_ParameterOutOfRange, "Setting \"" + std::string(KEY_CASE_SENSITIVE) +
+          //                          "\" to \"true\" is not supported by /tools/count-resources");
+          // }
         }
 
         { // DICOM Tag query
--- a/OrthancServer/Sources/Search/ISqlLookupFormatter.cpp	Tue Jan 20 09:19:56 2026 +0100
+++ b/OrthancServer/Sources/Search/ISqlLookupFormatter.cpp	Tue Jan 20 10:36:06 2026 +0100
@@ -123,7 +123,7 @@
         }
         else
         {
-          comparison = "lower(" + tag + ".value) " + op + " lower(" + parameter + ")";
+          comparison = "lower_with_accents(" + tag + ".value) " + op + " lower_with_accents(" + parameter + ")";
         }
 
         break;
@@ -146,7 +146,7 @@
           }
           else
           {
-            comparison += "lower(" + parameter + ")";
+            comparison += "lower_with_accents(" + parameter + ")";
           }
         }
 
@@ -156,7 +156,7 @@
         }
         else
         {
-          comparison = "lower(" +  tag + ".value) IN (" + comparison + ")";
+          comparison = "lower_with_accents(" +  tag + ".value) IN (" + comparison + ")";
         }
             
         break;
@@ -224,7 +224,7 @@
           }
           else
           {
-            comparison = ("lower(" + tag + ".value) LIKE lower(" +
+            comparison = ("lower_with_accents(" + tag + ".value) LIKE lower_with_accents(" +
                           parameter + ") " + formatter.FormatWildcardEscape());
           }
         }
@@ -451,7 +451,7 @@
         }
         else
         {
-          comparison = " AND lower(value) " + op + " lower(" + parameter + ")";
+          comparison = " AND lower_with_accents(value) " + op + " lower_with_accents(" + parameter + ")";
         }
 
         break;
@@ -470,7 +470,7 @@
           }
           else
           {
-            comparisonValues.push_back("lower(" + parameter + ")");
+            comparisonValues.push_back("lower_with_accents(" + parameter + ")");
           }
         }
 
@@ -483,7 +483,7 @@
         }
         else
         {
-          comparison = " AND lower(value) IN (" + values + ")";
+          comparison = " AND lower_with_accents(value) IN (" + values + ")";
         }
             
         break;
@@ -550,7 +550,7 @@
           }
           else
           {
-            comparison = " AND lower(value) LIKE lower(" + parameter + ") " + formatter.FormatWildcardEscape();
+            comparison = " AND lower_with_accents(value) LIKE lower_with_accents(" + parameter + ") " + formatter.FormatWildcardEscape();
           }
         }