Mercurial > hg > orthanc
changeset 6568:4e3a85a11878 case-sensitive-find tip
Improved non-Latin searches with respect to CaseSensitive & Since
| author | Alain Mazy <am@orthanc.team> |
|---|---|
| date | Tue, 20 Jan 2026 10:36:06 +0100 |
| parents | bb56a9cccf75 |
| children | |
| files | NEWS OrthancFramework/Sources/Toolbox.cpp OrthancFramework/Sources/Toolbox.h OrthancFramework/UnitTestsSources/FrameworkTests.cpp OrthancServer/Sources/Database/MainDicomTagsRegistry.cpp OrthancServer/Sources/Database/SQLiteDatabaseWrapper.cpp OrthancServer/Sources/Database/SQLiteDatabaseWrapper.h OrthancServer/Sources/OrthancRestApi/OrthancRestResources.cpp OrthancServer/Sources/Search/ISqlLookupFormatter.cpp |
| diffstat | 9 files changed, 111 insertions(+), 33 deletions(-) [+] |
line wrap: on
line diff
--- a/NEWS Tue Jan 20 09:19:56 2026 +0100 +++ b/NEWS Tue Jan 20 10:36:06 2026 +0100 @@ -14,10 +14,13 @@ * Fixed a security issue where one could hijack HTTP headers in the response through the `filename` argument of "/.../file" or "/.../archive" routes. (credits: Pavel Kohout, Aisle Research. www.aisle.com) -* In tools/find, filtering against "LabelsConstraint": "None" with an empty "Labels" list - now returns all resources that do not have any labels attached instead of returning all resources. - This applies to the default SQLite DB and will apply to the next PostgreSQL plugin (v 10.1) - +* In tools/find: + - Filtering against "LabelsConstraint": "None" with an empty "Labels" list + now returns all resources that do not have any labels attached instead of returning all resources. + This applies to the default SQLite DB and will apply to the next PostgreSQL plugin (v 10.1) + - Improved behavior with respect to non-Latin searches and `CaseSensitive` searches that can + now be combined with `Since`. + WIP: this needs to be validated from a performance point of view! Maintenance -----------
--- a/OrthancFramework/Sources/Toolbox.cpp Tue Jan 20 09:19:56 2026 +0100 +++ b/OrthancFramework/Sources/Toolbox.cpp Tue Jan 20 10:36:06 2026 +0100 @@ -1950,8 +1950,7 @@ #endif } - - std::string Toolbox::ToUpperCaseWithAccents(const std::string& source) + static std::string ChangeCaseWithAccents(const std::string& source, bool toLowerCase) { bool error = (globalLocale_.get() == NULL); @@ -2000,9 +1999,29 @@ **/ std::wstring w = boost::locale::conv::utf_to_utf<wchar_t>(source, boost::locale::conv::skip); - w = boost::algorithm::to_upper_copy<std::wstring>(w, *globalLocale_); + if (toLowerCase) + { + w = boost::algorithm::to_lower_copy<std::wstring>(w, *globalLocale_); + } + else + { + w = boost::algorithm::to_upper_copy<std::wstring>(w, *globalLocale_); + } return boost::locale::conv::utf_to_utf<char>(w, boost::locale::conv::skip); } + + + std::string Toolbox::ToUpperCaseWithAccents(const std::string& source) + { + return ChangeCaseWithAccents(source, false); + } + + + std::string Toolbox::ToLowerCaseWithAccents(const std::string& source) + { + return ChangeCaseWithAccents(source, true); + } + #endif
--- a/OrthancFramework/Sources/Toolbox.h Tue Jan 20 09:19:56 2026 +0100 +++ b/OrthancFramework/Sources/Toolbox.h Tue Jan 20 10:36:06 2026 +0100 @@ -355,6 +355,8 @@ static void FinalizeGlobalLocale(); static std::string ToUpperCaseWithAccents(const std::string& source); + + static std::string ToLowerCaseWithAccents(const std::string& source); #endif static void InitializeOpenSsl();
--- a/OrthancFramework/UnitTestsSources/FrameworkTests.cpp Tue Jan 20 09:19:56 2026 +0100 +++ b/OrthancFramework/UnitTestsSources/FrameworkTests.cpp Tue Jan 20 10:36:06 2026 +0100 @@ -487,6 +487,15 @@ s = "CoU"; Toolbox::ToLowerCase(s); ASSERT_EQ("cou", s); + +#if ORTHANC_ENABLE_LOCALE == 1 + s = "éсйігдйомевінрие"; + s = Toolbox::ToUpperCaseWithAccents(s); + ASSERT_EQ("ÉСЙІГДЙОМЕВІНРИЕ", s); + + s = Toolbox::ToLowerCaseWithAccents(s); + ASSERT_EQ("éсйігдйомевінрие", s); +#endif }
--- a/OrthancServer/Sources/Database/MainDicomTagsRegistry.cpp Tue Jan 20 09:19:56 2026 +0100 +++ b/OrthancServer/Sources/Database/MainDicomTagsRegistry.cpp Tue Jan 20 10:36:06 2026 +0100 @@ -129,19 +129,31 @@ } bool isEquivalentConstraint; - - // DicomIdentifiers are stored UPPERCASE -> as soon as a case senstive search happens, it is currently not possible to perform it in DB only + std::unique_ptr<DatabaseDicomTagConstraint> dbConstraint; + + // DicomIdentifiers are stored UPPERCASE -> as soon as a case sensitive search happens, + // it is currently not possible to perform it in DB only on the Identifiers table + // but it can be performed on the MainDicomTags table -> transform the constraint if (type == DicomTagType_Identifier && source.GetConstraint(i).IsCaseSensitive()) { - canBeFullyPerformedInDb = false; + dbConstraint.reset(source.GetConstraint(i).ConvertToDatabaseConstraint(isEquivalentConstraint, level, DicomTagType_Main)); + canBeFullyPerformedInDb = true; } - - target.AddConstraint(source.GetConstraint(i).ConvertToDatabaseConstraint(isEquivalentConstraint, level, type)); + else + { + dbConstraint.reset(source.GetConstraint(i).ConvertToDatabaseConstraint(isEquivalentConstraint, level, type)); + if (!isEquivalentConstraint && type == DicomTagType_Identifier) + { + dbConstraint.reset(source.GetConstraint(i).ConvertToDatabaseConstraint(isEquivalentConstraint, level, DicomTagType_Main)); + } + } if (!isEquivalentConstraint) { isEquivalentLookup = false; } + + target.AddConstraint(dbConstraint.release()); } else {
--- a/OrthancServer/Sources/Database/SQLiteDatabaseWrapper.cpp Tue Jan 20 09:19:56 2026 +0100 +++ b/OrthancServer/Sources/Database/SQLiteDatabaseWrapper.cpp Tue Jan 20 10:36:06 2026 +0100 @@ -226,6 +226,38 @@ }; + class SQLiteDatabaseWrapper::LowerWithAccents : public SQLite::IScalarFunction + { + private: + bool hasRemainingAncestor_; + std::string remainingPublicId_; + ResourceType remainingType_; + + public: + LowerWithAccents() + { + } + + virtual const char* GetName() const ORTHANC_OVERRIDE + { + return "lower_with_accents"; + } + + virtual unsigned int GetCardinality() const ORTHANC_OVERRIDE + { + return 1; + } + + virtual void Compute(SQLite::FunctionContext& context) ORTHANC_OVERRIDE + { + std::string source = context.GetStringValue(0); + std::string modified = Toolbox::ToLowerCaseWithAccents(source); + + context.SetStringResult(modified); + } + + }; + class SQLiteDatabaseWrapper::TransactionBase : public SQLiteDatabaseWrapper::UnitTestsTransaction, public Compatibility::ICreateInstance, @@ -2769,7 +2801,7 @@ signalRemainingAncestor_ = dynamic_cast<SignalRemainingAncestor*>(db_.Register(new SignalRemainingAncestor)); db_.Register(new SignalFileDeleted(*this)); db_.Register(new SignalResourceDeleted(*this)); - + db_.Register(new LowerWithAccents()); db_.Execute("PRAGMA ENCODING=\"UTF-8\";"); // Performance tuning of SQLite with PRAGMAs
--- a/OrthancServer/Sources/Database/SQLiteDatabaseWrapper.h Tue Jan 20 09:19:56 2026 +0100 +++ b/OrthancServer/Sources/Database/SQLiteDatabaseWrapper.h Tue Jan 20 10:36:06 2026 +0100 @@ -43,6 +43,7 @@ class SignalFileDeleted; class SignalResourceDeleted; class SignalRemainingAncestor; + class LowerWithAccents; class ReadOnlyTransaction; class ReadWriteTransaction; class LookupFormatter;
--- a/OrthancServer/Sources/OrthancRestApi/OrthancRestResources.cpp Tue Jan 20 09:19:56 2026 +0100 +++ b/OrthancServer/Sources/OrthancRestApi/OrthancRestResources.cpp Tue Jan 20 10:36:06 2026 +0100 @@ -3335,18 +3335,18 @@ { caseSensitive = request[KEY_CASE_SENSITIVE].asBool(); - if (requestType == FindType_Count && caseSensitive) - { - /** - * Explanation: "/tools/find" uses "lookup_->IsMatch(tags)" in "ResourceFinder::Execute()" - * to apply case sensitiveness (as the database stores tags with PN VR in lower case). - * But, the purpose of "/tools/count-resources" is to speed up the counting the number of - * matching resources: Calling "lookup_->IsMatch(tags)" would require gathering the main - * DICOM tags, which would lead to no speedup wrt. "/tools/find". - **/ - throw OrthancException(ErrorCode_ParameterOutOfRange, "Setting \"" + std::string(KEY_CASE_SENSITIVE) + - "\" to \"true\" is not supported by /tools/count-resources"); - } + // if (requestType == FindType_Count && caseSensitive) + // { + // /** + // * Explanation: "/tools/find" uses "lookup_->IsMatch(tags)" in "ResourceFinder::Execute()" + // * to apply case sensitiveness (as the database stores tags with PN VR in lower case). + // * But, the purpose of "/tools/count-resources" is to speed up the counting the number of + // * matching resources: Calling "lookup_->IsMatch(tags)" would require gathering the main + // * DICOM tags, which would lead to no speedup wrt. "/tools/find". + // **/ + // throw OrthancException(ErrorCode_ParameterOutOfRange, "Setting \"" + std::string(KEY_CASE_SENSITIVE) + + // "\" to \"true\" is not supported by /tools/count-resources"); + // } } { // DICOM Tag query
--- a/OrthancServer/Sources/Search/ISqlLookupFormatter.cpp Tue Jan 20 09:19:56 2026 +0100 +++ b/OrthancServer/Sources/Search/ISqlLookupFormatter.cpp Tue Jan 20 10:36:06 2026 +0100 @@ -123,7 +123,7 @@ } else { - comparison = "lower(" + tag + ".value) " + op + " lower(" + parameter + ")"; + comparison = "lower_with_accents(" + tag + ".value) " + op + " lower_with_accents(" + parameter + ")"; } break; @@ -146,7 +146,7 @@ } else { - comparison += "lower(" + parameter + ")"; + comparison += "lower_with_accents(" + parameter + ")"; } } @@ -156,7 +156,7 @@ } else { - comparison = "lower(" + tag + ".value) IN (" + comparison + ")"; + comparison = "lower_with_accents(" + tag + ".value) IN (" + comparison + ")"; } break; @@ -224,7 +224,7 @@ } else { - comparison = ("lower(" + tag + ".value) LIKE lower(" + + comparison = ("lower_with_accents(" + tag + ".value) LIKE lower_with_accents(" + parameter + ") " + formatter.FormatWildcardEscape()); } } @@ -451,7 +451,7 @@ } else { - comparison = " AND lower(value) " + op + " lower(" + parameter + ")"; + comparison = " AND lower_with_accents(value) " + op + " lower_with_accents(" + parameter + ")"; } break; @@ -470,7 +470,7 @@ } else { - comparisonValues.push_back("lower(" + parameter + ")"); + comparisonValues.push_back("lower_with_accents(" + parameter + ")"); } } @@ -483,7 +483,7 @@ } else { - comparison = " AND lower(value) IN (" + values + ")"; + comparison = " AND lower_with_accents(value) IN (" + values + ")"; } break; @@ -550,7 +550,7 @@ } else { - comparison = " AND lower(value) LIKE lower(" + parameter + ") " + formatter.FormatWildcardEscape(); + comparison = " AND lower_with_accents(value) LIKE lower_with_accents(" + parameter + ") " + formatter.FormatWildcardEscape(); } }
