Mercurial > hg > orthanc-databases
changeset 639:f4b132734936
PG: new AllowInconsistentChildCounts (and new optimized method to compute child counts)
author | Alain Mazy <am@orthanc.team> |
---|---|
date | Fri, 14 Feb 2025 12:34:26 +0100 (2 months ago) |
parents | 77b9684c57b4 |
children | 480344e7316d |
files | Framework/Plugins/IndexBackend.cpp Framework/Plugins/IndexBackend.h Framework/Plugins/IndexConnectionsPool.cpp Framework/PostgreSQL/PostgreSQLParameters.cpp Framework/PostgreSQL/PostgreSQLParameters.h MySQL/Plugins/MySQLIndex.h Odbc/Plugins/OdbcIndex.h PostgreSQL/CMakeLists.txt PostgreSQL/NEWS PostgreSQL/Plugins/PostgreSQLIndex.cpp PostgreSQL/Plugins/PostgreSQLIndex.h PostgreSQL/Plugins/SQL/Downgrades/Rev4ToRev3.sql PostgreSQL/Plugins/SQL/PrepareIndex.sql PostgreSQL/Plugins/SQL/Upgrades/Rev2ToRev3.sql PostgreSQL/Plugins/SQL/Upgrades/Rev3ToRev4.sql SQLite/Plugins/SQLiteIndex.h |
diffstat | 16 files changed, 400 insertions(+), 209 deletions(-) [+] |
line wrap: on
line diff
--- a/Framework/Plugins/IndexBackend.cpp Tue Feb 04 10:06:33 2025 +0100 +++ b/Framework/Plugins/IndexBackend.cpp Fri Feb 14 12:34:26 2025 +0100 @@ -312,9 +312,11 @@ IndexBackend::IndexBackend(OrthancPluginContext* context, - bool readOnly) : + bool readOnly, + bool allowInconsistentChildCounts) : context_(context), - readOnly_(readOnly) + readOnly_(readOnly), + allowInconsistentChildCounts_(allowInconsistentChildCounts) { } @@ -3316,6 +3318,29 @@ // why we have generic column names. // So, at the end we'll have only one very big query ! + const Orthanc::DatabasePluginMessages::Find_Request_ChildrenSpecification* childrenSpec = NULL; + const Orthanc::DatabasePluginMessages::Find_Request_ChildrenSpecification* grandchildrenSpec = NULL; + const Orthanc::DatabasePluginMessages::Find_Request_ChildrenSpecification* grandgrandchildrenSpec = NULL; + + switch (request.level()) + { + case Orthanc::DatabasePluginMessages::RESOURCE_PATIENT: + childrenSpec = &(request.children_studies()); + grandchildrenSpec = &(request.children_series()); + grandgrandchildrenSpec = &(request.children_instances()); + break; + case Orthanc::DatabasePluginMessages::RESOURCE_STUDY: + childrenSpec = &(request.children_series()); + grandchildrenSpec = &(request.children_instances()); + break; + case Orthanc::DatabasePluginMessages::RESOURCE_SERIES: + childrenSpec = &(request.children_instances()); + break; + + default: + break; + } + std::string sql; // extract the resource id of interest by executing the lookup in a CTE @@ -3328,6 +3353,7 @@ std::string oneInstanceSqlCTE; + // OneInstance CTE if (request.level() != Orthanc::DatabasePluginMessages::RESOURCE_INSTANCE && request.retrieve_one_instance_metadata_and_attachments()) { @@ -3361,10 +3387,47 @@ sql += ", OneInstance AS (SELECT parentInternalId, instancePublicId, instanceInternalId FROM _OneInstance WHERE rowNum = 1) "; // this is a generic way to implement DISTINCT ON } - // if (!oneInstanceSqlCTE.empty() && (manager.GetDialect() == Dialect_MySQL || manager.GetDialect() == Dialect_SQLite)) - // { // all CTEs must be declared first in some dialects - // } - + // GrandChildCount CTE in some specific cases + bool hasGrandChildCountCTE = false; + if (HasChildCountColumn() && !allowInconsistentChildCounts_ && + grandchildrenSpec != NULL && + !grandchildrenSpec->retrieve_identifiers() && // no need to count if we have retrieved the list of identifiers + grandchildrenSpec->retrieve_count()) + { + sql += ", ValidGrandChildCounts AS (SELECT thisLevel.internalId, COALESCE(SUM(childLevel.childCount), 0) AS totalGrandChildCount " + " FROM Resources AS thisLevel " + " LEFT JOIN Resources AS childLevel ON childLevel.parentId = thisLevel.internalId " + " WHERE NOT EXISTS (SELECT 1 FROM InvalidChildCounts WHERE id = thisLevel.internalId) " + " AND NOT EXISTS (SELECT 1 FROM Resources AS child " + " JOIN InvalidChildCounts ON InvalidChildCounts.id = child.internalId " + " WHERE child.parentId = thisLevel.internalId) " + " GROUP BY thisLevel.internalId) "; + hasGrandChildCountCTE = true; + } + + // GrandGrandChildCount CTE in some specific cases + bool hasGrandGrandChildCountCTE = false; + if (HasChildCountColumn() && !allowInconsistentChildCounts_ && + grandgrandchildrenSpec != NULL && + !grandgrandchildrenSpec->retrieve_identifiers() && // no need to count if we have retrieved the list of identifiers + grandgrandchildrenSpec->retrieve_count()) + { + sql += ", ValidGrandGrandChildCounts AS (SELECT thisLevel.internalId, COALESCE(SUM(grandChildLevel.childCount), 0) AS totalGrandGrandChildCount " + " FROM Resources AS thisLevel " + " LEFT JOIN Resources AS childLevel ON childLevel.parentId = thisLevel.internalId " + " LEFT JOIN Resources AS grandChildLevel ON grandChildLevel.parentId = childLevel.internalId " + " WHERE NOT EXISTS (SELECT 1 FROM InvalidChildCounts WHERE id = thisLevel.internalId) " + " AND NOT EXISTS (SELECT 1 FROM Resources AS child " + " JOIN InvalidChildCounts ON InvalidChildCounts.id = child.internalId " + " WHERE child.parentId = thisLevel.internalId) " + " AND NOT EXISTS (SELECT 1 FROM Resources AS grandChild " + " JOIN Resources AS child ON grandChild.parentId = child.internalId " + " JOIN InvalidChildCounts ON InvalidChildCounts.id = grandChild.internalId " + " WHERE child.parentId = thisLevel.internalId) " + " GROUP BY thisLevel.internalId) "; + hasGrandGrandChildCountCTE = true; + } + std::string revisionInC7; if (HasRevisionsSupport()) { @@ -3586,22 +3649,7 @@ // need MainDicomTags from children ? if (request.level() <= Orthanc::DatabasePluginMessages::RESOURCE_SERIES) { - const Orthanc::DatabasePluginMessages::Find_Request_ChildrenSpecification* childrenSpec = NULL; - switch (request.level()) - { - case Orthanc::DatabasePluginMessages::RESOURCE_PATIENT: - childrenSpec = &(request.children_studies()); - break; - case Orthanc::DatabasePluginMessages::RESOURCE_STUDY: - childrenSpec = &(request.children_series()); - break; - case Orthanc::DatabasePluginMessages::RESOURCE_SERIES: - childrenSpec = &(request.children_instances()); - break; - - default: - break; - } + assert(childrenSpec != NULL); if (childrenSpec->retrieve_main_dicom_tags_size() > 0) { @@ -3642,30 +3690,24 @@ } else if (childrenSpec->retrieve_count()) // no need to count if we have retrieved the list of identifiers { - if (HasChildCountTable()) // TODO: rename in HasChildCountColumn ? + if (HasChildCountColumn()) { - // // we get the count value either from the childCount table if it has been computed or from the Resources table - // sql += "UNION ALL SELECT " - // " " TOSTRING(QUERY_CHILDREN_COUNT) " AS c0_queryId, " - // " Lookup.internalId AS c1_internalId, " - // " " + formatter.FormatNull("BIGINT") + " AS c2_rowNumber, " - // " " + formatter.FormatNull("TEXT") + " AS c3_string1, " - // " " + formatter.FormatNull("TEXT") + " AS c4_string2, " - // " " + formatter.FormatNull("TEXT") + " AS c5_string3, " - // " " + formatter.FormatNull("INT") + " AS c6_int1, " - // " " + formatter.FormatNull("INT") + " AS c7_int2, " - // " " + formatter.FormatNull("INT") + " AS c8_int3, " - // " COALESCE(" - // " (ChildCount.childCount)," - // " (SELECT COUNT(childLevel.internalId)" - // " FROM Resources AS childLevel" - // " WHERE Lookup.internalId = childLevel.parentId" - // " )) AS c9_big_int1, " - // " " + formatter.FormatNull("BIGINT") + " AS c10_big_int2 " - // "FROM Lookup " - // "LEFT JOIN ChildCount ON Lookup.internalId = ChildCount.parentId "; - // we get the count value either from the childCount column if it has been computed or from the Resources table + std::string getResourcesChildCount; + + if (allowInconsistentChildCounts_) // allow approximate childCount if childCount is populated but has not yet been updated + { + getResourcesChildCount = "Resources.childCount"; // read the last known value without taking into account the InvalidChildCounts table to speed up the query + } + else + { + // read the last known value only if it has not been invalidated by an entry in the InvalidChildCounts table -> more complex/slower query + getResourcesChildCount = "SELECT childCount " + "FROM Resources AS thisLevel " + "WHERE thisLevel.internalId = lookup.internalId " + " AND NOT EXISTS (SELECT 1 FROM Invalidchildcounts WHERE id = thisLevel.internalId)"; + } + sql += "UNION ALL SELECT " " " TOSTRING(QUERY_CHILDREN_COUNT) " AS c0_queryId, " " Lookup.internalId AS c1_internalId, " @@ -3677,7 +3719,7 @@ " " + formatter.FormatNull("INT") + " AS c7_int2, " " " + formatter.FormatNull("INT") + " AS c8_int3, " " COALESCE(" - " (Resources.childCount)," + " (" + getResourcesChildCount + ")," " (SELECT COUNT(childLevel.internalId)" " FROM Resources AS childLevel" " WHERE Lookup.internalId = childLevel.parentId" @@ -3726,19 +3768,7 @@ if (request.level() <= Orthanc::DatabasePluginMessages::RESOURCE_STUDY) { - const Orthanc::DatabasePluginMessages::Find_Request_ChildrenSpecification* grandchildrenSpec = NULL; - switch (request.level()) - { - case Orthanc::DatabasePluginMessages::RESOURCE_PATIENT: - grandchildrenSpec = &(request.children_series()); - break; - case Orthanc::DatabasePluginMessages::RESOURCE_STUDY: - grandchildrenSpec = &(request.children_instances()); - break; - - default: - break; - } + assert(grandchildrenSpec != NULL); // need grand children identifiers ? if (grandchildrenSpec->retrieve_identifiers()) @@ -3761,54 +3791,48 @@ } else if (grandchildrenSpec->retrieve_count()) // no need to count if we have retrieved the list of identifiers { - if (HasChildCountTable()) + if (HasChildCountColumn()) { - // // we get the count value either from the childCount table if it has been computed or from the Resources table - // sql += "UNION ALL SELECT " - // " " TOSTRING(QUERY_GRAND_CHILDREN_COUNT) " AS c0_queryId, " - // " Lookup.internalId AS c1_internalId, " - // " " + formatter.FormatNull("BIGINT") + " AS c2_rowNumber, " - // " " + formatter.FormatNull("TEXT") + " AS c3_string1, " - // " " + formatter.FormatNull("TEXT") + " AS c4_string2, " - // " " + formatter.FormatNull("TEXT") + " AS c5_string3, " - // " " + formatter.FormatNull("INT") + " AS c6_int1, " - // " " + formatter.FormatNull("INT") + " AS c7_int2, " - // " " + formatter.FormatNull("INT") + " AS c8_int3, " - // " COALESCE(" - // " (SELECT SUM(ChildCount.childCount)" - // " FROM ChildCount" - // " INNER JOIN Resources AS childLevel ON childLevel.parentId = Lookup.internalId" - // " WHERE ChildCount.parentId = childLevel.internalId)," - // " (SELECT COUNT(grandChildLevel.internalId)" - // " FROM Resources AS childLevel" - // " INNER JOIN Resources AS grandChildLevel ON childLevel.internalId = grandChildLevel.parentId" - // " WHERE Lookup.internalId = childLevel.parentId" - // " )) AS c9_big_int1, " - // " " + formatter.FormatNull("BIGINT") + " AS c10_big_int2 " - // "FROM Lookup "; + // we get the count value either from the childCount column if it has been computed or from the Resources table + std::string getResourcesGrandChildCount; + + if (allowInconsistentChildCounts_) // allow approximate childCount if childCount is populated but has not yet been updated + { + // read the last known value without taking into account the InvalidChildCounts table to speed up the query + getResourcesGrandChildCount = "SELECT SUM(childLevel.childCount) " + "FROM Resources AS childLevel " + "WHERE childLevel.parentId = Lookup.internalId"; + } + else + { + assert(hasGrandChildCountCTE); + // read the last known value only if it has not been invalidated by an entry in the InvalidChildCounts table -> more complex/slower query + getResourcesGrandChildCount = "SELECT totalGrandChildCount " + "FROM ValidGrandChildCounts " + "WHERE internalId = lookup.internalId"; + //" AND NOT EXISTS (SELECT 1 FROM Invalidchildcounts WHERE id = childLevel.internalId OR id = lookup.internalId)"; + } // we get the count value either from the childCount column if it has been computed or from the Resources table sql += "UNION ALL SELECT " - " " TOSTRING(QUERY_GRAND_CHILDREN_COUNT) " AS c0_queryId, " - " Lookup.internalId AS c1_internalId, " - " " + formatter.FormatNull("BIGINT") + " AS c2_rowNumber, " - " " + formatter.FormatNull("TEXT") + " AS c3_string1, " - " " + formatter.FormatNull("TEXT") + " AS c4_string2, " - " " + formatter.FormatNull("TEXT") + " AS c5_string3, " - " " + formatter.FormatNull("INT") + " AS c6_int1, " - " " + formatter.FormatNull("INT") + " AS c7_int2, " - " " + formatter.FormatNull("INT") + " AS c8_int3, " - " COALESCE(" - " (SELECT SUM(childLevel.childCount)" - " FROM Resources AS childLevel" - " WHERE childLevel.parentId = Lookup.internalId)," - " (SELECT COUNT(grandChildLevel.internalId)" - " FROM Resources AS childLevel" - " INNER JOIN Resources AS grandChildLevel ON childLevel.internalId = grandChildLevel.parentId" - " WHERE Lookup.internalId = childLevel.parentId" - " )) AS c9_big_int1, " - " " + formatter.FormatNull("BIGINT") + " AS c10_big_int2 " - "FROM Lookup "; + " " TOSTRING(QUERY_GRAND_CHILDREN_COUNT) " AS c0_queryId, " + " Lookup.internalId AS c1_internalId, " + " " + formatter.FormatNull("BIGINT") + " AS c2_rowNumber, " + " " + formatter.FormatNull("TEXT") + " AS c3_string1, " + " " + formatter.FormatNull("TEXT") + " AS c4_string2, " + " " + formatter.FormatNull("TEXT") + " AS c5_string3, " + " " + formatter.FormatNull("INT") + " AS c6_int1, " + " " + formatter.FormatNull("INT") + " AS c7_int2, " + " " + formatter.FormatNull("INT") + " AS c8_int3, " + " COALESCE(" + " (" + getResourcesGrandChildCount + ")," + " (SELECT COUNT(grandChildLevel.internalId)" + " FROM Resources AS childLevel" + " INNER JOIN Resources AS grandChildLevel ON childLevel.internalId = grandChildLevel.parentId" + " WHERE Lookup.internalId = childLevel.parentId" + " )) AS c9_big_int1, " + " " + formatter.FormatNull("BIGINT") + " AS c10_big_int2 " + "FROM Lookup "; } else { @@ -3872,7 +3896,7 @@ if (request.level() == Orthanc::DatabasePluginMessages::RESOURCE_PATIENT) { - const Orthanc::DatabasePluginMessages::Find_Request_ChildrenSpecification* grandgrandchildrenSpec = &(request.children_instances()); + assert(grandgrandchildrenSpec != NULL); // need grand children identifiers ? if (grandgrandchildrenSpec->retrieve_identifiers()) @@ -3896,33 +3920,29 @@ } else if (grandgrandchildrenSpec->retrieve_count()) // no need to count if we have retrieved the list of identifiers { - if (HasChildCountTable()) + if (HasChildCountColumn()) { - // // we get the count value either from the childCount table if it has been computed or from the Resources table - // sql += "UNION ALL SELECT " - // " " TOSTRING(QUERY_GRAND_GRAND_CHILDREN_COUNT) " AS c0_queryId, " - // " Lookup.internalId AS c1_internalId, " - // " " + formatter.FormatNull("BIGINT") + " AS c2_rowNumber, " - // " " + formatter.FormatNull("TEXT") + " AS c3_string1, " - // " " + formatter.FormatNull("TEXT") + " AS c4_string2, " - // " " + formatter.FormatNull("TEXT") + " AS c5_string3, " - // " " + formatter.FormatNull("INT") + " AS c6_int1, " - // " " + formatter.FormatNull("INT") + " AS c7_int2, " - // " " + formatter.FormatNull("INT") + " AS c8_int3, " - // " COALESCE(" - // " (SELECT SUM(ChildCount.childCount)" - // " FROM ChildCount" - // " INNER JOIN Resources AS childLevel ON childLevel.parentId = Lookup.internalId" - // " INNER JOIN Resources AS grandChildLevel ON grandChildLevel.parentId = childLevel.internalId" - // " WHERE ChildCount.parentId = grandChildLevel.internalId)," - // " (SELECT COUNT(grandGrandChildLevel.internalId)" - // " FROM Resources AS childLevel" - // " INNER JOIN Resources AS grandChildLevel ON childLevel.internalId = grandChildLevel.parentId" - // " INNER JOIN Resources AS grandGrandChildLevel ON grandChildLevel.internalId = grandGrandChildLevel.parentId" - // " WHERE Lookup.internalId = childLevel.parentId" - // " )) AS c9_big_int1, " - // " " + formatter.FormatNull("BIGINT") + " AS c10_big_int2 " - // "FROM Lookup "; + std::string getResourcesGrandGrandChildCount; + if (allowInconsistentChildCounts_) // allow approximate childCount if childCount is populated but has not yet been updated + { + // read the last known value without taking into account the InvalidChildCounts table to speed up the query + getResourcesGrandGrandChildCount = "SELECT SUM(grandChildLevel.childCount)" + "FROM Resources AS grandChildLevel" + "INNER JOIN Resources AS childLevel ON childLevel.parentId = Lookup.internalId" + "WHERE grandChildLevel.parentId = childLevel.internalId"; + } + else + { + assert(hasGrandGrandChildCountCTE); + // read the last known value only if it has not been invalidated by an entry in the InvalidChildCounts table -> more complex/slower query + getResourcesGrandGrandChildCount = "SELECT totalGrandGrandChildCount " + "FROM ValidGrandGrandChildCounts " + "WHERE internalId = lookup.internalId"; + // "INNER JOIN Resources AS childLevel ON childLevel.parentId = Lookup.internalId" + // "WHERE grandChildLevel.parentId = childLevel.internalId" + // " AND NOT EXISTS (SELECT 1 FROM Invalidchildcounts WHERE id = grandChildLevel.internalId OR id = childLevel.internalId OR id = lookup.internalId)"; + } + // we get the count value either from the childCount column if it has been computed or from the Resources table sql += "UNION ALL SELECT " @@ -3936,10 +3956,7 @@ " " + formatter.FormatNull("INT") + " AS c7_int2, " " " + formatter.FormatNull("INT") + " AS c8_int3, " " COALESCE(" - " (SELECT SUM(grandChildLevel.childCount)" - " FROM Resources AS grandChildLevel" - " INNER JOIN Resources AS childLevel ON childLevel.parentId = Lookup.internalId" - " WHERE grandChildLevel.parentId = childLevel.internalId)," + " (" + getResourcesGrandGrandChildCount + ")," " (SELECT COUNT(grandGrandChildLevel.internalId)" " FROM Resources AS childLevel" " INNER JOIN Resources AS grandChildLevel ON childLevel.internalId = grandChildLevel.parentId"
--- a/Framework/Plugins/IndexBackend.h Tue Feb 04 10:06:33 2025 +0100 +++ b/Framework/Plugins/IndexBackend.h Fri Feb 14 12:34:26 2025 +0100 @@ -43,18 +43,20 @@ OrthancPluginContext* context_; bool readOnly_; + bool allowInconsistentChildCounts_; boost::shared_mutex outputFactoryMutex_; std::unique_ptr<IDatabaseBackendOutput::IFactory> outputFactory_; protected: + virtual void ClearDeletedFiles(DatabaseManager& manager); virtual void ClearDeletedResources(DatabaseManager& manager); virtual void ClearRemainingAncestor(DatabaseManager& manager); - virtual bool HasChildCountTable() const = 0; + virtual bool HasChildCountColumn() const = 0; void SignalDeletedFiles(IDatabaseBackendOutput& output, DatabaseManager& manager); @@ -84,7 +86,8 @@ public: explicit IndexBackend(OrthancPluginContext* context, - bool readOnly); + bool readOnly, + bool allowInconsistentChildCounts); virtual OrthancPluginContext* GetContext() ORTHANC_OVERRIDE {
--- a/Framework/Plugins/IndexConnectionsPool.cpp Tue Feb 04 10:06:33 2025 +0100 +++ b/Framework/Plugins/IndexConnectionsPool.cpp Fri Feb 14 12:34:26 2025 +0100 @@ -49,6 +49,7 @@ void IndexConnectionsPool::HousekeepingThread(IndexConnectionsPool* that) { + OrthancPluginSetCurrentThreadName(OrthancPlugins::GetGlobalContext(), "DB HOUSEKEEPING"); boost::posix_time::ptime lastInvocation = boost::posix_time::second_clock::local_time(); while (that->housekeepingContinue_)
--- a/Framework/PostgreSQL/PostgreSQLParameters.cpp Tue Feb 04 10:06:33 2025 +0100 +++ b/Framework/PostgreSQL/PostgreSQLParameters.cpp Fri Feb 14 12:34:26 2025 +0100 @@ -98,6 +98,7 @@ lock_ = configuration.GetBooleanValue("Lock", true); // Use locking by default isVerboseEnabled_ = configuration.GetBooleanValue("EnableVerboseLogs", false); + allowInconsistentChildCounts_ = configuration.GetBooleanValue("AllowInconsistentChildCounts", false); maxConnectionRetries_ = configuration.GetUnsignedIntegerValue("MaximumConnectionRetries", 10); connectionRetryInterval_ = configuration.GetUnsignedIntegerValue("ConnectionRetryInterval", 5);
--- a/Framework/PostgreSQL/PostgreSQLParameters.h Tue Feb 04 10:06:33 2025 +0100 +++ b/Framework/PostgreSQL/PostgreSQLParameters.h Fri Feb 14 12:34:26 2025 +0100 @@ -51,6 +51,7 @@ unsigned int maxConnectionRetries_; unsigned int connectionRetryInterval_; bool isVerboseEnabled_; + bool allowInconsistentChildCounts_; IsolationMode isolationMode_; void Reset(); @@ -152,6 +153,10 @@ return isVerboseEnabled_; } + bool GetAllowInconsistentChildCounts() const + { + return allowInconsistentChildCounts_; + } void Format(std::string& target) const; };
--- a/MySQL/Plugins/MySQLIndex.h Tue Feb 04 10:06:33 2025 +0100 +++ b/MySQL/Plugins/MySQLIndex.h Fri Feb 14 12:34:26 2025 +0100 @@ -35,7 +35,7 @@ bool clearAll_; protected: - virtual bool HasChildCountTable() const ORTHANC_OVERRIDE + virtual bool HasChildCountColumn() const ORTHANC_OVERRIDE { return false; }
--- a/Odbc/Plugins/OdbcIndex.h Tue Feb 04 10:06:33 2025 +0100 +++ b/Odbc/Plugins/OdbcIndex.h Fri Feb 14 12:34:26 2025 +0100 @@ -35,7 +35,7 @@ std::string connectionString_; protected: - virtual bool HasChildCountTable() const ORTHANC_OVERRIDE + virtual bool HasChildCountColumn() const ORTHANC_OVERRIDE { return false; }
--- a/PostgreSQL/CMakeLists.txt Tue Feb 04 10:06:33 2025 +0100 +++ b/PostgreSQL/CMakeLists.txt Fri Feb 14 12:34:26 2025 +0100 @@ -93,6 +93,7 @@ POSTGRESQL_UPGRADE_UNKNOWN_TO_REV1 ${CMAKE_SOURCE_DIR}/Plugins/SQL/Upgrades/UnknownToRev1.sql POSTGRESQL_UPGRADE_REV1_TO_REV2 ${CMAKE_SOURCE_DIR}/Plugins/SQL/Upgrades/Rev1ToRev2.sql POSTGRESQL_UPGRADE_REV2_TO_REV3 ${CMAKE_SOURCE_DIR}/Plugins/SQL/Upgrades/Rev2ToRev3.sql + POSTGRESQL_UPGRADE_REV3_TO_REV4 ${CMAKE_SOURCE_DIR}/Plugins/SQL/Upgrades/Rev3ToRev4.sql )
--- a/PostgreSQL/NEWS Tue Feb 04 10:06:33 2025 +0100 +++ b/PostgreSQL/NEWS Fri Feb 14 12:34:26 2025 +0100 @@ -1,7 +1,7 @@ Pending changes in the mainline =============================== -DB schema revision: 3 +DB schema revision: 4 Minimum plugin SDK (for build): 1.12.5 Optimal plugin SDK (for build): 1.12.5 Minimum Orthanc runtime: 1.12.5 @@ -12,6 +12,13 @@ Maintenance: +* Optimized the SQL query that is maintaing the childCount + when new instances are ingested. +* New configuration "AllowInconsistentChildCounts" to speed + up childCount computation. If set to true, childCount values + of recently ingested resources will be incorrect until the next + execution of the DB Housekeeping thread (defined by "HousekeepingInterval" + - default value is 1s) * Fixed high memory usage due to caching of too many prepared SQL statement when using since & limit. * Removed duplicate comparison in find SQL queries.
--- a/PostgreSQL/Plugins/PostgreSQLIndex.cpp Tue Feb 04 10:06:33 2025 +0100 +++ b/PostgreSQL/Plugins/PostgreSQLIndex.cpp Fri Feb 14 12:34:26 2025 +0100 @@ -49,13 +49,14 @@ static const GlobalProperty GlobalProperty_HasComputeStatisticsReadOnly = GlobalProperty_DatabaseInternal4; } +#define CURRENT_DB_REVISION 4 namespace OrthancDatabases { PostgreSQLIndex::PostgreSQLIndex(OrthancPluginContext* context, const PostgreSQLParameters& parameters, bool readOnly) : - IndexBackend(context, readOnly), + IndexBackend(context, readOnly, parameters.GetAllowInconsistentChildCounts()), parameters_(parameters), clearAll_(false), hkHasComputedAllMissingChildCount_(false) @@ -122,6 +123,7 @@ { DatabaseManager::Transaction t(manager, TransactionType_ReadWrite); + bool hasAppliedAnUpgrade = false; if (!t.GetDatabaseTransaction().DoesTableExist("Resources")) { @@ -147,31 +149,12 @@ throw Orthanc::OrthancException(Orthanc::ErrorCode_Database); } - bool applyUpgradeFromUnknownToV1 = false; - bool applyUpgradeV1toV2 = false; - bool applyUpgradeV2toV3 = false; - bool applyPrepareIndex = false; - - int revision; - if (!LookupGlobalIntegerProperty(revision, manager, MISSING_SERVER_IDENTIFIER, Orthanc::GlobalProperty_DatabasePatchLevel)) + int currentRevision = 0; + if (!LookupGlobalIntegerProperty(currentRevision, manager, MISSING_SERVER_IDENTIFIER, Orthanc::GlobalProperty_DatabasePatchLevel)) { - LOG(WARNING) << "No DatabasePatchLevel found, assuming it's 1"; - revision = 1; - applyUpgradeFromUnknownToV1 = true; - applyUpgradeV1toV2 = true; - applyUpgradeV2toV3 = true; + LOG(WARNING) << "No Database revision found"; } - else if (revision == 1) - { - LOG(WARNING) << "DatabasePatchLevel is 1"; - applyUpgradeV1toV2 = true; - applyUpgradeV2toV3 = true; - } - else if (revision == 2) - { - LOG(WARNING) << "DatabasePatchLevel is 2"; - applyUpgradeV2toV3 = true; - } + LOG(WARNING) << "Current Database revision is " << currentRevision; int hasTrigram = 0; if (!LookupGlobalIntegerProperty(hasTrigram, manager, MISSING_SERVER_IDENTIFIER, @@ -180,9 +163,12 @@ { // We've observed 9 minutes on DB with 100000 studies LOG(WARNING) << "The DB schema update will try to enable trigram matching on the PostgreSQL database " - << "to speed up wildcard searches. This may take several minutes"; - applyUpgradeV1toV2 = true; - applyUpgradeV2toV3 = true; + << "to speed up wildcard searches. This may take several minutes. "; + if (currentRevision > 0) + { + LOG(WARNING) << "Considering current revision is 1"; + currentRevision = 1; + } } int property = 0; @@ -190,15 +176,18 @@ Orthanc::GlobalProperty_GetLastChangeIndex) || property != 1) { - applyUpgradeV1toV2 = true; - applyUpgradeV2toV3 = true; + LOG(WARNING) << "The DB schema does not contain the GetLastChangeIndex update. "; + if (currentRevision > 0) + { + LOG(WARNING) << "Considering current revision is 1"; + currentRevision = 1; + } + } // If you add new tests here, update the test in the "ReadOnly" code below - applyPrepareIndex = applyUpgradeV2toV3; - - if (applyUpgradeFromUnknownToV1) + if (currentRevision == 0) { LOG(WARNING) << "Upgrading DB schema from unknown to revision 1"; std::string query; @@ -206,9 +195,10 @@ Orthanc::EmbeddedResources::GetFileResource (query, Orthanc::EmbeddedResources::POSTGRESQL_UPGRADE_UNKNOWN_TO_REV1); t.GetDatabaseTransaction().ExecuteMultiLines(query); + currentRevision = 1; } - if (applyUpgradeV1toV2) + if (currentRevision == 1) { LOG(WARNING) << "Upgrading DB schema from revision 1 to revision 2"; @@ -217,9 +207,10 @@ Orthanc::EmbeddedResources::GetFileResource (query, Orthanc::EmbeddedResources::POSTGRESQL_UPGRADE_REV1_TO_REV2); t.GetDatabaseTransaction().ExecuteMultiLines(query); + currentRevision = 2; } - if (applyUpgradeV2toV3) + if (currentRevision == 2) { LOG(WARNING) << "Upgrading DB schema from revision 2 to revision 3"; @@ -228,17 +219,61 @@ Orthanc::EmbeddedResources::GetFileResource (query, Orthanc::EmbeddedResources::POSTGRESQL_UPGRADE_REV2_TO_REV3); t.GetDatabaseTransaction().ExecuteMultiLines(query); + currentRevision = 3; + } + + if (currentRevision == 3) + { + LOG(WARNING) << "Upgrading DB schema from revision 3 to revision 4"; + + std::string query; + + Orthanc::EmbeddedResources::GetFileResource + (query, Orthanc::EmbeddedResources::POSTGRESQL_UPGRADE_REV3_TO_REV4); + t.GetDatabaseTransaction().ExecuteMultiLines(query); + hasAppliedAnUpgrade = true; + currentRevision = 4; } - if (applyPrepareIndex) + if (hasAppliedAnUpgrade) { + LOG(WARNING) << "Upgrading DB schema by applying PrepareIndex.sql"; // apply all idempotent changes that are in the PrepareIndex.sql ApplyPrepareIndex(t, manager); + + if (!LookupGlobalIntegerProperty(currentRevision, manager, MISSING_SERVER_IDENTIFIER, Orthanc::GlobalProperty_DatabasePatchLevel)) + { + LOG(ERROR) << "No Database revision found after the upgrade !"; + throw Orthanc::OrthancException(Orthanc::ErrorCode_Database); + } + + LOG(WARNING) << "Database revision after the upgrade is " << currentRevision; + + if (currentRevision != CURRENT_DB_REVISION) + { + LOG(ERROR) << "Invalid database revision after the upgrade !"; + throw Orthanc::OrthancException(Orthanc::ErrorCode_Database); + } } } t.Commit(); + + if (hasAppliedAnUpgrade) + { + int currentRevision = 0; + + LookupGlobalIntegerProperty(currentRevision, manager, MISSING_SERVER_IDENTIFIER, Orthanc::GlobalProperty_DatabasePatchLevel); + LOG(WARNING) << "Database revision after the upgrade is " << currentRevision; + + if (currentRevision != CURRENT_DB_REVISION) + { + LOG(ERROR) << "Invalid database revision after the upgrade !"; + throw Orthanc::OrthancException(Orthanc::ErrorCode_Database); + } + } + } } else @@ -250,7 +285,7 @@ // test if the latest "extension" has been installed int revision; if (!LookupGlobalIntegerProperty(revision, manager, MISSING_SERVER_IDENTIFIER, Orthanc::GlobalProperty_DatabasePatchLevel) - || revision != 3) + || revision != CURRENT_DB_REVISION) { LOG(ERROR) << "READ-ONLY SYSTEM: the DB does not have the correct schema to run with this version of the plugin"; throw Orthanc::OrthancException(Orthanc::ErrorCode_Database); @@ -768,5 +803,26 @@ int64_t patientsCount, studiesCount, seriesCount, instancesCount, compressedSize, uncompressedSize; UpdateAndGetStatistics(manager, patientsCount, studiesCount, seriesCount, instancesCount, compressedSize, uncompressedSize); } + + // Update the invalidated childCounts + try + { + DatabaseManager::CachedStatement statement( + STATEMENT_FROM_HERE, manager, + "SELECT * FROM UpdateInvalidChildCounts()"); + + statement.Execute(); + + int64_t updatedCount = statement.ReadInteger64(0); + if (updatedCount > 0) + { + LOG(INFO) << "Updated " << updatedCount << " invalid ChildCount entries"; + } + } + catch (Orthanc::OrthancException&) + { + // the statement may fail in case of temporary deadlock -> it will be retried at the next HK + LOG(INFO) << "Updat of invalid ChildCount entries has failed (will be retried)"; + } } }
--- a/PostgreSQL/Plugins/PostgreSQLIndex.h Tue Feb 04 10:06:33 2025 +0100 +++ b/PostgreSQL/Plugins/PostgreSQLIndex.h Fri Feb 14 12:34:26 2025 +0100 @@ -44,7 +44,7 @@ virtual void ClearRemainingAncestor(DatabaseManager& manager) ORTHANC_OVERRIDE; - virtual bool HasChildCountTable() const ORTHANC_OVERRIDE + virtual bool HasChildCountColumn() const ORTHANC_OVERRIDE { return true; }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PostgreSQL/Plugins/SQL/Downgrades/Rev4ToRev3.sql Fri Feb 14 12:34:26 2025 +0100 @@ -0,0 +1,71 @@ +-- This file contains an SQL procedure to downgrade from schema Rev4 to Rev3 (version = 6). + -- It re-installs the old childcount trigger mechanisms + +DROP TRIGGER IF EXISTS IncrementChildCount on Resources; +DROP TRIGGER IF EXISTS DecrementChildCount on Resources; + +CREATE OR REPLACE FUNCTION UpdateChildCount() +RETURNS TRIGGER AS $body$ +BEGIN + IF TG_OP = 'INSERT' THEN + IF new.parentId IS NOT NULL THEN + -- try to increment the childcount from the parent + -- note that we already have the lock on this row because the parent is locked in CreateInstance + UPDATE Resources + SET childCount = childCount + 1 + WHERE internalId = new.parentId AND childCount IS NOT NULL; + + -- this should only happen for old studies whose childCount has not yet been computed + -- note: this child has already been added so it will be counted + IF NOT FOUND THEN + UPDATE Resources + SET childCount = (SELECT COUNT(*) + FROM Resources + WHERE internalId = new.parentId) + WHERE internalId = new.parentId; + END IF; + END IF; + + ELSIF TG_OP = 'DELETE' THEN + + IF old.parentId IS NOT NULL THEN + + -- Decrement the child count for the parent + -- note that we already have the lock on this row because the parent is locked in DeleteResource + UPDATE Resources + SET childCount = childCount - 1 + WHERE internalId = old.parentId AND childCount IS NOT NULL; + + -- this should only happen for old studies whose childCount has not yet been computed + -- note: this child has already been removed so it will not be counted + IF NOT FOUND THEN + UPDATE Resources + SET childCount = (SELECT COUNT(*) + FROM Resources + WHERE internalId = new.parentId) + WHERE internalId = new.parentId; + END IF; + END IF; + + END IF; + RETURN NULL; +END; +$body$ LANGUAGE plpgsql; + +CREATE TRIGGER IncrementChildCount +AFTER INSERT ON Resources +FOR EACH ROW +EXECUTE PROCEDURE UpdateChildCount(); + +CREATE TRIGGER DecrementChildCount +AFTER DELETE ON Resources +FOR EACH ROW +WHEN (OLD.parentId IS NOT NULL) +EXECUTE PROCEDURE UpdateChildCount(); + +----------- + +-- set the global properties that actually documents the DB version, revision and some of the capabilities +-- modify only the ones that have changed +DELETE FROM GlobalProperties WHERE property IN (4, 11); +INSERT INTO GlobalProperties VALUES (4, 3); -- GlobalProperty_DatabasePatchLevel
--- a/PostgreSQL/Plugins/SQL/PrepareIndex.sql Tue Feb 04 10:06:33 2025 +0100 +++ b/PostgreSQL/Plugins/SQL/PrepareIndex.sql Fri Feb 14 12:34:26 2025 +0100 @@ -626,6 +626,55 @@ $body$ LANGUAGE plpgsql; +-- -- new in rev4 + +-- This table records all resource entries whose childCount column is currently invalid +-- because of recent addition/removal of a child. +-- This way, each transaction that is adding/removing a child can add row independently +-- in this table without having to lock the parent resource row. +-- At regular interval, the DB housekeeping thread updates the childCount column of +-- resources with an entry in this table. +CREATE TABLE IF NOT EXISTS InvalidChildCounts( + id BIGINT REFERENCES Resources(internalId) ON DELETE CASCADE, + updatedAt TIMESTAMP DEFAULT NOW()); + +-- Updates the Resources.childCount column with the delta that have not been committed yet. +-- A thread will call this function at regular interval to update all pending values. +CREATE OR REPLACE FUNCTION UpdateInvalidChildCounts( + OUT updated_rows_count BIGINT +) RETURNS BIGINT AS $body$ +DECLARE + locked_resources_ids BIGINT[]; +BEGIN + + -- Lock the resources rows asap to prevent deadlocks + -- that will need to be retried + SELECT ARRAY(SELECT internalId + FROM Resources + WHERE internalId IN (SELECT DISTINCT id FROM InvalidChildCounts) + FOR UPDATE) + INTO locked_resources_ids; + + -- New rows can be added in the meantime, they won't be taken into account this time. + WITH deleted_rows AS ( + DELETE FROM InvalidChildCounts + WHERE id = ANY(locked_resources_ids) + RETURNING id + ) + + UPDATE Resources + SET childCount = (SELECT COUNT(childLevel.internalId) + FROM Resources AS childLevel + WHERE childLevel.parentId = Resources.internalId) + WHERE internalid = ANY(locked_resources_ids); + + -- Get the number of rows affected + GET DIAGNOSTICS updated_rows_count = ROW_COUNT; + +END; +$body$ LANGUAGE plpgsql; + + DROP TRIGGER IF EXISTS IncrementChildCount on Resources; DROP TRIGGER IF EXISTS DecrementChildCount on Resources; @@ -635,42 +684,20 @@ BEGIN IF TG_OP = 'INSERT' THEN IF new.parentId IS NOT NULL THEN - -- try to increment the childcount from the parent - -- note that we already have the lock on this row because the parent is locked in CreateInstance - UPDATE Resources - SET childCount = childCount + 1 - WHERE internalId = new.parentId AND childCount IS NOT NULL; - - -- this should only happen for old studies whose childCount has not yet been computed - -- note: this child has already been added so it will be counted - IF NOT FOUND THEN - UPDATE Resources - SET childCount = (SELECT COUNT(*) - FROM Resources - WHERE internalId = new.parentId) - WHERE internalId = new.parentId; - END IF; + -- mark the parent's childCount as invalid + INSERT INTO InvalidChildCounts VALUES(new.parentId); END IF; ELSIF TG_OP = 'DELETE' THEN IF old.parentId IS NOT NULL THEN - - -- Decrement the child count for the parent - -- note that we already have the lock on this row because the parent is locked in DeleteResource - UPDATE Resources - SET childCount = childCount - 1 - WHERE internalId = old.parentId AND childCount IS NOT NULL; - - -- this should only happen for old studies whose childCount has not yet been computed - -- note: this child has already been removed so it will not be counted - IF NOT FOUND THEN - UPDATE Resources - SET childCount = (SELECT COUNT(*) - FROM Resources - WHERE internalId = new.parentId) - WHERE internalId = new.parentId; - END IF; + BEGIN + -- mark the parent's childCount as invalid + INSERT INTO InvalidChildCounts VALUES(old.parentId); + EXCEPTION + -- when deleting the last child of a parent, the insert will fail (this is expected) + WHEN foreign_key_violation THEN NULL; + END; END IF; END IF; @@ -694,7 +721,7 @@ -- set the global properties that actually documents the DB version, revision and some of the capabilities DELETE FROM GlobalProperties WHERE property IN (1, 4, 6, 10, 11, 12, 13, 14); INSERT INTO GlobalProperties VALUES (1, 6); -- GlobalProperty_DatabaseSchemaVersion -INSERT INTO GlobalProperties VALUES (4, 3); -- GlobalProperty_DatabasePatchLevel +INSERT INTO GlobalProperties VALUES (4, 4); -- GlobalProperty_DatabasePatchLevel INSERT INTO GlobalProperties VALUES (6, 1); -- GlobalProperty_GetTotalSizeIsFast INSERT INTO GlobalProperties VALUES (10, 1); -- GlobalProperty_HasTrigramIndex INSERT INTO GlobalProperties VALUES (11, 3); -- GlobalProperty_HasCreateInstance -- this is actually the 3rd version of HasCreateInstance
--- a/PostgreSQL/Plugins/SQL/Upgrades/Rev2ToRev3.sql Tue Feb 04 10:06:33 2025 +0100 +++ b/PostgreSQL/Plugins/SQL/Upgrades/Rev2ToRev3.sql Fri Feb 14 12:34:26 2025 +0100 @@ -40,5 +40,5 @@ -- other changes performed in PrepareIndex.sql: - -- add ChildCount tables and triggers + -- add ChildCount triggers
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PostgreSQL/Plugins/SQL/Upgrades/Rev3ToRev4.sql Fri Feb 14 12:34:26 2025 +0100 @@ -0,0 +1,2 @@ +-- everything is performed in PrepareIndex.sql +SELECT 1; \ No newline at end of file
--- a/SQLite/Plugins/SQLiteIndex.h Tue Feb 04 10:06:33 2025 +0100 +++ b/SQLite/Plugins/SQLiteIndex.h Fri Feb 14 12:34:26 2025 +0100 @@ -84,7 +84,7 @@ const Orthanc::DatabasePluginMessages::Find_Request& request) ORTHANC_OVERRIDE; #endif - virtual bool HasChildCountTable() const ORTHANC_OVERRIDE + virtual bool HasChildCountColumn() const ORTHANC_OVERRIDE { return false; }