changeset 432:8b7c1c423367 pg-transactions

new 'TransactionMode' config + rewrote ResourceDeletedFunc to avoid IF/THEN/ELSE pattern
author Alain Mazy <am@osimis.io>
date Mon, 11 Dec 2023 14:39:27 +0100
parents 7c1fe5d6c12c
children 5964ce6385a5
files Framework/Plugins/IndexBackend.cpp Framework/PostgreSQL/PostgreSQLDatabase.h Framework/PostgreSQL/PostgreSQLParameters.cpp Framework/PostgreSQL/PostgreSQLParameters.h NOTES PostgreSQL/CMakeLists.txt PostgreSQL/NEWS PostgreSQL/Plugins/PostgreSQLIndex.cpp PostgreSQL/Plugins/PrepareIndex.sql PostgreSQL/Plugins/ResourceDeletedFunc.sql
diffstat 10 files changed, 192 insertions(+), 58 deletions(-) [+]
line wrap: on
line diff
--- a/Framework/Plugins/IndexBackend.cpp	Thu Dec 07 12:13:43 2023 +0100
+++ b/Framework/Plugins/IndexBackend.cpp	Mon Dec 11 14:39:27 2023 +0100
@@ -470,7 +470,7 @@
           static_cast<OrthancPluginResourceType>(statement.ReadInteger32(0)));
           
         // There is at most 1 remaining ancestor
-        assert((statement.Next(), statement.IsDone()));
+        //assert((statement.Next(), statement.IsDone()));
       }
     }
     
--- a/Framework/PostgreSQL/PostgreSQLDatabase.h	Thu Dec 07 12:13:43 2023 +0100
+++ b/Framework/PostgreSQL/PostgreSQLDatabase.h	Mon Dec 11 14:39:27 2023 +0100
@@ -109,12 +109,12 @@
     static PostgreSQLDatabase* CreateDatabaseConnection(const PostgreSQLParameters& parameters);
 
   protected:
-    const std::string& GetReadWriteTransactionStatement() const
+    const char* GetReadWriteTransactionStatement() const
     {
       return parameters_.GetReadWriteTransactionStatement();
     }
 
-    const std::string& GetReadOnlyTransactionStatement() const
+    const char* GetReadOnlyTransactionStatement() const
     {
       return parameters_.GetReadOnlyTransactionStatement();
     }
--- a/Framework/PostgreSQL/PostgreSQLParameters.cpp	Thu Dec 07 12:13:43 2023 +0100
+++ b/Framework/PostgreSQL/PostgreSQLParameters.cpp	Mon Dec 11 14:39:27 2023 +0100
@@ -42,8 +42,6 @@
     lock_ = true;
     maxConnectionRetries_ = 10;
     connectionRetryInterval_ = 5;
-    readWriteTransactionStatement_ = "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE READ WRITE";
-    readOnlyTransactionStatement_ = "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY";
     isVerboseEnabled_ = false;
   }
 
@@ -102,16 +100,23 @@
     maxConnectionRetries_ = configuration.GetUnsignedIntegerValue("MaximumConnectionRetries", 10);
     connectionRetryInterval_ = configuration.GetUnsignedIntegerValue("ConnectionRetryInterval", 5);
 
-    if (configuration.LookupStringValue(s, "ReadWriteTransactionStatement"))
+    std::string transactionMode = configuration.GetStringValue("TransactionMode", "SERIALIZABLE");
+    if (transactionMode == "DEFAULT")
     {
-      SetReadWriteTransactionStatement(s);
+      SetIsolationMode(IsolationMode_DbDefault);
+    }
+    else if (transactionMode == "READ COMMITTED")
+    {
+      SetIsolationMode(IsolationMode_ReadCommited);
     }
-
-    if (configuration.LookupStringValue(s, "ReadOnlyTransactionStatement"))
+    else if (transactionMode == "SERIALIZABLE")
     {
-      SetReadOnlyTransactionStatement(s);
+      SetIsolationMode(IsolationMode_Serializable);
     }
-
+    else
+    {
+      throw Orthanc::OrthancException(Orthanc::ErrorCode_BadParameterType, std::string("Invalid value for 'TransactionMode': ") + transactionMode);
+    }
   }
 
 
--- a/Framework/PostgreSQL/PostgreSQLParameters.h	Thu Dec 07 12:13:43 2023 +0100
+++ b/Framework/PostgreSQL/PostgreSQLParameters.h	Mon Dec 11 14:39:27 2023 +0100
@@ -30,6 +30,13 @@
 
 namespace OrthancDatabases
 {
+  enum IsolationMode
+  {
+    IsolationMode_DbDefault = 0,
+    IsolationMode_Serializable = 1,
+    IsolationMode_ReadCommited = 2
+  };
+
   class PostgreSQLParameters
   {
   private:
@@ -43,10 +50,8 @@
     bool         lock_;
     unsigned int maxConnectionRetries_;
     unsigned int connectionRetryInterval_;
-    std::string  readWriteTransactionStatement_;
-    std::string  readOnlyTransactionStatement_;
     bool         isVerboseEnabled_;
-
+    IsolationMode isolationMode_;
     void Reset();
 
   public:
@@ -128,24 +133,39 @@
       return connectionRetryInterval_;
     }
 
-    void SetReadWriteTransactionStatement(const std::string& statement)
+    void SetIsolationMode(IsolationMode isolationMode)
     {
-      readWriteTransactionStatement_ = statement;
+      isolationMode_ = isolationMode;
     }
 
-    void SetReadOnlyTransactionStatement(const std::string& statement)
+    const char* GetReadWriteTransactionStatement() const
     {
-      readOnlyTransactionStatement_ = statement;
+      switch (isolationMode_)
+      {
+        case IsolationMode_DbDefault:
+          return "";
+        case IsolationMode_ReadCommited:
+          return "SET TRANSACTION ISOLATION LEVEL READ COMMITTED READ WRITE";
+        case IsolationMode_Serializable:
+          return "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE READ WRITE";
+        default:
+          throw Orthanc::OrthancException(Orthanc::ErrorCode_NotImplemented);
+      }
     }
 
-    const std::string& GetReadWriteTransactionStatement() const
+    const char* GetReadOnlyTransactionStatement() const
     {
-      return readWriteTransactionStatement_;
-    }
-
-    const std::string& GetReadOnlyTransactionStatement() const
-    {
-      return readOnlyTransactionStatement_;
+      switch (isolationMode_)
+      {
+        case IsolationMode_DbDefault:
+          return "";
+        case IsolationMode_ReadCommited:
+          return "SET TRANSACTION ISOLATION LEVEL READ COMMITTED READ ONLY";
+        case IsolationMode_Serializable:
+          return "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY";
+        default:
+          throw Orthanc::OrthancException(Orthanc::ErrorCode_NotImplemented);
+      }
     }
 
     void SetVerboseEnabled(bool enabled)
--- a/NOTES	Thu Dec 07 12:13:43 2023 +0100
+++ b/NOTES	Mon Dec 11 14:39:27 2023 +0100
@@ -3,6 +3,63 @@
 - PG transaction modes explained: https://www.postgresql.org/files/developer/concurrency.pdf
 - Isoolation level explained (PG + MySQL): https://amirsoleimani.medium.com/understanding-database-isolation-level-via-examples-mysql-and-postgres-a86b5502d404
 
+Create and delete instances Internals:
+*************************************
+
+isNewInstance = CreateInstance(...)
+
+if (!isNewInstance && overwriteInstances)
+  DeleteResource(instance)
+       -> ClearDeletedFiles(manager);
+            DELETE FROM DeletedFiles  ------> this is not a temporary table in postgres but it is supposed to be empty before and after each transaction -> it is empty when taking a snapshot of the DB in READ COMMITTED mode!!!
+          ClearDeletedResources(manager);
+            DELETE FROM DeletedResources  ------> this is not a temporary table in postgres but it is supposed to be empty before and after each transaction !!!
+
+            DELETE FROM RemainingAncestor  ------> this is not a temporary table in postgres but it is supposed to be empty before and after each transaction !!!
+            DELETE FROM Resources WHERE internalId=..
+               -> cascades delete the MainDicomTags, the Metadata and the AttachedFiles
+                  -> this triggers AttachedFileDeletedFunc
+                         INSERT INTO DeletedFiles VALUES
+                            (old.uuid, old.filetype, old.compressedSize,
+                            old.uncompressedSize, old.compressionType,
+                            old.uncompressedHash, old.compressedHash);
+                        RETURN NULL;
+               -> this triggers a SQL trigger: ResourceDeletedFunc
+                        INSERT INTO DeletedResources VALUES (old.resourceType, old.publicId);
+                        IF EXISTS (SELECT 1 FROM Resources WHERE parentId = old.parentId) THEN
+                            -- Signal that the deleted resource has a remaining parent 
+                            -- (a parent that must not be deleted but whose LastUpdate must be updated)
+                            INSERT INTO RemainingAncestor
+                            SELECT resourceType, publicId FROM Resources WHERE internalId = old.parentId;
+                        ELSE
+                            -- Delete a parent resource when its unique child is deleted 
+                            DELETE FROM Resources WHERE internalId = old.parentId;
+                        END IF;
+
+            SELECT * FROM RemainingAncestor
+              -> SignalRemainingAncestor()  // There is at most 1 remaining ancestor
+                 -> ServerIndex::TransactionContext::SignalRemainingAncestor()
+                    -> stores remainingType and remainingPublicId (this is used in StatelessDatabaseOperations::DeleteResource to build the Rest Response of /delete 
+                                                                   and to update the LastUpdate of all parent (only when deleted from /delete))
+
+          SignalDeletedFiles(output, manager);
+            SELECT * FROM DeletedFiles
+              -> SignalDeletedAttachment()
+                 -> ServerIndex::TransactionContext::SignalAttachmentDeleted()
+                    -> pendingFilesToRemove_.push_back(FileToRemove(info))  (files are deleted in CommitFilesToRemove in the ServerIndex::TransactionContext::Commit)
+
+          SignalDeletedResources(output, manager);
+            SELECT resourceType, publicId FROM DeletedResources
+              -> SignalDeletedResource()
+                 -> Emit DeletedResource event (lua)
+
+
+  if (!CreateInstance(...))
+    Error: "No new instance while overwriting; this should not happen"
+
+if isNewInstance -> LogChange
+if isNewSeries -> LogChange
+....
 
 Sample SQL code that you can execute in DBeaver to test new functions/procedures:
 
@@ -113,8 +170,13 @@
 Orthanc mainline + PG maineline (serializable mode)   : test_concurrent_anonymize_same_study with 4 workers and 10x repeat: 51.913 s
 Orthanc mainline + PG maineline (read-committed mode) : test_concurrent_anonymize_same_study with 4 workers and 10x repeat: 24.754 s
 
-Orthanc 1.12.1 + PG 5.1 (serializable mode)           : test_concurrent_uploads_same_study with 40 workers and 1x repeat: 55.932 s
+Orthanc 1.12.1 + PG 5.1 (serializable mode)           : test_concurrent_uploads_same_study with 20 workers and 1x repeat: 21.214 s
 Orthanc mainline + PG maineline (serializable mode)   : fails: No new instance while overwriting; this should not happen
-Orthanc mainline + PG maineline (read-committed mode) : 
+Orthanc mainline + PG maineline (read-committed mode) : test_concurrent_uploads_same_study with 20 workers and 1x repeat: 1.394 s (with assert((statement.Next(), statement.IsDone())) commented out)
 
 TODO:
+- assert((statement.Next(), statement.IsDone())) commented out  -> create temporary tables ?
+- PatientAddedFunc contains an IF
+- validate upgrade DB from previous Orthanc and from scratch
+- test with older version of PG
+- force the create/update DB transaction to be serializable (not needed: this is handled by POSTGRESQL_LOCK_DATABASE_SETUP)
--- a/PostgreSQL/CMakeLists.txt	Thu Dec 07 12:13:43 2023 +0100
+++ b/PostgreSQL/CMakeLists.txt	Mon Dec 11 14:39:27 2023 +0100
@@ -82,6 +82,7 @@
   POSTGRESQL_FAST_TOTAL_SIZE        ${CMAKE_SOURCE_DIR}/Plugins/FastTotalSize.sql
   POSTGRESQL_FAST_COUNT_RESOURCES   ${CMAKE_SOURCE_DIR}/Plugins/FastCountResources.sql
   POSTGRESQL_GET_LAST_CHANGE_INDEX  ${CMAKE_SOURCE_DIR}/Plugins/GetLastChangeIndex.sql
+  POSTGRESQL_RESOURCE_DELETED_FUNC  ${CMAKE_SOURCE_DIR}/Plugins/ResourceDeletedFunc.sql
   )
 
 
--- a/PostgreSQL/NEWS	Thu Dec 07 12:13:43 2023 +0100
+++ b/PostgreSQL/NEWS	Mon Dec 11 14:39:27 2023 +0100
@@ -2,12 +2,13 @@
 ===============================
 
 * Experimental debug feature:
-  Introduced 2 new configurations with these default values:
-    "ReadOnlyTransactionStatement": "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY"
-    "ReadWriteTransactionStatement": "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE READ WRITE"
-  You can now customize the transaction isolation level.
-  If setting these values to "", no statement is run and therefore, the default DB or server
-  isolation level is used.
+  Introduced a new configuration to select the transaction isolation level
+    "TransactionMode": "SERIALIZABLE"
+  Allowed values: "SERIALIZABLE", "READ COMMITTED", "DEFAULT".
+  The "SERIALIZABLE" mode was the only available value up to now.  It is still the default
+  value now.
+  The "READ COMMITTED" is possible now due to rewrites of SQL queries.
+  The "DEFAULT" value uses the default transaction isolation level defined at the database level.
 * internals:
   - Added a UNIQUE constraint on Resources.publicId to detect DB inconsistencies
 * New "EnableVerboseLogs" configuration to show SQL statements being executed.
--- a/PostgreSQL/Plugins/PostgreSQLIndex.cpp	Thu Dec 07 12:13:43 2023 +0100
+++ b/PostgreSQL/Plugins/PostgreSQLIndex.cpp	Mon Dec 11 14:39:27 2023 +0100
@@ -142,7 +142,20 @@
           SetGlobalIntegerProperty(manager, MISSING_SERVER_IDENTIFIER, Orthanc::GlobalProperty_DatabasePatchLevel, revision);
         }
 
-        if (revision != 2)
+        if (revision == 2)
+        {
+          LOG(WARNING) << "PostgreSQL plugin: adding or replacing ResourceDeletedFunc";
+
+          std::string query;
+          Orthanc::EmbeddedResources::GetFileResource
+            (query, Orthanc::EmbeddedResources::POSTGRESQL_RESOURCE_DELETED_FUNC);
+          t.GetDatabaseTransaction().ExecuteMultiLines(query);
+
+          revision = 3;
+          SetGlobalIntegerProperty(manager, MISSING_SERVER_IDENTIFIER, Orthanc::GlobalProperty_DatabasePatchLevel, revision);
+        }
+
+        if (revision != 3)
         {
           LOG(ERROR) << "PostgreSQL plugin is incompatible with database schema revision: " << revision;
           throw Orthanc::OrthancException(Orthanc::ErrorCode_Database);        
@@ -489,6 +502,8 @@
       statement.SetResultFieldType(i, ValueType_Integer64);
     }
 
+    // LOG(INFO) << statement.ReadInteger64(0) << statement.ReadInteger64(1) << statement.ReadInteger64(2) << statement.ReadInteger64(3);
+
     result.isNewInstance = (statement.ReadInteger64(3) == 1);
     result.instanceId = statement.ReadInteger64(7);
 
--- a/PostgreSQL/Plugins/PrepareIndex.sql	Thu Dec 07 12:13:43 2023 +0100
+++ b/PostgreSQL/Plugins/PrepareIndex.sql	Mon Dec 11 14:39:27 2023 +0100
@@ -124,32 +124,34 @@
 EXECUTE PROCEDURE AttachedFileDeletedFunc();
 
 
--- The following trigger combines 2 triggers from SQLite:
--- ResourceDeleted + ResourceDeletedParentCleaning
-CREATE FUNCTION ResourceDeletedFunc() 
-RETURNS TRIGGER AS $body$
-BEGIN
-  --RAISE NOTICE 'Delete resource %', old.parentId;
-  INSERT INTO DeletedResources VALUES (old.resourceType, old.publicId);
+-- previous version of the trigger, moved into ResourceDeletedFunc.sql
+-- 
+-- -- The following trigger combines 2 triggers from SQLite:
+-- -- ResourceDeleted + ResourceDeletedParentCleaning
+-- CREATE FUNCTION ResourceDeletedFunc() 
+-- RETURNS TRIGGER AS $body$
+-- BEGIN
+--   --RAISE NOTICE 'Delete resource %', old.parentId;
+--   INSERT INTO DeletedResources VALUES (old.resourceType, old.publicId);
   
-  -- http://stackoverflow.com/a/11299968/881731
-  IF EXISTS (SELECT 1 FROM Resources WHERE parentId = old.parentId) THEN
-    -- Signal that the deleted resource has a remaining parent 
-    -- (a parent that must not be deleted but whose LastUpdate must be updated)
-    INSERT INTO RemainingAncestor
-      SELECT resourceType, publicId FROM Resources WHERE internalId = old.parentId;
-  ELSE
-    -- Delete a parent resource when its unique child is deleted 
-    DELETE FROM Resources WHERE internalId = old.parentId;
-  END IF;
-  RETURN NULL;
-END;
-$body$ LANGUAGE plpgsql;
+--   -- http://stackoverflow.com/a/11299968/881731
+--   IF EXISTS (SELECT 1 FROM Resources WHERE parentId = old.parentId) THEN
+--     -- Signal that the deleted resource has a remaining parent 
+--     -- (a parent that must not be deleted but whose LastUpdate must be updated)
+--     INSERT INTO RemainingAncestor
+--       SELECT resourceType, publicId FROM Resources WHERE internalId = old.parentId;
+--   ELSE
+--     -- Delete a parent resource when its unique child is deleted 
+--     DELETE FROM Resources WHERE internalId = old.parentId;
+--   END IF;
+--   RETURN NULL;
+-- END;
+-- $body$ LANGUAGE plpgsql;
 
-CREATE TRIGGER ResourceDeleted
-AFTER DELETE ON Resources
-FOR EACH ROW
-EXECUTE PROCEDURE ResourceDeletedFunc();
+-- CREATE TRIGGER ResourceDeleted
+-- AFTER DELETE ON Resources
+-- FOR EACH ROW
+-- EXECUTE PROCEDURE ResourceDeletedFunc();
 
 
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/PostgreSQL/Plugins/ResourceDeletedFunc.sql	Mon Dec 11 14:39:27 2023 +0100
@@ -0,0 +1,28 @@
+-- this script can be used either the first time we create the DB or during an upgrade
+DROP TRIGGER IF EXISTS ResourceDeleted ON Resources;
+
+-- The following trigger combines 2 triggers from SQLite:
+-- ResourceDeleted + ResourceDeletedParentCleaning
+CREATE OR REPLACE FUNCTION ResourceDeletedFunc() 
+RETURNS TRIGGER AS $body$
+BEGIN
+  --RAISE NOTICE 'Delete resource %', old.parentId;
+  INSERT INTO DeletedResources VALUES (old.resourceType, old.publicId);
+  
+  -- If this resource still has siblings, keep track of the remaining parent
+  -- (a parent that must not be deleted but whose LastUpdate must be updated)
+  INSERT INTO RemainingAncestor SELECT resourceType, publicId 
+                                FROM Resources 
+                                WHERE internalId = old.parentId
+                                      AND EXISTS (SELECT 1 FROM Resources WHERE parentId = old.parentId);
+  -- If this resource is the latest child, delete the parent
+  DELETE FROM Resources WHERE internalId = old.parentId
+                              AND NOT EXISTS (SELECT 1 FROM Resources WHERE parentId = old.parentId);
+  RETURN NULL;
+END;
+$body$ LANGUAGE plpgsql;
+
+CREATE TRIGGER ResourceDeleted
+AFTER DELETE ON Resources
+FOR EACH ROW
+EXECUTE PROCEDURE ResourceDeletedFunc();