changeset 433:5964ce6385a5 pg-transactions

use temporary tables for DeletedFiles, RemainingAncestor and DeletedResources
author Alain Mazy <am@osimis.io>
date Wed, 13 Dec 2023 15:48:56 +0100
parents 8b7c1c423367
children 23c7af6f671a
files Framework/Plugins/IndexBackend.cpp Framework/Plugins/IndexBackend.h NOTES PostgreSQL/Plugins/PostgreSQLIndex.cpp PostgreSQL/Plugins/PostgreSQLIndex.h PostgreSQL/Plugins/ResourceDeletedFunc.sql
diffstat 6 files changed, 131 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/Framework/Plugins/IndexBackend.cpp	Mon Dec 11 14:39:27 2023 +0100
+++ b/Framework/Plugins/IndexBackend.cpp	Wed Dec 13 15:48:56 2023 +0100
@@ -176,6 +176,16 @@
             statement.IsDone());
   }
 
+  void IndexBackend::ClearRemainingAncestor(DatabaseManager& manager)
+  {
+    DatabaseManager::CachedStatement statement(
+      STATEMENT_FROM_HERE, manager,
+      "DELETE FROM RemainingAncestor");
+
+    statement.Execute();
+  }
+
+
 
   void IndexBackend::ClearDeletedFiles(DatabaseManager& manager)
   {
@@ -433,14 +443,7 @@
   {
     ClearDeletedFiles(manager);
     ClearDeletedResources(manager);
-    
-    {
-      DatabaseManager::CachedStatement statement(
-        STATEMENT_FROM_HERE, manager,
-        "DELETE FROM RemainingAncestor");
-
-      statement.Execute();
-    }
+    ClearRemainingAncestor(manager);
       
     {
       DatabaseManager::CachedStatement statement(
@@ -460,7 +463,6 @@
       DatabaseManager::CachedStatement statement(
         STATEMENT_FROM_HERE, manager,
         "SELECT * FROM RemainingAncestor");
-
       statement.Execute();
 
       if (!statement.IsDone())
@@ -470,12 +472,13 @@
           static_cast<OrthancPluginResourceType>(statement.ReadInteger32(0)));
           
         // There is at most 1 remaining ancestor
-        //assert((statement.Next(), statement.IsDone()));
+        assert((statement.Next(), statement.IsDone()));
       }
     }
-    
+
     SignalDeletedFiles(output, manager);
     SignalDeletedResources(output, manager);
+
   }
 
 
--- a/Framework/Plugins/IndexBackend.h	Mon Dec 11 14:39:27 2023 +0100
+++ b/Framework/Plugins/IndexBackend.h	Wed Dec 13 15:48:56 2023 +0100
@@ -46,9 +46,11 @@
     std::unique_ptr<IDatabaseBackendOutput::IFactory>  outputFactory_;
     
   protected:
-    void ClearDeletedFiles(DatabaseManager& manager);
+    virtual void ClearDeletedFiles(DatabaseManager& manager);
 
-    void ClearDeletedResources(DatabaseManager& manager);
+    virtual void ClearDeletedResources(DatabaseManager& manager);
+
+    virtual void ClearRemainingAncestor(DatabaseManager& manager);
 
     void SignalDeletedFiles(IDatabaseBackendOutput& output,
                             DatabaseManager& manager);
--- a/NOTES	Mon Dec 11 14:39:27 2023 +0100
+++ b/NOTES	Wed Dec 13 15:48:56 2023 +0100
@@ -1,7 +1,9 @@
 Resources:
 *********
 - PG transaction modes explained: https://www.postgresql.org/files/developer/concurrency.pdf
-- Isoolation level explained (PG + MySQL): https://amirsoleimani.medium.com/understanding-database-isolation-level-via-examples-mysql-and-postgres-a86b5502d404
+- Isolation level explained (PG + MySQL): https://amirsoleimani.medium.com/understanding-database-isolation-level-via-examples-mysql-and-postgres-a86b5502d404
+- Message queuing in PG: https://www.crunchydata.com/blog/message-queuing-using-native-postgresql
+
 
 Create and delete instances Internals:
 *************************************
@@ -166,17 +168,32 @@
 ************************************************************************
 
 In debug, no verbose logs
-Orthanc 1.12.1 + PG 5.1 (serializable mode)           : test_concurrent_anonymize_same_study with 4 workers and 10x repeat: 51.703 s
-Orthanc mainline + PG maineline (serializable mode)   : test_concurrent_anonymize_same_study with 4 workers and 10x repeat: 51.913 s
-Orthanc mainline + PG maineline (read-committed mode) : test_concurrent_anonymize_same_study with 4 workers and 10x repeat: 24.754 s
+Orthanc 1.12.1 + PG 5.1 (serializable mode)          : test_concurrent_anonymize_same_study with 4 workers and 10x repeat: 43.957 s
+Orthanc mainline + PG mainline (serializable mode)   : test_concurrent_anonymize_same_study with 4 workers and 10x repeat: XX s
+Orthanc mainline + PG mainline (read-committed mode) : test_concurrent_anonymize_same_study with 4 workers and 10x repeat: 19.861 s
+Orthanc mainline + PG mainline (read-committed mode) : test_concurrent_anonymize_same_study with 4 workers and 10x repeat: 22.500 s (with temporary tables)
 
-Orthanc 1.12.1 + PG 5.1 (serializable mode)           : test_concurrent_uploads_same_study with 20 workers and 1x repeat: 21.214 s
-Orthanc mainline + PG maineline (serializable mode)   : fails: No new instance while overwriting; this should not happen
-Orthanc mainline + PG maineline (read-committed mode) : test_concurrent_uploads_same_study with 20 workers and 1x repeat: 1.394 s (with assert((statement.Next(), statement.IsDone())) commented out)
+Orthanc 1.12.1 + PG 5.1 (serializable mode)          : test_concurrent_uploads_same_study with 20 workers and 1x repeat: 21.214 s
+Orthanc mainline + PG mainline (serializable mode)   : test_concurrent_uploads_same_study with 20 workers and 1x repeat: 23.010 s
+Orthanc mainline + PG mainline (read-committed mode) : test_concurrent_uploads_same_study with 20 workers and 1x repeat: 1.71 s (with RemainingAncestor.id)
+Orthanc mainline + PG mainline (read-committed mode) : test_concurrent_uploads_same_study with 20 workers and 1x repeat: 3.26 s (with temporary tables)
+
+Orthanc 1.12.1 + PG 5.1 (serializable mode)          : test_upload_delete_same_study_from_multiple_threads with 5 workers and 30x repeat: 23.016 s
+Orthanc mainline + PG mainline (read-committed mode) : test_upload_delete_same_study_from_multiple_threads with 5 workers and 30x repeat: 8.788 s
+Orthanc mainline + PG mainline (read-committed mode) : test_upload_delete_same_study_from_multiple_threads with 5 workers and 30x repeat: 17.129 s (with temporary tables)
 
 TODO:
 - assert((statement.Next(), statement.IsDone())) commented out  -> create temporary tables ?
+  try again temporary tables (with different names ?  Have a switch in the code ?)
+- have a separate "thread" to increment/decrement statistics because everybody is fighting to modify the GlobalIntegers rows
+- test events generation StableSeries ....
+- test RemainingAncestor response in integration tests: OK
+- disable MySQL ODBC plugin
+- run tests with docker localy + in CI
+- check https://discourse.orthanc-server.org/t/image-insert-are-too-slow-databse-performance-too-poor-when-using-mysql-mariadb/3820
 - PatientAddedFunc contains an IF
 - validate upgrade DB from previous Orthanc and from scratch
 - test with older version of PG
+
+DONE:
 - force the create/update DB transaction to be serializable (not needed: this is handled by POSTGRESQL_LOCK_DATABASE_SETUP)
--- a/PostgreSQL/Plugins/PostgreSQLIndex.cpp	Mon Dec 11 14:39:27 2023 +0100
+++ b/PostgreSQL/Plugins/PostgreSQLIndex.cpp	Wed Dec 13 15:48:56 2023 +0100
@@ -465,6 +465,81 @@
     }
   }
 
+  void PostgreSQLIndex::ClearDeletedFiles(DatabaseManager& manager)
+  {
+    { // note: the temporary table lifespan is the session, not the transaction -> that's why we need the IF NOT EXISTS
+      DatabaseManager::CachedStatement statement(
+        STATEMENT_FROM_HERE, manager,
+        "CREATE TEMPORARY TABLE IF NOT EXISTS DeletedFiles("
+        "uuid VARCHAR(64) NOT NULL,"
+        "fileType INTEGER,         "
+        "compressedSize BIGINT,    "
+        "uncompressedSize BIGINT,  "
+        "compressionType INTEGER,  "
+        "uncompressedHash VARCHAR(40),"
+        "compressedHash VARCHAR(40)"
+        ");"
+        );
+      statement.Execute();
+    }
+    {
+      DatabaseManager::CachedStatement statement(
+        STATEMENT_FROM_HERE, manager,
+        "DELETE FROM DeletedFiles;"
+        );
+
+      statement.Execute();
+    }
+  }
+
+  void PostgreSQLIndex::ClearDeletedResources(DatabaseManager& manager)
+  {
+    { // note: the temporary table lifespan is the session, not the transaction -> that's why we need the IF NOT EXISTS
+      DatabaseManager::CachedStatement statement(
+        STATEMENT_FROM_HERE, manager,
+        "CREATE TEMPORARY TABLE IF NOT EXISTS  DeletedResources("
+        "resourceType INTEGER NOT NULL,"
+        "publicId VARCHAR(64) NOT NULL"
+        ");"
+        );
+      statement.Execute();
+    }
+    {
+      DatabaseManager::CachedStatement statement(
+        STATEMENT_FROM_HERE, manager,
+        "DELETE FROM DeletedResources;"
+        );
+
+      statement.Execute();
+    }
+
+  }
+
+  void PostgreSQLIndex::ClearRemainingAncestor(DatabaseManager& manager)
+  {
+    { // note: the temporary table lifespan is the session, not the transaction -> that's why we need the IF NOT EXISTS
+      DatabaseManager::CachedStatement statement(
+        STATEMENT_FROM_HERE, manager,
+        "CREATE TEMPORARY TABLE IF NOT EXISTS  RemainingAncestor("
+        "resourceType INTEGER NOT NULL,"
+        "publicId VARCHAR(64) NOT NULL"
+        ")"
+        );
+
+      statement.Execute();
+    }
+    {
+      DatabaseManager::CachedStatement statement(
+        STATEMENT_FROM_HERE, manager,
+        "DELETE FROM RemainingAncestor;"
+        );
+
+      statement.Execute();
+    }
+
+  }
+
+
 
 #if ORTHANC_PLUGINS_HAS_DATABASE_CONSTRAINT == 1
   void PostgreSQLIndex::CreateInstance(OrthancPluginCreateInstanceResult& result,
--- a/PostgreSQL/Plugins/PostgreSQLIndex.h	Mon Dec 11 14:39:27 2023 +0100
+++ b/PostgreSQL/Plugins/PostgreSQLIndex.h	Wed Dec 13 15:48:56 2023 +0100
@@ -33,6 +33,14 @@
     PostgreSQLParameters   parameters_;
     bool                   clearAll_;
 
+  protected:
+    virtual void ClearDeletedFiles(DatabaseManager& manager);
+
+    virtual void ClearDeletedResources(DatabaseManager& manager);
+
+    virtual void ClearRemainingAncestor(DatabaseManager& manager);
+
+
   public:
     PostgreSQLIndex(OrthancPluginContext* context,
                     const PostgreSQLParameters& parameters);
--- a/PostgreSQL/Plugins/ResourceDeletedFunc.sql	Mon Dec 11 14:39:27 2023 +0100
+++ b/PostgreSQL/Plugins/ResourceDeletedFunc.sql	Wed Dec 13 15:48:56 2023 +0100
@@ -11,7 +11,7 @@
   
   -- If this resource still has siblings, keep track of the remaining parent
   -- (a parent that must not be deleted but whose LastUpdate must be updated)
-  INSERT INTO RemainingAncestor SELECT resourceType, publicId 
+  INSERT INTO RemainingAncestor SELECT resourceType, publicId
                                 FROM Resources 
                                 WHERE internalId = old.parentId
                                       AND EXISTS (SELECT 1 FROM Resources WHERE parentId = old.parentId);
@@ -26,3 +26,8 @@
 AFTER DELETE ON Resources
 FOR EACH ROW
 EXECUTE PROCEDURE ResourceDeletedFunc();
+
+-- we'll now use temporary tables so we need to remove the old tables !
+DROP TABLE IF EXISTS DeletedFiles;
+DROP TABLE IF EXISTS RemainingAncestor;
+DROP TABLE IF EXISTS DeletedResources;