comparison NOTES @ 502:bf4b9c7cf338

todo & notes
author Alain Mazy <am@orthanc.team>
date Fri, 03 May 2024 16:22:02 +0200
parents 0a8b34e3a337
children
comparison
equal deleted inserted replaced
500:c27071770c04 502:bf4b9c7cf338
210 1 connection : test_concurrent_uploads_same_study with 20 workers and 1x repeat: 10.223 s 210 1 connection : test_concurrent_uploads_same_study with 20 workers and 1x repeat: 10.223 s
211 10 connections : test_upload_delete_same_study_from_multiple_threads with 5 workers and 3x repeat (10x): 7.129 s 211 10 connections : test_upload_delete_same_study_from_multiple_threads with 5 workers and 3x repeat (10x): 7.129 s
212 1 connection : test_upload_delete_same_study_from_multiple_threads with 5 workers and 3x repeat (10x): 11.172 s 212 1 connection : test_upload_delete_same_study_from_multiple_threads with 5 workers and 3x repeat (10x): 11.172 s
213 213
214 214
215 TODO: 215 LARGE QUERIES
216 - have a separate "thread" to UpdateStatistics ? 216 -------------
217 217
218 - check https://discourse.orthanc-server.org/t/image-insert-are-too-slow-databse-performance-too-poor-when-using-mysql-mariadb/3820 218 new extended-api-v1/tools/find requirements (note: extended-api-v1 is a code name:-) ):
219 219 - be able to list child-resources like in /studies/.../instances?expand
220 DONE: 220 => we need a "ParentStudy" in the query. Note: a single "Parent" field is not enough since we sometimes want to specify a "Parent"
221 - implement a downgrade script ? And test it in PotgresUpgrades integ tests 221 that is 2 or 3 levels higher (e.g /patients/../instances)
222 - test the transfer plugin 222 e.g: {
223 - perf tests: upload generated data (different studies) 223 "Level": "Instance",
224 - In Docker images, re-enable MySQL & ODBC plugins + tests 224 "Query": {
225 - reenable PatientRecyclingOrder 225 "ParentStudy": "orthanc-study-id",
226 - force the create/update DB transaction to be serializable (not needed: this is handled by POSTGRESQL_LOCK_DATABASE_SETUP) 226 },
227 - PatientAddedFunc contains an IF (check if other IF/THEN/ELSE pattern remains) 227 }
228 - validate upgrade DB from previous Orthanc and from scratch 228 - be able to search on labels like in the current tools/find implementation
229 - check minimal version of PG (9.5 - 9.6 ? for create index if not exists): seems to work with 9.5 cfr PotgresUpgrades integ tests 229 => we need a "ParentStudy" in the query. Note: a single "Parent" field is not enough since we sometimes want to specify a "Parent"
230 - test events generation StableSeries .... (count the NewSeries, NewInstances event and make sure they match the numb) 230 that is 2 or 3 levels higher (e.g /patients/../instances)
231 e.g: {
232 "Level": "Study",
233 "Query": {
234 "Labels": ["hello"],
235 "LabelsConstraint" : "All",
236 },
237 }
238 - be able to select what are the expected fields in the response
239 => we need a "ExpandContent" field in the query.
240 If there are no "ExpandContent", we simply return a list of IDs
241 e.g: {
242 "Level": "Series",
243 "Query": {
244 "ParentStudy": "orthanc-study-id",
245 },
246 "ExpandContent": [
247 "Children", // the list of orthanc-ids of the child of the resources (equivalent to e.g /series/../instances)
248 "Parent", // the resource parent id (e.g. "ParentStudy" for a series)
249 "Labels", // the resources labels (equivalent to e.g /series/../labels)
250 "Metadata", // the resources metadata (equivalent to e.g. /series/../metadata?expand)
251 "MainDicomTags", // the resources MainDicomTags
252 "FileInfo", // "FileSize" + "FileUuid", applicable only to instances
253 // TODO: what about "IsStable", "Status", "LastUpdate", "IndexInSeries", "ExpectedNumberOfInstances" ???
254 // TODO: right now, we have a "RequestedTags" option that can list the DICOM TAGS and tools/find can fetch the tags from disk if they are not in the ExtraMainDicomTags
255 ]
256 }
257 - be able to filter on DICOM Tags (and compose with a Parent orthanc id)
258 e.g: get all US series from this study
259 {
260 "Level": "Series",
261 "Query": {
262 "ParentStudy": "orthanc-study-id",
263 "DicomTags": {
264 "Modality": "US"
265 }
266 }
267 }
268 - be able to filter on a metadata (and compose with a Parent orthanc id)
269 e.g: to get all PDF instances of a study
270 {
271 "Level": "Instance",
272 "Query": {
273 "ParentStudy": "orthanc-study-id",
274 "Metadata": {
275 "SopClassUid": "1.2.840.10008.5.1.4.1.1.104.1"
276 }
277 }
278 }
279 - be able to order results on one or more DicomTags or Metadata + limit results + offset (for pagination)
280 e.g: to get page 5 of all studies from this month
281 {
282 "Level": "Study",
283 "Query": {
284 "DicomTags": {
285 "StudyDate": "20240101-20240131"
286 },
287 "OrderBy": [
288 {
289 "DicomTag": "StudyDate",
290 "Order": "Desc"
291 },
292 {
293 "Metadata": "LastUpdate",
294 "Order": "Desc"
295 }
296 ],
297 "Limit": 1000,
298 "Since": 5000
299 }
300 }
301
302
303 current queries - single level:
304 ------------------------------
305
306 Search:
307 sql = ("SELECT publicId, internalId "
308 "FROM Resources "
309 "WHERE resourceType = " + formatter.FormatResourceType(queryLevel)
310 + " ");
311
312 if (dicomIdentifiersComparisons.size() > 0)
313 sql += (" AND internalId IN (SELECT id FROM DicomIdentifiers WHERE " + *it + ") ");
314
315 if (mainDicomTagsComparisons.size() > 0)
316 sql += (" AND internalId IN (SELECT id FROM MainDicomTags WHERE " + *it + ") ");
317
318 if (!labels.empty())
319 sql += (" AND internalId " + inOrNotIn + " (SELECT id"
320 " FROM (SELECT id, COUNT(1) AS labelsCount "
321 "FROM Labels "
322 "WHERE label IN (" + Join(formattedLabels, "", ", ") + ") GROUP BY id"
323 ") AS temp "
324 " WHERE labelsCount " + condition + ")");
325 if (limit != 0)
326 sql += " LIMIT " + boost::lexical_cast<std::string>(limit);
327
328
329 Patients:
330 "SELECT patients_studies.patients_public_id, MIN(instances.publicId) AS instances_public_id "
331 "FROM (SELECT patients.publicId AS patients_public_id, MIN(studies.internalId) AS studies_internal_id "
332 "FROM (" + sql +
333 ") AS patients "
334 "INNER JOIN Resources studies ON studies.parentId = patients.internalId "
335 "GROUP BY patients.publicId "
336 ") AS patients_studies "
337 "INNER JOIN Resources series ON series.parentId = patients_studies.studies_internal_id "
338 "INNER JOIN Resources instances ON instances.parentId = series.internalId "
339 "GROUP BY patients_studies.patients_public_id"
340
341 Studies:
342 "SELECT studies_series.studies_public_id, MIN(instances.publicId) AS instances_public_id "
343 "FROM (SELECT studies.publicId AS studies_public_id, MIN(series.internalId) AS series_internal_id "
344 "FROM (" + sql +
345 ") AS studies "
346 "INNER JOIN Resources series ON series.parentId = studies.internalId "
347 "GROUP BY studies.publicId "
348 ") AS studies_series "
349 "INNER JOIN Resources instances ON instances.parentId = studies_series.series_internal_id "
350 "GROUP BY studies_series.studies_public_id"
351
352 Series:
353 "SELECT series.publicId AS series_public_id, MIN(instances.publicId) AS instances_public_id "
354 "FROM (" + sql +
355 ") AS series "
356 "INNER JOIN Resources instances ON instances.parentId = series.internalId "
357 "GROUP BY series.publicId "
358
359 Instances:
360 "SELECT instances.publicId, instances.publicId FROM (" + sql + ") instances"
361
362
363 current queries - multi level:
364 ------------------------------
365 Search: ISqlLookupFormatter::Apply
366
367
368
369 New queries:
370 -----------
371
372 - Get all series ids where PatientID (16, 32)="ID-DYL-WAL-00000008"
373 StudyDescription (8, 4144) %LIKE% "000000"
374 order by StudyDate(8, 32) DESC
375
376 Current: curl http://localhost:8044/tools/find -d '{"Level": "Series", "Query": {"StudyDescription": "*000000*", "PatientID": "ID-DYL-WAL-00000008"}}'
377 SELECT series.publicId, MIN(instances.publicId)
378 FROM (SELECT series.publicId, series.internalId
379 FROM Resources AS series
380 INNER JOIN Resources studies ON studies.internalId=series.parentId
381 INNER JOIN DicomIdentifiers t0 ON t0.id = studies.internalId AND t0.tagGroup = 16 AND t0.tagElement = 32 -- Patient tags are copied at study level !
382 INNER JOIN DicomIdentifiers t1 ON t1.id = studies.internalId AND t1.tagGroup = 8 AND t1.tagElement = 4144
383 WHERE series.resourceType = 2 AND t0.value = 'ID-DYL-WAL-00000008' AND t1.value like '%000000%' ESCAPE '\' LIMIT 101) series
384 INNER JOIN Resources instances ON instances.parentId = series.internalId GROUP BY series.publicId
385
386
387 With order by, we must not retrieve the instance anymore:
388 SELECT series.publicId, series.internalId
389 FROM Resources AS series
390 INNER JOIN Resources studies ON studies.internalId=series.parentId
391 INNER JOIN DicomIdentifiers t0 ON t0.id = studies.internalId AND t0.tagGroup = 16 AND t0.tagElement = 32 -- Patient tags are copied at study level !
392 INNER JOIN DicomIdentifiers t1 ON t1.id = studies.internalId AND t1.tagGroup = 8 AND t1.tagElement = 4144
393 INNER JOIN DicomIdentifiers t2 ON t2.id = studies.internalId AND t2.tagGroup = 8 AND t2.tagElement = 32
394 WHERE series.resourceType = 2 AND t0.value = 'ID-DYL-WAL-00000008' AND t1.value like '%000000%' ESCAPE '\'
395 ORDER BY t2 DESC
396 LIMIT 101
397
398 Combine multiple search criteria:
399 Search studies with a label constraints + a Metadata filter + a DICOM Tag filter + a ParentPatient filter
400
401 SELECT studies.publicId, studies.internalId
402 FROM Resources AS studies
403 INNER JOIN
404
405 WHERE resourceType = 1 AND internalId IN (SELECT id FROM DicomIdentifiers WHERE tagGroup = 16 AND tagElement = 32 AND value = $1) AND internalId IN (SELECT id FROM DicomIdentifiers WHERE tagGroup = 8 AND tagElement = 32 AND value >= $2) AND internalId IN (SELECT id FROM DicomIdentifiers WHERE tagGroup = 8 AND tagElement = 4144 AND value LIKE $3 ESCAPE '\') LIMIT 101) AS studies INNER JOIN Resources series ON series.parentId = studies.internalId GROUP BY studies.publicId ) AS studies_series INNER JOIN Resources instances ON instances.parentId = studies_series.series_internal_id GROUP BY studies_series.studies_public_id
406
407
408 -- current search at study level (single level -> we use IN instead of INNER JOIN)
409 SELECT publicId, internalId
410 FROM Resources
411 WHERE resourceType = 1
412 AND internalId IN (SELECT id
413 FROM DicomIdentifiers WHERE tagGroup = 16 AND tagElement = 32 AND value = 'ID-DYL-WAL-00000008')
414 AND internalId IN (SELECT id
415 FROM DicomIdentifiers WHERE tagGroup = 8 AND tagElement = 32 AND value >= '19700101')
416 AND internalId IN (SELECT id
417 FROM DicomIdentifiers WHERE tagGroup = 8 AND tagElement = 4144 AND value LIKE '%000000%' ESCAPE '\')
418 AND internalId IN (SELECT id
419 FROM (SELECT id, COUNT(1) AS labelsCount FROM Labels WHERE label IN ('Label2', 'Label3') GROUP BY id) AS temp
420 WHERE labelsCount >= 1)
421 AND internalId IN (SELECT id
422 FROM Metadata WHERE type=7 AND value >= '20240415T120000')
423 LIMIT 101
424
425
426 -- same query with INNER JOIN to compare performance
427 SELECT studies.publicId, studies.internalId
428 FROM Resources AS studies
429 INNER JOIN DicomIdentifiers t0 ON t0.id = studies.internalId AND t0.tagGroup = 16 AND t0.tagElement = 32 AND t0.value = 'ID-DYL-WAL-00000008' -- Patient tags are copied at study level !
430 INNER JOIN DicomIdentifiers t1 ON t1.id = studies.internalId AND t1.tagGroup = 8 AND t1.tagElement = 4144 AND t1.value like '%000000%' ESCAPE '\'
431 INNER JOIN DicomIdentifiers t2 ON t2.id = studies.internalId AND t2.tagGroup = 8 AND t2.tagElement = 32 AND t2.value >= '19700101'
432 INNER JOIN Metadata m ON m.id = studies.internalId AND m.type = 7 AND m.value >= '20240415T120000'
433 WHERE studies.resourceType = 1
434 -- AND studies.internalId IN (SELECT id FROM (SELECT id, COUNT(1) AS labelsCount FROM labels WHERE label IN ('Label2', 'Label6') GROUP BY id) AS temp WHERE labelsCount = 2) -- All labels
435 -- AND studies.internalId IN (SELECT id FROM (SELECT id, COUNT(1) AS labelsCount FROM labels WHERE label IN ('Label2', 'Label6') GROUP BY id) AS temp WHERE labelsCount >= 1) -- Any labels
436 AND studies.internalId NOT IN (SELECT id FROM (SELECT id, COUNT(1) AS labelsCount FROM labels WHERE label IN ('Label2', 'Label6') GROUP BY id) AS temp WHERE labelsCount >= 1) -- None of labels
437 ORDER BY t2.value DESC, m.value ASC
438 LIMIT 100 OFFSET 0
439
440
441 We may store this in a temporary table that we can reuse e.g to retrieve, the labels, the metadata, the MainDicomTags ...
442 CREATE TEMPORARY TABLE FilteredResourcesIds AS
443 SELECT series.publicId, series.internalId
444 FROM Resources AS series
445 INNER JOIN Resources studies ON studies.internalId=series.parentId
446 INNER JOIN DicomIdentifiers t0 ON t0.id = studies.internalId AND t0.tagGroup = 16 AND t0.tagElement = 32 -- Patient tags are copied at study level !
447 INNER JOIN DicomIdentifiers t1 ON t1.id = studies.internalId AND t1.tagGroup = 8 AND t1.tagElement = 4144
448 INNER JOIN DicomIdentifiers t2 ON t2.id = studies.internalId AND t2.tagGroup = 8 AND t2.tagElement = 32
449 WHERE series.resourceType = 2 AND t0.value = 'ID-DYL-WAL-00000008' AND t1.value like '%000000%' ESCAPE '\'
450 ORDER BY t2 DESC
451 LIMIT 101;
452
453 Note: we can probably only retrieve the internalId as the key -> since it is an integer, it is more efficient than a uuid to populate the responses
454 Retrieve all MainDicomTags at the series level
455 SELECT internalId, publicId, tagGroup, tagElement, value FROM MainDicomTags AS tags
456 INNER JOIN FilteredResourcesIds ON tags.id = FilteredResourcesIds.internalId;
457
458 Retrieve all MainDicomTags at the study level too but attach them to the series ids
459 SELECT series.internalId, series.publicId, tagGroup, tagElement, value
460 FROM Resources as series
461 INNER JOIN Resources studies ON studies.internalId=series.parentId
462 INNER JOIN FilteredResourcesIds filtered ON filtered.internalId = series.internalId
463 INNER JOIN MainDicomTags tags ON tags.id=studies.internalId
464
465 Retrieve all Metadata from all the series
466 SELECT metadata.id, series.publicId, metadata.type, metadata.value, metadata.revision
467 FROM Metadata
468 INNER JOIN FilteredResourcesIds filtered ON filtered.internalId = Metadata.id
469
470 Retrieve all Labels from all the series (not tested yet)
471 SELECT labels.id, series.publicId, label
472 FROM Labels
473 INNER JOIN FilteredResourcesIds filtered ON filtered.internalId = Labels.id
474
475 Retrieve all parents from all the series
476 SELECT filtered.internalId, filtered.publicId, parentLevel.internalId as parentInternalId, parentLevel.publicId as parentPublicId
477 FROM Resources as currentLevel
478 INNER JOIN FilteredResourcesIds filtered ON filtered.internalId = currentLevel.internalId
479 INNER JOIN Resources parentLevel ON currentLevel.parentId = parentLevel.internalId
480
481 Retrieve all children from all the series
482 SELECT filtered.internalId, filtered.publicId, childLevel.internalId as childInternalId, childLevel.publicId as childPublicId
483 FROM Resources as currentLevel
484 INNER JOIN FilteredResourcesIds filtered ON filtered.internalId = currentLevel.internalId
485 INNER JOIN Resources childLevel ON childLevel.parentId = currentLevel.internalId
486
487 Retrieve one instanceId by series
488 SELECT filtered.internalId, filtered.publicId, MIN(c0.publicId) as instancePublicId
489 FROM Resources as currentLevel
490 INNER JOIN FilteredResourcesIds filtered ON filtered.internalId = currentLevel.internalId
491 INNER JOIN Resources c0 ON c0.parentId = currentLevel.internalId
492 GROUP BY filtered.internalId, filtered.publicId
493
494 TODO Retrieve all attachments from all the series