Mercurial > hg > orthanc-stone
annotate OrthancStone/Sources/Toolbox/AlignedMatrix.cpp @ 2069:5956d7357098 deep-learning
macro ORTHANC_HAS_WASM_SIMD has to be manually defined
author | Sebastien Jodogne <s.jodogne@gmail.com> |
---|---|
date | Fri, 19 May 2023 17:00:17 +0200 |
parents | 22a83fb9dd23 |
children |
rev | line source |
---|---|
2068
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
1 /** |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
2 * Stone of Orthanc |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
3 * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
4 * Department, University Hospital of Liege, Belgium |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
5 * Copyright (C) 2017-2022 Osimis S.A., Belgium |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
6 * Copyright (C) 2021-2022 Sebastien Jodogne, ICTEAM UCLouvain, Belgium |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
7 * |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
8 * This program is free software: you can redistribute it and/or |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
9 * modify it under the terms of the GNU Lesser General Public License |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
10 * as published by the Free Software Foundation, either version 3 of |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
11 * the License, or (at your option) any later version. |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
12 * |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
13 * This program is distributed in the hope that it will be useful, but |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
14 * WITHOUT ANY WARRANTY; without even the implied warranty of |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
16 * Lesser General Public License for more details. |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
17 * |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
18 * You should have received a copy of the GNU Lesser General Public |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
19 * License along with this program. If not, see |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
20 * <http://www.gnu.org/licenses/>. |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
21 **/ |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
22 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
23 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
24 #include "AlignedMatrix.h" |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
25 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
26 #include <OrthancException.h> |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
27 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
28 #include <string.h> |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
29 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
30 namespace OrthancStone |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
31 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
// Integer division of "a" by "b", rounded up to the next integer.
// "b" must be non-zero, as it is used as a divisor.
static unsigned int Ceiling(unsigned int a,
                            unsigned int b)
{
  const unsigned int quotient = a / b;
  const bool hasRemainder = (a % b != 0);

  // Only round up if the division is not exact
  return hasRemainder ? quotient + 1 : quotient;
}
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
44 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
45 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
46 void AlignedMatrix::Setup(unsigned int rows, |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
47 unsigned int cols) |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
48 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
49 assert(sizeof(float) == 4); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
50 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
51 if (rows == 0 || |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
52 cols == 0) |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
53 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
54 rows_ = 0; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
55 cols_ = 0; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
56 pitch_ = 0; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
57 pitchFloatPointer_ = 0; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
58 content_ = NULL; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
59 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
60 else |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
61 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
62 rows_ = rows; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
63 cols_ = cols; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
64 pitch_ = Ceiling(cols * sizeof(float), ORTHANC_MEMORY_ALIGNMENT) * ORTHANC_MEMORY_ALIGNMENT; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
65 pitchFloatPointer_ = pitch_ / sizeof(float); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
66 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
67 void* tmp = NULL; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
68 if (posix_memalign(&tmp, ORTHANC_MEMORY_ALIGNMENT, rows_ * pitch_) != 0) |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
69 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
70 throw Orthanc::OrthancException(Orthanc::ErrorCode_NotEnoughMemory); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
71 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
72 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
73 assert(reinterpret_cast<intptr_t>(tmp) % ORTHANC_MEMORY_ALIGNMENT == 0); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
74 assert(pitch_ % ORTHANC_MEMORY_ALIGNMENT == 0); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
75 assert(pitch_ % sizeof(float) == 0); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
76 assert((rows_ * pitch_) % ORTHANC_MEMORY_ALIGNMENT == 0); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
77 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
78 content_ = static_cast<float*>(tmp); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
79 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
80 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
81 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
82 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
83 AlignedMatrix::~AlignedMatrix() |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
84 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
85 if (content_ != NULL) |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
86 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
87 free(content_); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
88 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
89 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
90 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
91 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
92 void AlignedMatrix::FillZeros() |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
93 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
94 memset(content_, 0, rows_ * pitch_); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
95 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
96 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
97 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
98 void AlignedMatrix::ProductPlain(AlignedMatrix& c, |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
99 const AlignedMatrix& a, |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
100 const AlignedMatrix& b) |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
101 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
102 if (c.GetRows() != a.GetRows() || |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
103 c.GetColumns() != b.GetColumns() || |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
104 a.GetColumns() != b.GetRows()) |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
105 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
106 throw Orthanc::OrthancException(Orthanc::ErrorCode_IncompatibleImageSize); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
107 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
108 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
109 const unsigned int M = c.GetRows(); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
110 const unsigned int N = c.GetColumns(); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
111 const unsigned int K = a.GetColumns(); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
112 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
113 c.FillZeros(); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
114 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
115 for (unsigned int i = 0; i < M; i++) |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
116 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
117 // Loop over "k" to be more cache-friendly |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
118 // https://sahnimanas.github.io/post/anatomy-of-a-high-performance-convolution/ |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
119 for (unsigned int k = 0; k < K; k++) |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
120 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
121 for (unsigned int j = 0; j < N; j++) |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
122 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
123 c.AddValue(i, j, a.GetValue(i, k) * b.GetValue(k, j)); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
124 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
125 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
126 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
127 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
128 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
129 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
130 #if ORTHANC_HAS_MATRIX_PRODUCT_TRANSPOSED_VECTORIZED == 1 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
131 // Computes "C = A*B^T" |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
132 class AlignedMatrix::ProductTransposedVectorizedContext : public boost::noncopyable |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
133 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
134 private: |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
135 unsigned int vectorizedSteps_; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
136 uint8_t finalSteps_; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
137 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
138 public: |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
139 ORTHANC_FORCE_INLINE |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
140 ProductTransposedVectorizedContext(const AlignedMatrix& a) |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
141 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
142 #if ORTHANC_HAS_AVX2 == 1 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
143 const unsigned int blockSize = 8; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
144 #elif ORTHANC_HAS_SSE2 == 1 || ORTHANC_HAS_WASM_SIMD == 1 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
145 const unsigned int blockSize = 4; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
146 #else |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
147 # error No supported SIMD instruction set |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
148 #endif |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
149 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
150 vectorizedSteps_ = a.GetColumns() / blockSize; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
151 finalSteps_ = a.GetColumns() - vectorizedSteps_ * blockSize; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
152 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
153 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
154 ORTHANC_FORCE_INLINE |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
155 float Apply(const float* ap, |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
156 const float* btp) const noexcept |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
157 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
158 float result; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
159 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
160 #if ORTHANC_HAS_AVX2 == 1 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
161 __m256 accumulator = _mm256_set1_ps(0); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
162 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
163 for (unsigned int k = 0; k < vectorizedSteps_; k++) |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
164 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
165 __m256 a = _mm256_load_ps(ap); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
166 __m256 b = _mm256_load_ps(btp); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
167 //accumulator = _mm256_add_ps(accumulator, _mm256_mul_ps(a, b)); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
168 accumulator = _mm256_fmadd_ps(a, b, accumulator); // Requires the "-mfma" compiler flag |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
169 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
170 ap += 8; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
171 btp += 8; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
172 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
173 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
174 float tmp[8] __attribute__ ((aligned (ORTHANC_MEMORY_ALIGNMENT))); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
175 _mm256_store_ps(tmp, accumulator); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
176 result = tmp[0] + tmp[1] + tmp[2] + tmp[3] + tmp[4] + tmp[5] + tmp[6] + tmp[7]; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
177 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
178 #elif ORTHANC_HAS_SSE2 == 1 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
179 __m128 accumulator = _mm_set1_ps(0); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
180 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
181 for (unsigned int k = 0; k < vectorizedSteps_; k++) |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
182 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
183 __m128 a = _mm_load_ps(ap); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
184 __m128 b = _mm_load_ps(btp); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
185 accumulator = _mm_add_ps(accumulator, _mm_mul_ps(a, b)); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
186 ap += 4; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
187 btp += 4; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
188 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
189 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
190 #if 1 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
191 float tmp[4] __attribute__ ((aligned (ORTHANC_MEMORY_ALIGNMENT))); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
192 _mm_storeu_ps(tmp, accumulator); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
193 result = tmp[0] + tmp[1] + tmp[2] + tmp[3]; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
194 #else |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
195 // This trickier version is theoretically faster, but there is not much difference in practice |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
196 const __m128 sum2 = _mm_add_ps(accumulator, _mm_shuffle_ps(accumulator, accumulator, _MM_SHUFFLE(2, 3, 0, 1))); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
197 const __m128 sum1 = _mm_add_ps(sum2, _mm_shuffle_ps(sum2, sum2, _MM_SHUFFLE(0, 1, 2, 3))); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
198 result = _mm_cvtss_f32(sum1); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
199 #endif |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
200 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
201 #elif ORTHANC_HAS_WASM_SIMD == 1 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
202 v128_t accumulator = wasm_f32x4_splat(0); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
203 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
204 for (unsigned int k = 0; k < vectorizedSteps_; k++) |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
205 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
206 v128_t a = wasm_v128_load(ap); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
207 v128_t b = wasm_v128_load(btp); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
208 accumulator = wasm_f32x4_add(accumulator, wasm_f32x4_mul(a, b)); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
209 ap += 4; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
210 btp += 4; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
211 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
212 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
213 #if 1 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
214 float tmp[4]; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
215 wasm_v128_store(tmp, accumulator); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
216 result = tmp[0] + tmp[1] + tmp[2] + tmp[3]; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
217 #else |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
218 const v128_t sum2 = wasm_f32x4_add(accumulator, wasm_i32x4_shuffle(accumulator, accumulator, 2, 3, 0, 0)); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
219 const v128_t sum1 = wasm_f32x4_add(sum2, wasm_i32x4_shuffle(sum2, sum2, 1, 0, 0, 0)); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
220 result = wasm_f32x4_extract_lane(sum1, 0); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
221 #endif |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
222 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
223 #else |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
224 # error No supported SIMD instruction set |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
225 #endif |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
226 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
227 for (uint8_t k = 0; k < finalSteps_; k++) |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
228 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
229 result += (*ap) * (*btp); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
230 ap++; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
231 btp++; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
232 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
233 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
234 return result; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
235 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
236 }; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
237 #endif |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
238 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
239 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
240 #if ORTHANC_HAS_MATRIX_PRODUCT_TRANSPOSED_VECTORIZED == 1 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
241 void AlignedMatrix::ProductTransposedVectorized(AlignedMatrix& c, |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
242 const AlignedMatrix& a, |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
243 const AlignedMatrix& bt) |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
244 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
245 if (c.GetRows() != a.GetRows() || |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
246 c.GetColumns() != bt.GetRows() || |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
247 a.GetColumns() != bt.GetColumns()) |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
248 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
249 throw Orthanc::OrthancException(Orthanc::ErrorCode_IncompatibleImageSize); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
250 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
251 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
252 AlignedMatrix::ProductTransposedVectorizedContext context(a); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
253 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
254 const unsigned int M = a.GetRows(); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
255 const unsigned int N = bt.GetRows(); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
256 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
257 const size_t rowSizeA = a.GetPitch() / sizeof(float); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
258 const size_t rowSizeB = bt.GetPitch() / sizeof(float); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
259 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
260 const float* ap = a.GetRowPointer(0); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
261 for (unsigned int i = 0; i < M; i++) |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
262 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
263 float* cp = c.GetRowPointer(i); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
264 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
265 const float* btp = bt.GetRowPointer(0); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
266 for (unsigned int j = 0; j < N; j++, cp++) |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
267 { |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
268 *cp = context.Apply(ap, btp); |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
269 btp += rowSizeB; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
270 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
271 |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
272 ap += rowSizeA; |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
273 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
274 } |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
275 #endif |
22a83fb9dd23
added AlignedMatrix and TimerLogger
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
276 } |