Skip to content

Commit 362cef0

Browse files
Merge branch 'AliceO2Group:dev' into mid-params
2 parents f84120e + aad204d commit 362cef0

6 files changed

Lines changed: 34 additions & 27 deletions

File tree

DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,9 @@ struct ClusterNative {
6363
static constexpr int scalePadPacked = 64; //< ~60 is needed for 0.1mm precision, but power of two avoids rounding
6464
static constexpr int scaleSigmaTimePacked = 32; // 1/32nd of pad/timebin precision for cluster size
6565
static constexpr int scaleSigmaPadPacked = 32;
66-
static constexpr int scaleSaturatedQTot = 4;
67-
static constexpr int maxSaturatedQTot = USHRT_MAX * scaleSaturatedQTot;
66+
static constexpr int scaleSaturatedQtot = 8;
67+
static constexpr int maxRegularQtot = 25 * 1024;
68+
static constexpr int maxSaturatedQtot = (USHRT_MAX - maxRegularQtot) * scaleSaturatedQtot;
6869

6970
uint32_t timeFlagsPacked; //< Contains the time in the lower 24 bits in a packed format, contains the flags in the
7071
// upper 8 bits
@@ -89,9 +90,8 @@ struct ClusterNative {
8990
GPUd() uint16_t getQtot() const
9091
{
9192
if (isSaturated()) [[unlikely]] {
92-
// Check for overflow, so return type can stay uint16
93-
auto sqtot = getSaturatedQtot();
94-
return sqtot <= USHRT_MAX ? sqtot : USHRT_MAX;
93+
auto sQtot = getSaturatedQtot();
94+
return sQtot < USHRT_MAX ? sQtot : USHRT_MAX;
9595
}
9696
return qTot;
9797
}
@@ -155,19 +155,19 @@ struct ClusterNative {
155155
sigmaPadPacked = tmp;
156156
}
157157

158-
GPUd() bool isSaturated() const { return qMax >= 1023; }
158+
GPUd() bool isSaturated() const { return qTot > maxRegularQtot; }
159159

160160
GPUd() void setSaturatedQtot(uint32_t qtot)
161161
{
162-
if (qtot > maxSaturatedQTot) {
163-
qtot = maxSaturatedQTot;
162+
this->qTot = USHRT_MAX;
163+
if (qtot < maxSaturatedQtot) {
164+
this->qTot = ((qtot + scaleSaturatedQtot / 2) / scaleSaturatedQtot) + maxRegularQtot;
164165
}
165-
this->qTot = (qtot + scaleSaturatedQTot / 2) / scaleSaturatedQTot;
166166
}
167167

168168
GPUd() uint32_t getSaturatedQtot() const
169169
{
170-
return uint32_t(qTot) * scaleSaturatedQTot;
170+
return uint32_t(qTot - maxRegularQtot) * scaleSaturatedQtot;
171171
}
172172

173173
GPUd() void setSaturatedTailLength(uint32_t tail)

Framework/Core/src/ASoA.cxx

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -167,19 +167,25 @@ std::shared_ptr<arrow::Table> ArrowHelpers::concatTables(std::vector<std::shared
167167
return arrow::Table::Make(std::make_shared<arrow::Schema>(resultFields), columns);
168168
}
169169

170+
// ASCII-only lowercase. Column labels are plain identifiers, so we deliberately
171+
// avoid the locale-aware std::tolower: it consults the current C locale on
172+
// every character and dominated getIndexFromLabel in profiles.
173+
static constexpr char asciiToLower(char c)
174+
{
175+
return (c >= 'A' && c <= 'Z') ? static_cast<char>(c + 32) : c;
176+
}
177+
170178
arrow::ChunkedArray* getIndexFromLabel(arrow::Table* table, std::string_view label)
171179
{
172-
auto field = std::ranges::find_if(table->schema()->fields(), [&](std::shared_ptr<arrow::Field> const& f) {
173-
auto caseInsensitiveCompare = [](const std::string_view& str1, const std::string& str2) {
174-
return std::ranges::equal(
175-
str1, str2,
176-
[](char c1, char c2) {
177-
return std::tolower(static_cast<unsigned char>(c1)) ==
178-
std::tolower(static_cast<unsigned char>(c2));
179-
});
180-
};
181-
182-
return caseInsensitiveCompare(label, f->name());
180+
// Take the exact-match common case first (string_view comparison checks length
181+
// then memcmp), and fall back to a case-insensitive scan only when that exact
182+
// comparison fails.
183+
auto field = std::ranges::find_if(table->schema()->fields(), [label](std::shared_ptr<arrow::Field> const& f) {
184+
std::string_view name = f->name();
185+
return label == name ||
186+
std::ranges::equal(label, name, [](char c1, char c2) {
187+
return asciiToLower(c1) == asciiToLower(c2);
188+
});
183189
});
184190
if (field == table->schema()->fields().end()) {
185191
o2::framework::throw_error(o2::framework::runtime_error_f("Unable to find column with label %s.", label));

GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess*
129129
if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionTruncate) {
130130
GPUTPCCompression::truncateSignificantBitsChargeMax(tmpClusters[k].qMax, param);
131131
GPUTPCCompression::truncateSignificantBitsWidth(tmpClusters[k].sigmaPadPacked, param);
132-
if (!tmpClusters[k].isSaturated()) {
132+
if (!tmpClusters[k].isSaturated()) [[likely]] {
133133
GPUTPCCompression::truncateSignificantBitsCharge(tmpClusters[k].qTot, param);
134134
GPUTPCCompression::truncateSignificantBitsWidth(tmpClusters[k].sigmaTimePacked, param);
135135
}

GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class GPUTPCClusterStatistics
4444
bool mDecodingError = false;
4545

4646
static constexpr uint32_t P_MAX_QMAX = GPUTPCCompression::P_MAX_QMAX;
47-
static constexpr uint32_t P_MAX_QTOT = GPUTPCCompression::P_MAX_QTOT;
47+
static constexpr uint32_t P_MAX_QTOT = GPUTPCCompression::P_MAX_SATURATED_QTOT;
4848
static constexpr uint32_t P_MAX_TIME = GPUTPCCompression::P_MAX_TIME;
4949
static constexpr uint32_t P_MAX_PAD = GPUTPCCompression::P_MAX_PAD;
5050
static constexpr uint32_t P_MAX_SIGMA = GPUTPCCompression::P_MAX_SIGMA;

GPU/GPUTracking/DataCompression/GPUTPCCompression.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,15 @@ class GPUTPCCompression : public GPUProcessor
4747
#endif
4848

4949
static constexpr uint32_t P_MAX_QMAX = 1 << 10;
50-
static constexpr uint32_t P_MAX_QTOT = 1 << 16;
50+
static constexpr uint32_t P_MAX_REGULAR_QTOT = 5 * 5 * P_MAX_QMAX;
51+
static constexpr uint32_t P_MAX_SATURATED_QTOT = 1 << 16; // Need two different limits as saturated clusters use full u16 range for qTot
5152
static constexpr uint32_t P_MAX_TIME = 1 << 24;
5253
static constexpr uint32_t P_MAX_PAD = 1 << 16;
5354
static constexpr uint32_t P_MAX_SIGMA = 1 << 8;
5455
static constexpr uint32_t P_MAX_FLAGS = 1 << 8;
5556
static constexpr uint32_t P_MAX_QPT = 1 << 8;
5657

57-
GPUd() static void truncateSignificantBitsCharge(uint16_t& charge, const GPUParam& param) { truncateSignificantBits(charge, param.rec.tpc.sigBitsCharge, P_MAX_QTOT); }
58+
GPUd() static void truncateSignificantBitsCharge(uint16_t& charge, const GPUParam& param) { truncateSignificantBits(charge, param.rec.tpc.sigBitsCharge, P_MAX_REGULAR_QTOT); }
5859
GPUd() static void truncateSignificantBitsChargeMax(uint16_t& charge, const GPUParam& param) { truncateSignificantBits(charge, param.rec.tpc.sigBitsCharge, P_MAX_QMAX); }
5960
GPUd() static void truncateSignificantBitsWidth(uint8_t& width, const GPUParam& param) { truncateSignificantBits(width, param.rec.tpc.sigBitsWidth, P_MAX_SIGMA); }
6061

GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step0at
122122
if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionTruncate) {
123123
compressor.truncateSignificantBitsChargeMax(qmax, param);
124124
compressor.truncateSignificantBitsWidth(sigmapad, param);
125-
if (!orgCl.isSaturated()) {
125+
if (!orgCl.isSaturated()) [[likely]] {
126126
compressor.truncateSignificantBitsCharge(qtot, param);
127127
compressor.truncateSignificantBitsWidth(sigmatime, param);
128128
}
@@ -301,7 +301,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1un
301301
if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionTruncate) {
302302
compressor.truncateSignificantBitsChargeMax(qmax, param);
303303
compressor.truncateSignificantBitsWidth(sigmapad, param);
304-
if (!orgCl.isSaturated()) {
304+
if (!orgCl.isSaturated()) [[likely]] {
305305
compressor.truncateSignificantBitsCharge(qtot, param);
306306
compressor.truncateSignificantBitsWidth(sigmatime, param);
307307
}

0 commit comments

Comments
 (0)