摘要:
记录clickhouse向量化colume
SIMD:
以加法指令为例,单指令单数据(SISD)的CPU对加法指令译码后,执行部件先访问内存,取得第一个操作数;之后再一次访问内存,取得第二个操作数;随后才能进行求和运算。而在SIMD型的CPU中,指令译码后几个执行部件同时访问内存,一次性获得所有操作数进行运算。这个特点使SIMD特别适合于多媒体应用等数据密集型运算。
如:AMD公司引以为豪的3D NOW! 技术实质就是SIMD,这使K6-2、雷鸟、毒龙处理器在音频解码、视频回放、3D游戏等应用中显示出优异的性能。
架构:
静态结构:
核心函数:
executeImpl
src/Functions/FunctionUnaryArithmetic.h:295
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
/// Special case when the function is negate, argument is tuple.
if (auto function_builder = getFunctionForTupleArithmetic(arguments[0].type, context))
{
return function_builder->build(arguments)->execute(arguments, result_type, input_rows_count);
}
ColumnPtr result_column;
bool valid = castType(arguments[0].type.get(), [&](const auto & type)
{
using DataType = std::decay_t<decltype(type)>;
if constexpr (std::is_same_v<DataTypeFixedString, DataType>)
{
if constexpr (allow_fixed_string)
{
if (const auto * col = checkAndGetColumn<ColumnFixedString>(arguments[0].column.get()))
{
auto col_res = ColumnFixedString::create(col->getN());
auto & vec_res = col_res->getChars();
vec_res.resize(col->size() * col->getN());
FixedStringUnaryOperationImpl<Op<UInt8>>::vector(col->getChars(), vec_res);
result_column = std::move(col_res);
return true;
}
}
}
else if constexpr (IsDataTypeDecimal<DataType>)
{
using T0 = typename DataType::FieldType;
if constexpr (allow_decimal)
{
if (auto col = checkAndGetColumn<ColumnDecimal<T0>>(arguments[0].column.get()))
{
if constexpr (is_sign_function)
{
auto col_res = ColumnVector<typename Op<T0>::ResultType>::create();
auto & vec_res = col_res->getData();
vec_res.resize(col->getData().size());
UnaryOperationImpl<T0, Op<T0>>::vector(col->getData(), vec_res);
result_column = std::move(col_res);
return true;
}
else
{
auto col_res = ColumnDecimal<typename Op<T0>::ResultType>::create(0, type.getScale());
auto & vec_res = col_res->getData();
vec_res.resize(col->getData().size());
UnaryOperationImpl<T0, Op<T0>>::vector(col->getData(), vec_res);
result_column = std::move(col_res);
return true;
}
}
}
}
else
{
using T0 = typename DataType::FieldType;
if (auto col = checkAndGetColumn<ColumnVector<T0>>(arguments[0].column.get()))
{
auto col_res = ColumnVector<typename Op<T0>::ResultType>::create();
auto & vec_res = col_res->getData();
vec_res.resize(col->getData().size());
UnaryOperationImpl<T0, Op<T0>>::vector(col->getData(), vec_res);
result_column = std::move(col_res);
return true;
}
}
return false;
});
if (!valid)
throw Exception(getName() + "'s argument does not match the expected data type", ErrorCodes::LOGICAL_ERROR);
return result_column;
}
关键点:
using T0 = typename DataType::FieldType;
if (auto col = checkAndGetColumn<ColumnVector<T0>>(arguments[0].column.get()))
{
auto col_res = ColumnVector<typename Op<T0>::ResultType>::create();
auto & vec_res = col_res->getData();
vec_res.resize(col->getData().size());
UnaryOperationImpl<T0, Op<T0>>::vector(col->getData(), vec_res);
result_column = std::move(col_res);
return true;
}
UnaryOperationImpl
template <typename A, typename Op>
struct UnaryOperationImpl
{
using ResultType = typename Op::ResultType;
using ColVecA = ColumnVectorOrDecimal<A>;
using ColVecC = ColumnVectorOrDecimal<ResultType>;
using ArrayA = typename ColVecA::Container;
using ArrayC = typename ColVecC::Container;
MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(vectorImpl,
MULTITARGET_FH(
static void NO_INLINE), /*vectorImpl*/ MULTITARGET_FB((const ArrayA & a, ArrayC & c) /// NOLINT
{
size_t size = a.size();
for (size_t i = 0; i < size; ++i)
c[i] = Op::apply(a[i]);
}))
static void NO_INLINE vector(const ArrayA & a, ArrayC & c)
{
#if USE_MULTITARGET_CODE
if (isArchSupported(TargetArch::AVX2))
{
vectorImplAVX2(a, c);
return;
}
else if (isArchSupported(TargetArch::SSE42))
{
vectorImplSSE42(a, c);
return;
}
#endif
vectorImpl(a, c);
}
static void constant(A a, ResultType & c)
{
c = Op::apply(a);
}
};
关键点:
MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(vectorImpl,
MULTITARGET_FH(
static void NO_INLINE), /*vectorImpl*/ MULTITARGET_FB((const ArrayA & a, ArrayC & c) /// NOLINT
{
size_t size = a.size();
for (size_t i = 0; i < size; ++i)
c[i] = Op::apply(a[i]);
}))
MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42
#if ENABLE_MULTITARGET_CODE && defined(__GNUC__) && defined(__x86_64__)
/// NOLINTNEXTLINE
#define MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(name, FUNCTION_HEADER, FUNCTION_BODY) \
FUNCTION_HEADER \
\
AVX2_FUNCTION_SPECIFIC_ATTRIBUTE \
name##AVX2 \
FUNCTION_BODY \
\
FUNCTION_HEADER \
\
SSE42_FUNCTION_SPECIFIC_ATTRIBUTE \
name##SSE42 \
FUNCTION_BODY \
\
FUNCTION_HEADER \
\
name \
FUNCTION_BODY \
#else
/// NOLINTNEXTLINE
#define MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(name, FUNCTION_HEADER, FUNCTION_BODY) \
FUNCTION_HEADER \
\
name \
FUNCTION_BODY \
#endif
AbsImpl
template <typename A>
struct AbsImpl
{
using ResultType = std::conditional_t<is_decimal<A>, A, typename NumberTraits::ResultOfAbs<A>::Type>;
static const constexpr bool allow_fixed_string = false;
static const constexpr bool allow_string_integer = false;
static inline NO_SANITIZE_UNDEFINED ResultType apply(A a)
{
if constexpr (is_decimal<A>)
return a < A(0) ? A(-a) : a;
else if constexpr (is_big_int_v<A> && is_signed_v<A>)
return (a < 0) ? -a : a;
else if constexpr (is_integer<A> && is_signed_v<A>)
return a < 0 ? static_cast<ResultType>(~a) + 1 : static_cast<ResultType>(a);
else if constexpr (is_integer<A> && is_unsigned_v<A>)
return static_cast<ResultType>(a);
else if constexpr (std::is_floating_point_v<A>)
return static_cast<ResultType>(std::abs(a));
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = false; /// special type handling, some other time
#endif
};