摘要:
由于 DimensionGroupVirtual::Clone 在非 shallow 模式下需要逐维复制 IndexTable, 执行代价较高, 本文分析该函数是否会被调用; 如果会被调用, 则进一步分析其调用场景。
函数原型:
// Creates a new DimensionGroupVirtual built from this group's dims_used, base_dim and f.
//
// shallow == true : the new group is constructed with mode 1 and returned immediately;
//                   no per-dimension IndexTable is copied.
// shallow == false: the new group is constructed with mode 0 and, for every dimension that
//                   has an IndexTable here, the nulls_possible flag is copied and a new
//                   IndexTable is copy-constructed from the existing one. This per-dimension
//                   copy is the expensive part of the call.
//
// Returns a raw pointer to the heap-allocated clone.
// NOTE(review): presumably the caller takes ownership of the returned object - confirm.
// NOTE(review): the meaning of constructor modes 0 and 1 is not visible here; presumably they
// control whether the filter f is copied or shared - confirm against the constructor.
DimensionGroup *DimensionGroupVirtual::Clone(bool shallow) {
DimensionGroupVirtual *new_value = new DimensionGroupVirtual(dims_used, base_dim, f, (shallow ? 1 : 0));
// Shallow clone: nothing beyond the constructor's work is needed.
if (shallow) return new_value;
for (int i = 0; i < no_dims; i++) {
// Only dimensions that actually have an index table are copied.
if (t[i]) {
new_value->nulls_possible[i] = nulls_possible[i];
// The source table is locked for the duration of the copy and unlocked right after.
// NOTE(review): if the IndexTable copy constructor throws, Unlock() is skipped and new_value leaks.
t[i]->Lock();
new_value->t[i] = new IndexTable(*t[i]);
t[i]->Unlock();
}
}
return new_value;
}
调用分析:
一. 分析DimensionGroupVirtual类的创建
来自MINewContents::Commit
// Replaces the dimension groups of all involved dimensions in the MultiIndex `mind` with the
// newly computed contents. Steps, in order:
//   1. record the current lock count of every involved dimension;
//   2. delete the dimension groups the involved dimensions belong to;
//   3. commit f_opt, if one exists;
//   4. push the replacement group(s), chosen by content_type:
//        MCT_FILTER_FORGET - a DimensionGroupFilter for optimized_dim_stay plus a
//                            DimensionGroupMaterialized of size 1 for the other involved dimensions;
//        MCT_VIRTUAL_DIM   - a DimensionGroupVirtual over dim_involved, based on optimized_dim_stay,
//                            receiving the t_new tables of the other non-forgotten dimensions;
//        otherwise         - a DimensionGroupMaterialized with `obj` objects receiving the t_new
//                            tables of all non-forgotten involved dimensions;
//   5. refresh mind's group-for-dimension mapping and tuple count, then call UnlockFromGetIndex
//      for every involved, non-forgotten dimension.
// The joined_tuples parameter is not used by this function.
void MINewContents::Commit([[maybe_unused]] int64_t joined_tuples) // commit changes to multiindex - must be called
// at the end, or changes will be lost
{
MEASURE_FET("MINewContents::Commit(...)");
// Remember the lock counts before the old groups are destroyed, so they can be re-applied
// to the replacement structures below.
std::vector<int> no_locks(no_dims);
for (int dim = 0; dim < no_dims; dim++)
if (dim_involved[dim]) {
no_locks[dim] = mind->group_for_dim[dim]->NumOfLocks(dim);
}
// dim_involved contains full original groups (to be deleted)
for (int dim = 0; dim < no_dims; dim++)
if (dim_involved[dim]) {
int group_no = mind->group_num_for_dim[dim];
if (mind->dim_groups[group_no]) { // otherwise already deleted
delete mind->dim_groups[group_no];
mind->dim_groups[group_no] = NULL;
}
}
if (f_opt) f_opt->Commit();
// Now all involved groups must be replaced by a new contents
if (content_type == enumMINCType::MCT_FILTER_FORGET) { // optimized version: just exchange filters
DimensionGroupFilter *nf =
new DimensionGroupFilter(optimized_dim_stay, f_opt, 2,
pack_power); // mode 2: pass Filter ownership to the DimensionGroup
// The group now owns the filter; drop the local pointer.
f_opt = NULL;
nf->Lock(optimized_dim_stay, no_locks[optimized_dim_stay]);
mind->dim_groups.push_back(nf);
// Every involved dimension except optimized_dim_stay goes into the "forgotten" group.
DimensionVector dims_to_forget(dim_involved);
dims_to_forget[optimized_dim_stay] = false;
DimensionGroupMaterialized *ng = new DimensionGroupMaterialized(dims_to_forget); // forgotten dimensions
mind->dim_groups.push_back(ng);
ng->SetNumOfObj(1); // set a dummy size 1 for a group containing forgotten
// dimensions only
} else if (content_type == enumMINCType::MCT_VIRTUAL_DIM) { // optimized version: virtual dimension group
DimensionGroupVirtual *nv = new DimensionGroupVirtual(dim_involved, optimized_dim_stay, f_opt,
2); // mode 2: pass Filter ownership to the DimensionGroup
// The group now owns the filter; drop the local pointer.
f_opt = NULL;
nv->Lock(optimized_dim_stay, no_locks[optimized_dim_stay]);
mind->dim_groups.push_back(nv);
// Hand over the index tables of the remaining non-forgotten dimensions;
// optimized_dim_stay itself is represented by the filter, not by a table.
for (int dim = 0; dim < no_dims; dim++) {
if (dim_involved[dim] && !forget_now[dim] && dim != optimized_dim_stay) {
t_new[dim]->SetNumOfLocks(no_locks[dim]);
nv->NewDimensionContent(dim, t_new[dim], nulls_possible[dim]);
t_new[dim] = NULL; // ownership transferred to the DimensionGroup
}
}
} else {
// now we should exchange existing joined dimensions into the newly
// calculated ones
if (roughsorter) {
roughsorter->Commit(obj); // sort roughly the current t_new contents, if needed
roughsorter->Barrier();
}
DimensionGroupMaterialized *ng = new DimensionGroupMaterialized(dim_involved); // involving also forgotten
mind->dim_groups.push_back(ng);
ng->SetNumOfObj(obj);
for (int dim = 0; dim < no_dims; dim++) {
if (dim_involved[dim] && !forget_now[dim]) {
t_new[dim]->SetNumOfLocks(no_locks[dim]);
ng->NewDimensionContent(dim, t_new[dim], nulls_possible[dim]);
t_new[dim] = NULL; // ownership transferred to the DimensionGroup
}
}
}
// Rebuild the dimension -> group mapping and the tuple count after the group list changed.
mind->FillGroupForDim();
mind->UpdateNumOfTuples();
for (int dim = 0; dim < no_dims; dim++)
if (dim_involved[dim] && !forget_now[dim]) mind->UnlockFromGetIndex(dim);
}
} else if (content_type == enumMINCType::MCT_VIRTUAL_DIM) { // optimized version: virtual dimension group
DimensionGroupVirtual *nv = new DimensionGroupVirtual(dim_involved, optimized_dim_stay, f_opt,
2); // mode 2: pass Filter ownership to the DimensionGroup
分析content_type 何时被赋值为 enumMINCType::MCT_VIRTUAL_DIM
void MINewContents::Init(int64_t initial_size) // initialize temporary structures (set approximate size)
{
MEASURE_FET("MINewContents::Init(...)");
// check for special (filter + forgotten) case
for (int dim = 0; dim < no_dims; dim++) {
if (dim_involved[dim] && !forget_now[dim]) {
if (optimized_dim_stay != -1) { // more than one unforgotten found
optimized_dim_stay = -1;
break;
}
optimized_dim_stay = dim;
} // optimized_dim_stay > -1 if there is exactly one unforgotten dimension
}
if (optimized_dim_stay != -1 && mind->GetFilter(optimized_dim_stay) == NULL)
optimized_dim_stay = -1; // filter case only
if (optimized_dim_stay != -1)
content_type = enumMINCType::MCT_FILTER_FORGET;
else {
// check for Virtual Dimension case
for (int dim = 0; dim < no_dims; dim++) {
if (dim_involved[dim] && !forget_now[dim] && mind->MaxNumOfPacks(dim) > 1) {
if (optimized_dim_stay != -1) { // more than one large found
optimized_dim_stay = -1;
break;
}
optimized_dim_stay = dim;
} // optimized_dim_stay > -1 if there is exactly one unforgotten
// dimension
}
if (optimized_dim_stay != -1 && mind->GetFilter(optimized_dim_stay) == NULL)
optimized_dim_stay = -1; // filter case only
if (optimized_dim_stay != -1) content_type = enumMINCType::MCT_VIRTUAL_DIM;
for (int dim = 0; dim < no_dims; dim++) {
if (dim_involved[dim] && !forget_now[dim]) {
if (optimized_dim_stay != -1) { // more than one unforgotten found
optimized_dim_stay = -1;
break;
}
optimized_dim_stay = dim;
} // optimized_dim_stay > -1 if there is exactly one unforgotten dimension
}
if (optimized_dim_stay != -1 && mind->GetFilter(optimized_dim_stay) == NULL)
optimized_dim_stay = -1; // filter case only
分析:
- optimized_dim_stay在类MINewContents构造函数中被初始化为-1
- 在循环中, 只有当 optimized_dim_stay 已经被赋值为非 -1 的值, 并且又遇到一个满足 (dim_involved[dim] && !forget_now[dim]) 的维度时 (即未被 forget 的维度多于一个), 才会被重新置为 -1
- 那么就需要查看MINewContents::Init是否被多次调用导致optimized_dim_stay可能存在不是-1的情况
查看 MINewContents::Init 的调用的次数
在其他的joiner处理中只会init一次
此时 optimized_dim_stay = dim, 且不等于-1
那么就只剩下接下来的条件
if (optimized_dim_stay != -1 && mind->GetFilter(optimized_dim_stay) == NULL)
optimized_dim_stay = -1; // filter case only
需要分析是否存在 mind->GetFilter(optimized_dim_stay) == NULL 的场景
// Returns NULL when the MultiIndex has no dimensions (no_dimensions <= 0); otherwise forwards
// the request to the dimension group that currently holds `dim` and returns whatever that
// group reports. `dim` is used directly as an index into group_for_dim and is not range-checked.
Filter *GetFilter(int dim) const // Get the pointer to a filter attached to a dimension.
// NOTE: will be NULL in case of materialized MultiIndex!
{
return no_dimensions > 0 ? group_for_dim[dim]->GetFilter(dim) : NULL;
}
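此处 dim 即 optimized_dim_stay, 已确认不等于 -1, 故 dim >= 0 (注意 dim 可以等于 0); 既然存在参与连接的维度, 可推断 no_dimensions > 0, 因此 GetFilter 不会走直接返回 NULL 的分支, 其结果取决于 group_for_dim[dim]->GetFilter(dim) 的返回值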