HarmonyOS Advanced: MindSpore Lite AI Framework Source Code Walkthrough, Model Loading in Detail (Part 5)


Introduction

Picking up from the previous article, this post continues with the rest of the LiteSession::CompileGraph(Model *model) flow. The previous article covered part of the Init flow of LiteSession, the core of MindSpore Lite AI; this one walks through how LiteSession goes on to process the Model object.

一、LiteSession::CompileGraph(Model *model)

When a Model * (obtained by wrapping the buffer read back from the weight handler's cache) is passed into CompileGraph, the session first unpacks it and converts Model -> Tensor. Before converting, it checks the model type, because different model types use different conversion routines: Model has an abstract parent class AbstractBaseModel, whose only subclass at present is MindIRModel, which describes MINDIR models. Once the conversion is done, the processed Model is used to initialize the graph, i.e. the session's std::vector<mindspore::lite::Tensor *> tensors_.

int LiteSession::CompileGraph(Model *model) {
  auto ret = PreCheck(model);
  MS_LOG(ERROR) << "NH#CompileGraph START";
...
  if (model->model_type_ != ModelType_MSLite) {
    MS_LOG(ERROR) << "NH#model typeModelType_MSLite: " << model->model_type_;
    ret = reinterpret_cast<AbstractBaseModel *>(model)->ConvertTensors(&this->tensors_);
  } else {
    MS_LOG(ERROR) << "NH#model (model->model_type_" << model->model_type_;
    // Convert to abstract base model interface
    ret = ConvertTensors(model);
    context_->set_schema_version(reinterpret_cast<LiteModel *>(model)->GetSchemaVersion());
  }
...
  ret = lite::PackWeightManager::GetInstance()->StoreOriginTensorData(model, &tensors_);
...
  InitGraphInputTensors(model);
  InitGraphOutputTensors(model);
  PackedNodePass::GetInstance().Run(model, tensors_);
  MS_LOG(ERROR) << "NH#CompileGraph  create Scheduler";
  // scheduler kernels
  Scheduler scheduler(context_.get(), ms_context_, model, &tensors_, &inputs_, &outputs_, is_train_session_,
                      &is_infershape_, &is_control_flow_, &infer_along_running_, execution_plan_, delegate_,
                      delegate_device_type_);
  scheduler.SetupSchedulerCb(std::move(sched_cb_));
  scheduler.SetConfig(config_info_);
  MS_LOG(ERROR) << "NH#CompileGraph scheduler.Schedule";
  ret = scheduler.Schedule(&kernels_);
  ...
  if (ms_context_->GetThreadNum() == 1 && !context_->IsCpuFloat16Enabled() && is_control_flow_) {
    context_->DeleteThreadPool();
    (void)context_->CreateThreadPool(is_control_flow_);
  }
  infer_along_running_ = infer_along_running_ && !is_control_flow_ && !is_train_session_ && (is_infershape_ != RET_OK);
  InitGraphInOutTensorsMap(model);
  non_tail_call_kernels_ = scheduler.NonTailCallNodes();
  ret = PrepareKernels(model);
  ...
  if (is_train_session_ || is_prepare_session_) {
    is_running_.store(false);
    return RET_OK;
  }

  ret = InitExecutor();
  ...
  MarkSharedWeight(kernels_);
  FreePackOpWeight(kernels_);
  infer_along_running_ = infer_along_running_ && (runtime_allocator_ == nullptr);
  if (infer_along_running_) {
    this->context_->set_infer_checker(InferCheckerAll);
  }
  is_running_.store(false);
  return RET_OK;
}
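For context, here is a minimal sketch of how application code typically reaches this function through the legacy mindspore::lite C++ API (Model::Import, LiteSession::CreateSession); header paths and exact signatures vary between MindSpore Lite releases, so treat it as an illustration rather than a verbatim recipe, and LoadAndCompile is just a hypothetical helper name.

#include <memory>
#include "include/context.h"
#include "include/errorcode.h"
#include "include/lite_session.h"
#include "include/model.h"

// model_buf/size hold the bytes of a model file already read by the caller.
// Error handling and resource cleanup are trimmed for brevity.
mindspore::lite::LiteSession *LoadAndCompile(const char *model_buf, size_t size) {
  auto context = std::make_shared<mindspore::lite::Context>();    // plain CPU context
  auto *session = mindspore::lite::LiteSession::CreateSession(context.get());
  if (session == nullptr) {
    return nullptr;
  }
  auto *model = mindspore::lite::Model::Import(model_buf, size);  // buffer -> Model object
  if (model == nullptr || session->CompileGraph(model) != mindspore::lite::RET_OK) {
    // CompileGraph drives the whole flow analyzed above: ConvertTensors,
    // InitGraphInput/OutputTensors, Scheduler::Schedule, InitExecutor, ...
    delete session;
    return nullptr;
  }
  return session;  // ready for GetInputs() / RunGraph()
}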

二、Core flow of LiteSession::CompileGraph(Model *model)

1、MindirModel::ConvertTensors

Converts the tensors described in the model into lite::Tensor objects that the Lite framework can use.

int MindirModel::ConvertTensors(std::vector<mindspore::lite::Tensor *> *lite_tensors) {
  if (lite_tensors == nullptr) {
    MS_LOG(ERROR) << "lite tensors is null.";
    return mindspore::lite::RET_NULL_PTR;
  }
// Get the tensor count and the graph input/output indices
  uint32_t tensor_count = this->all_mindir_tensors_.size();
  auto model_input_indices = this->graph_.input_indices_;
  auto model_output_indices = this->graph_.output_indices_;
// Iterate over all MindIR tensors and convert each into a Lite tensor via ConvertTensor
  for (uint32_t i = 0; i < tensor_count; ++i) {
    auto src_tensor = this->all_mindir_tensors_[i];
    auto *dst_tensor = ConvertTensor(src_tensor);
   ...
    if (mindspore::lite::IsContain(model_input_indices, i)) {
      dst_tensor->set_category(mindspore::lite::Category::GRAPH_INPUT);
    }
    if (mindspore::lite::IsContain(model_output_indices, i)) {
      // a tensor is as both input and output, would be treated as an input.
      if (!dst_tensor->IsGraphInput()) {
        dst_tensor->set_category(mindspore::lite::Category::GRAPH_OUTPUT);
      }
    }
    auto ret = CheckTensorValid(dst_tensor);
    ...
    lite_tensors->emplace_back(dst_tensor);
  }
  return mindspore::lite::RET_OK;
}
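The IsContain helper used above is just a linear membership test over the index vector. The snippet below is a presumed equivalent of what it amounts to (an assumption about the helper, not a copy of the actual implementation):

#include <algorithm>
#include <vector>

// Presumed behaviour of mindspore::lite::IsContain: true if value occurs in vec.
template <typename T>
bool IsContain(const std::vector<T> &vec, const T &value) {
  return std::find(vec.begin(), vec.end(), value) != vec.end();
}

With that, a tensor index listed in input_indices_ is tagged GRAPH_INPUT, and one listed only in output_indices_ is tagged GRAPH_OUTPUT; an index present in both stays GRAPH_INPUT, as the comment in the loop notes.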

1.1、MindirModel::ConvertTensor, called for each tensor

mindspore::lite::Tensor *MindirModel::ConvertTensor(TensorProtoWrap mindir_tensor_wrap) {
  auto mindir_tensor = mindir_tensor_wrap.tensor_proto();
  auto data_type = MindirModelUtil::ProtoTypeToTypeId(mindir_tensor.data_type());
  std::vector<int> shape;
  for (int i = 0; i < mindir_tensor.dims_size(); i++) {
    shape.push_back(mindir_tensor.dims(i));
  }
  auto format = Format::NCHW;
  mindspore::lite::NodeType node_type;
  if (mindir_tensor.has_raw_data() || mindir_tensor.has_external_data()) {
    node_type = mindspore::lite::NodeType_ValueNode;
  } else {
    node_type = mindspore::lite::NodeType_CNode;
  }
  auto category = TensorCategory(node_type, mindir_tensor.dims_size(), data_type, mindir_tensor.raw_data().size());
  auto *lite_tensor = new mindspore::lite::Tensor(data_type, shape, format, category);
  lite_tensor->set_tensor_name(mindir_tensor_wrap.name());
  if (this->LoadTensorData(lite_tensor, mindir_tensor) != RET_OK) {
    MS_LOG(WARNING) << "MindirModel: Convert tensor failed, load tensor data failed, tensor data will be empty.";
  }
  return lite_tensor;
}

1.1.1、MindirModel::LoadTensorData

int MindirModel::LoadTensorData(mindspore::lite::Tensor *lite_tensor, const mind_ir::TensorProto &mindir_tensor) {
  if (mindir_tensor.has_raw_data()) {
    return memcpy_s(lite_tensor->MutableData(), lite_tensor->Size(), mindir_tensor.raw_data().data(),
                    mindir_tensor.raw_data().size());
  }
  if (mindir_tensor.has_external_data()) {
    std::string file = this->GetModelPath() + "/" + mindir_tensor.external_data().location();
    // Read file
    std::basic_ifstream<char> fid(file, std::ios::in | std::ios::binary);
    ...
    fid.seekg(0, std::ios_base::end);
    size_t file_size = static_cast<size_t>(fid.tellg());
    fid.clear();
    fid.seekg(0);
    auto plain_data = std::make_unique<char[]>(file_size);
    constexpr uint8_t is_little_endian = 1;
    constexpr int byte_order_index = 0;
    fid.read(plain_data.get(), file_size);
    fid.close();
    // if byte order is not same return false
    if ((plain_data[byte_order_index] == is_little_endian) != common::IsLittleByteOrder()) {
      MS_LOG(ERROR) << "The byte order of export MindIr device and load MindIr device is not same!";
      return mindspore::lite::RET_ERROR;
    }
    const uint8_t *data = reinterpret_cast<const uint8_t *>(plain_data.get());
    auto ret =
      common::huge_memcpy(reinterpret_cast<uint8_t *>(lite_tensor->MutableData()), lite_tensor->Size(),
                          data + mindir_tensor.external_data().offset(), mindir_tensor.external_data().length());
    return mindspore::lite::RET_OK;
  }
  return mindspore::lite::RET_NOT_SUPPORT;
}
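The byte-order guard above compares the first byte of the exported file (written as 1 by little-endian exporters) with the byte order of the current device. A minimal sketch of a host endianness probe in the spirit of common::IsLittleByteOrder() (an illustration, not the MindSpore implementation) looks like this:

#include <cstdint>

// Returns true when the host stores the low-order byte first (little-endian).
bool IsLittleByteOrder() {
  uint16_t probe = 1;
  return *reinterpret_cast<const uint8_t *>(&probe) == 1;
}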

三、LiteSession::InitGraphInputTensors(model)

void LiteSession::InitGraphInputTensors(const lite::Model *model) {
  MS_ASSERT(model != nullptr);
  auto graph_in_size = model->graph_.input_indices_.size();
  MS_LOG(ERROR) << "NH#InitGraphInputTensors in_size: " << graph_in_size;
  for (size_t i = 0; i < graph_in_size; ++i) {
    auto in_tensor_idx = model->graph_.input_indices_[i];
    MS_ASSERT(in_tensor_idx < this->tensors_.size());
    auto *in_tensor = this->tensors_.at(in_tensor_idx);
    MS_ASSERT(in_tensor != nullptr);
    this->inputs_.emplace_back(in_tensor);
  }
}

四、LiteSession::InitGraphOutputTensors(model)

void LiteSession::InitGraphOutputTensors(const lite::Model *model) {
  MS_ASSERT(model != nullptr);
  MS_ASSERT(this->outputs_.empty());
  auto graph_out_size = model->graph_.output_indices_.size();
  for (size_t i = 0; i < graph_out_size; ++i) {
    auto out_tensor_idx = model->graph_.output_indices_[i];
    MS_ASSERT(out_tensor_idx < this->tensors_.size());
    auto *out_tensor = this->tensors_.at(out_tensor_idx);
    MS_ASSERT(out_tensor != nullptr);
    this->outputs_.emplace_back(out_tensor);
  }
}
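The inputs_ and outputs_ vectors filled by these two functions are what the session later hands back to the caller. Assuming the 1.x lite API (method names and return types may differ in other releases), typical application code reads them like this after CompileGraph succeeds; RunOnce is a hypothetical wrapper and LoadAndCompile refers to the earlier sketch:

#include <string>
#include <unordered_map>
#include <vector>
#include "include/lite_session.h"

// session: the LiteSession returned by the LoadAndCompile() sketch earlier.
// Touches every graph input, runs the graph, and returns the outputs keyed by tensor name.
std::unordered_map<std::string, mindspore::tensor::MSTensor *> RunOnce(mindspore::lite::LiteSession *session) {
  for (auto *in : session->GetInputs()) {   // tensors collected by InitGraphInputTensors
    void *data = in->MutableData();         // caller fills in->Size() bytes of input data here
    (void)data;
  }
  (void)session->RunGraph();
  return session->GetOutputs();             // tensors collected by InitGraphOutputTensors
}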

五、Scheduler::Schedule(std::vector<mindspore::kernel::KernelExec *> *dst_kernels)

When is Scheduler::Schedule called? It is triggered from LiteSession::CompileGraph. Once inside Schedule, the plain CPU path hands a null delegate to InitDelegateKernels, which therefore returns immediately; on the NNRT path, the delegate previously created by LiteSession::CreateNNRTDelegate() has replaced that null pointer, so the matching kernels are substituted with delegate kernels.
In other words, loading a model on the CPU path boils down to converting the model file buffer into the corresponding objects and storing them in LiteSession's std::vector<mindspore::lite::Tensor *> members (tensors_ / inputs_); the NNRT path additionally has to create the corresponding delegate first, while the plain MindSpore CPU path creates none and InitDelegateKernels simply returns RET_OK, as pictured in the sketch below.
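A condensed sketch of that dispatch follows; it is not the verbatim scheduler source, and ReplaceDelegateKernels stands in for whatever the delegate-replacement step is called in the version you are reading:

// Condensed sketch of the delegate dispatch described above (not verbatim source).
int Scheduler::InitDelegateKernels(std::vector<kernel::KernelExec *> *dst_kernels) {
  if (delegate_ == nullptr) {
    return RET_OK;  // plain CPU path: no delegate was created, nothing to replace
  }
  // NNRT path: the delegate installed by LiteSession::CreateNNRTDelegate() takes over
  // the kernels it supports and substitutes delegate kernels into dst_kernels.
  return ReplaceDelegateKernels(dst_kernels);  // delegate-replacement step (name assumed)
}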

int Scheduler::Schedule(std::vector<kernel::KernelExec *> *dst_kernels) {
  MS_LOG(DEBUG) << "Start schedule.";
  int check_input_ret = CheckInputParam(dst_kernels);
...
  shape_fusion_pass_ =
    std::make_shared<ShapeFusionPass>(context_, reinterpret_cast<LiteModel *>(src_model_), src_tensors_);
  int ret = SchedulePreProcess();
...
  if (*is_control_flow_) {
    control_flow_scheduler_ = std::make_shared<ControlFlowScheduler>(context_, ms_context_, src_tensors_);
  }

  ret = ScheduleGraphToKernels(dst_kernels);
  FreeOpParameters();
  op_parameters_.clear();
 ...
  if (context_->float_mode) {
    kernel::KernelExecUtil::FindAllInoutKernels(*dst_kernels);
    ret = DelQuantDTypeCastKernel(dst_kernels);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Delete quant_dtype_cast kernel failed.";
      return ret;
    }
  }
  shape_fusion_pass_->StoreStateAndReset();

  MS_LOG(DEBUG) << "Start to init delegate kernels.";
  ret = InitDelegateKernels(dst_kernels);
 ...
  MS_LOG(DEBUG) << "Finish to init delegate kernels.";
  ret = CheckCpuValid(dst_kernels);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "kernels invalid in set devices.";
    return ret;
  }
  kernel::KernelExecUtil::FindAllInoutKernels(*dst_kernels);
  ret = ConstructSubGraphs(dst_kernels);
  ret = ProcessSubGraphTranspose(dst_kernels);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Process SubGraph with multi layout failed.";
    return ret;
  }

  if (*is_control_flow_) {
    control_flow_scheduler_->SetSubgraphForPartialNode(&partial_kernel_subgraph_index_map_,
                                                       &subgraph_index_subgraph_kernel_map_);
    ret = control_flow_scheduler_->Schedule(dst_kernels);
  }
  auto status = RuntimePass(dst_kernels, src_tensors_);

  ret = InitKernels(std::move(*dst_kernels));
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "InitKernels failed.";
    return ret;
  }
  shape_fusion_pass_->RestoreState();
  return RET_OK;
}

To be continued.

