Article Outline
Introduction
Continuing from the previous article, this post picks up the rest of the LiteSession::CompileGraph(Model *model) flow. The previous article covered part of the Init flow of LiteSession, the core of MindSpore Lite AI; this one moves on to how LiteSession processes the Model object afterwards.
一、LiteSession::CompileGraph(Model *model)
When a Model * (the Model * obtained by wrapping what was read out of the weight handler's cache buffer) is passed into CompileGraph on the LiteSession, it is first unpacked and converted from Model to Tensors. Before the conversion, the model type of the Model is checked, because different model types use different conversion routines: Model's subclasses share an abstract base class AbstractBaseModel, and currently MindIRModel is its only subclass, used to describe MINDIR models. Once the conversion finishes, the processed Model is used to initialize the graph data, i.e. the std::vector<mindspore::lite::Tensor *> tensors_.
int LiteSession::CompileGraph(Model *model) {
  auto ret = PreCheck(model);
  MS_LOG(ERROR) << "NH#CompileGraph START";
  ...
  if (model->model_type_ != ModelType_MSLite) {
    MS_LOG(ERROR) << "NH#model typeModelType_MSLite: " << model->model_type_;
    ret = reinterpret_cast<AbstractBaseModel *>(model)->ConvertTensors(&this->tensors_);
  } else {
    MS_LOG(ERROR) << "NH#model (model->model_type_" << model->model_type_;
    // Convert to abstract base model interface
    ret = ConvertTensors(model);
    context_->set_schema_version(reinterpret_cast<LiteModel *>(model)->GetSchemaVersion());
  }
  ...
  ret = lite::PackWeightManager::GetInstance()->StoreOriginTensorData(model, &tensors_);
  ...
  InitGraphInputTensors(model);
  InitGraphOutputTensors(model);
  PackedNodePass::GetInstance().Run(model, tensors_);
  MS_LOG(ERROR) << "NH#CompileGraph create Scheduler";
  // scheduler kernels
  Scheduler scheduler(context_.get(), ms_context_, model, &tensors_, &inputs_, &outputs_, is_train_session_,
                      &is_infershape_, &is_control_flow_, &infer_along_running_, execution_plan_, delegate_,
                      delegate_device_type_);
  scheduler.SetupSchedulerCb(std::move(sched_cb_));
  scheduler.SetConfig(config_info_);
  MS_LOG(ERROR) << "NH#CompileGraph scheduler.Schedule";
  ret = scheduler.Schedule(&kernels_);
  ...
  if (ms_context_->GetThreadNum() == 1 && !context_->IsCpuFloat16Enabled() && is_control_flow_) {
    context_->DeleteThreadPool();
    (void)context_->CreateThreadPool(is_control_flow_);
  }
  infer_along_running_ = infer_along_running_ && !is_control_flow_ && !is_train_session_ && (is_infershape_ != RET_OK);
  InitGraphInOutTensorsMap(model);
  non_tail_call_kernels_ = scheduler.NonTailCallNodes();
  ret = PrepareKernels(model);
  ...
  if (is_train_session_ || is_prepare_session_) {
    is_running_.store(false);
    return RET_OK;
  }
  ret = InitExecutor();
  ...
  MarkSharedWeight(kernels_);
  FreePackOpWeight(kernels_);
  infer_along_running_ = infer_along_running_ && (runtime_allocator_ == nullptr);
  if (infer_along_running_) {
    this->context_->set_infer_checker(InferCheckerAll);
  }
  is_running_.store(false);
  return RET_OK;
}
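Before walking through the individual steps, here is a minimal caller-side sketch of where CompileGraph sits in the overall load flow. It is written against the MindSpore Lite 1.x style C++ API; the names LoadAndCompile, model_buf and model_size, and the omission of error handling, are assumptions for illustration rather than code taken from this article.
// Hypothetical caller-side sketch (MindSpore Lite 1.x style API); model_buf/model_size are placeholders.
#include "include/context.h"
#include "include/lite_session.h"
#include "include/model.h"

int LoadAndCompile(const char *model_buf, size_t model_size) {
  auto *model = mindspore::lite::Model::Import(model_buf, model_size);  // wrap the raw model buffer into a Model *
  mindspore::lite::Context context;                                     // default CPU context
  auto *session = mindspore::session::LiteSession::CreateSession(&context);
  // CompileGraph is the entry point analysed above: it builds tensors_, inputs_, outputs_ and schedules kernels_.
  return session->CompileGraph(model);
}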
二、Core flow of LiteSession::CompileGraph(Model *model)
1、MindirModel::ConvertTensors
Converts the tensors in the model into a format that the Lite framework can use.
int MindirModel::ConvertTensors(std::vector<mindspore::lite::Tensor *> *lite_tensors) {
  if (lite_tensors == nullptr) {
    MS_LOG(ERROR) << "lite tensors is null.";
    return mindspore::lite::RET_NULL_PTR;
  }
  // Get the tensor count and the graph input/output indices
  uint32_t tensor_count = this->all_mindir_tensors_.size();
  auto model_input_indices = this->graph_.input_indices_;
  auto model_output_indices = this->graph_.output_indices_;
  // Iterate over all MindIR tensors and convert each into a Lite tensor via ConvertTensor
  for (uint32_t i = 0; i < tensor_count; ++i) {
    auto src_tensor = this->all_mindir_tensors_[i];
    auto *dst_tensor = ConvertTensor(src_tensor);
    ...
    if (mindspore::lite::IsContain(model_input_indices, i)) {
      dst_tensor->set_category(mindspore::lite::Category::GRAPH_INPUT);
    }
    if (mindspore::lite::IsContain(model_output_indices, i)) {
      // a tensor is as both input and output, would be treated as an input.
      if (!dst_tensor->IsGraphInput()) {
        dst_tensor->set_category(mindspore::lite::Category::GRAPH_OUTPUT);
      }
    }
    auto ret = CheckTensorValid(dst_tensor);
    ...
    lite_tensors->emplace_back(dst_tensor);
  }
  return mindspore::lite::RET_OK;
}
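The GRAPH_INPUT/GRAPH_OUTPUT marking above relies on the helper lite::IsContain. A minimal sketch of what such a containment check does (assuming a std::vector-based container; not the exact MindSpore source, hence the Sketch suffix):
// Sketch: linear containment check over a vector of indices; requires <algorithm> and <vector>.
template <typename T>
bool IsContainSketch(const std::vector<T> &vec, T element) {
  return std::find(vec.begin(), vec.end(), element) != vec.end();
}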
1.1、Iterate and call MindirModel::ConvertTensor
mindspore::lite::Tensor *MindirModel::ConvertTensor(TensorProtoWrap mindir_tensor_wrap) {
  auto mindir_tensor = mindir_tensor_wrap.tensor_proto();
  auto data_type = MindirModelUtil::ProtoTypeToTypeId(mindir_tensor.data_type());
  std::vector<int> shape;
  for (int i = 0; i < mindir_tensor.dims_size(); i++) {
    shape.push_back(mindir_tensor.dims(i));
  }
  auto format = Format::NCHW;
  mindspore::lite::NodeType node_type;
  if (mindir_tensor.has_raw_data() || mindir_tensor.has_external_data()) {
    node_type = mindspore::lite::NodeType_ValueNode;
  } else {
    node_type = mindspore::lite::NodeType_CNode;
  }
  auto category = TensorCategory(node_type, mindir_tensor.dims_size(), data_type, mindir_tensor.raw_data().size());
  auto *lite_tensor = new mindspore::lite::Tensor(data_type, shape, format, category);
  lite_tensor->set_tensor_name(mindir_tensor_wrap.name());
  if (this->LoadTensorData(lite_tensor, mindir_tensor) != RET_OK) {
    MS_LOG(WARNING) << "MindirModel: Convert tensor failed, load tensor data failed, tensor data will be empty.";
  }
  return lite_tensor;
}
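The category passed to the Tensor constructor is derived from the node type and the amount of data the proto carries. A hedged sketch of the decision rule (based on how lite::TensorCategory generally behaves; not copied from this file, hence the Sketch suffix):
// Sketch: value nodes with data become constants (a scalar if rank 0 with one element's worth of data);
// everything else is a variable produced by an operator at runtime.
Category TensorCategorySketch(NodeType node_type, size_t shape_num, TypeId data_type, size_t data_size) {
  if (node_type != NodeType_ValueNode) {
    return Category::VAR;
  }
  if (shape_num == 0 && data_size == DataTypeSize(data_type)) {
    return Category::CONST_SCALAR;
  }
  return Category::CONST_TENSOR;
}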
1.1.1、MindirModel::LoadTensorData
int MindirModel::LoadTensorData(mindspore::lite::Tensor *lite_tensor, const mind_ir::TensorProto &mindir_tensor) {
  if (mindir_tensor.has_raw_data()) {
    return memcpy_s(lite_tensor->MutableData(), lite_tensor->Size(), mindir_tensor.raw_data().data(),
                    mindir_tensor.raw_data().size());
  }
  if (mindir_tensor.has_external_data()) {
    std::string file = this->GetModelPath() + "/" + mindir_tensor.external_data().location();
    // Read file
    std::basic_ifstream<char> fid(file, std::ios::in | std::ios::binary);
    ...
    fid.seekg(0, std::ios_base::end);
    size_t file_size = static_cast<size_t>(fid.tellg());
    fid.clear();
    fid.seekg(0);
    auto plain_data = std::make_unique<char[]>(file_size);
    constexpr uint8_t is_little_endian = 1;
    constexpr int byte_order_index = 0;
    fid.read(plain_data.get(), file_size);
    fid.close();
    // if byte order is not same return false
    if ((plain_data[byte_order_index] == is_little_endian) != common::IsLittleByteOrder()) {
      MS_LOG(ERROR) << "The byte order of export MindIr device and load MindIr device is not same!";
      return mindspore::lite::RET_ERROR;
    }
    const uint8_t *data = reinterpret_cast<const uint8_t *>(plain_data.get());
    auto ret =
      common::huge_memcpy(reinterpret_cast<uint8_t *>(lite_tensor->MutableData()), lite_tensor->Size(),
                          data + mindir_tensor.external_data().offset(), mindir_tensor.external_data().length());
    return mindspore::lite::RET_OK;
  }
  return mindspore::lite::RET_NOT_SUPPORT;
}
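In the external-data branch, the first byte of the file records whether the exporting device was little-endian, and it is compared against the loading host via common::IsLittleByteOrder(). A minimal sketch of such a host-endianness probe (an illustration, not the actual utility):
// Sketch: determine host endianness by inspecting the lowest-addressed byte of a known integer.
bool IsLittleByteOrderSketch() {
  uint32_t probe = 0x01;
  return *reinterpret_cast<const uint8_t *>(&probe) == 0x01;  // true on little-endian hosts
}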
三、LiteSession::InitGraphInputTensors(model)
void LiteSession::InitGraphInputTensors(const lite::Model *model) {
  MS_ASSERT(model != nullptr);
  auto graph_in_size = model->graph_.input_indices_.size();
  MS_LOG(ERROR) << "NH#InitGraphInputTensors in_size: " << graph_in_size;
  for (size_t i = 0; i < graph_in_size; ++i) {
    auto in_tensor_idx = model->graph_.input_indices_[i];
    MS_ASSERT(in_tensor_idx < this->tensors_.size());
    auto *in_tensor = this->tensors_.at(in_tensor_idx);
    MS_ASSERT(in_tensor != nullptr);
    this->inputs_.emplace_back(in_tensor);
  }
}
四、LiteSession::InitGraphOutputTensors(model)
void LiteSession::InitGraphOutputTensors(const lite::Model *model) {
  MS_ASSERT(model != nullptr);
  MS_ASSERT(this->outputs_.empty());
  auto graph_out_size = model->graph_.output_indices_.size();
  for (size_t i = 0; i < graph_out_size; ++i) {
    auto out_tensor_idx = model->graph_.output_indices_[i];
    MS_ASSERT(out_tensor_idx < this->tensors_.size());
    auto *out_tensor = this->tensors_.at(out_tensor_idx);
    MS_ASSERT(out_tensor != nullptr);
    this->outputs_.emplace_back(out_tensor);
  }
}
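The inputs_/outputs_ vectors filled by these two functions are what the user-facing GetInputs()/GetOutputs() calls later hand back. A hedged usage sketch (1.x style API; user_data is a placeholder for the caller's buffer, error handling omitted) of feeding an input after CompileGraph:
// Hypothetical usage sketch: fill the first graph input and run inference.
auto inputs = session->GetInputs();                   // backed by LiteSession::inputs_ built above
memcpy(inputs.front()->MutableData(), user_data, inputs.front()->Size());
session->RunGraph();
auto outputs = session->GetOutputs();                 // backed by LiteSession::outputs_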
五、Scheduler::Schedule(std::vector<mindspore::kernel::KernelExec *> *dst_kernels)
When is Scheduler::Schedule called? It is triggered by LiteSession::CompileGraph. Once inside Schedule, on the CPU path the delegate handed to InitDelegateKernels is nullptr, so it returns immediately; on the NNRT path, the delegate created earlier by LiteSession::CreateNNRTDelegate() takes that place and its kernels are substituted in.
So loading a model on the CPU path simply converts the model file buffer into the corresponding objects and stores them in LiteSession's std::vector<mindspore::lite::Tensor *> inputs_; the NNRT path must first create the corresponding Delegate, while the plain MindSpore (CPU) path creates none and InitDelegateKernels just returns RET_OK (a sketch of this short-circuit follows the Schedule source below).
int Scheduler::Schedule(std::vector<kernel::KernelExec *> *dst_kernels) {
  MS_LOG(DEBUG) << "Start schedule.";
  int check_input_ret = CheckInputParam(dst_kernels);
  ...
  shape_fusion_pass_ =
    std::make_shared<ShapeFusionPass>(context_, reinterpret_cast<LiteModel *>(src_model_), src_tensors_);
  int ret = SchedulePreProcess();
  ...
  if (*is_control_flow_) {
    control_flow_scheduler_ = std::make_shared<ControlFlowScheduler>(context_, ms_context_, src_tensors_);
  }
  ret = ScheduleGraphToKernels(dst_kernels);
  FreeOpParameters();
  op_parameters_.clear();
  ...
  if (context_->float_mode) {
    kernel::KernelExecUtil::FindAllInoutKernels(*dst_kernels);
    ret = DelQuantDTypeCastKernel(dst_kernels);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Delete quant_dtype_cast kernel failed.";
      return ret;
    }
  }
  shape_fusion_pass_->StoreStateAndReset();
  MS_LOG(DEBUG) << "Start to init delegate kernels.";
  ret = InitDelegateKernels(dst_kernels);
  ...
  MS_LOG(DEBUG) << "Finish to init delegate kernels.";
  ret = CheckCpuValid(dst_kernels);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "kernels invalid in set devices.";
    return ret;
  }
  kernel::KernelExecUtil::FindAllInoutKernels(*dst_kernels);
  ret = ConstructSubGraphs(dst_kernels);
  ret = ProcessSubGraphTranspose(dst_kernels);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Process SubGraph with multi layout failed.";
    return ret;
  }
  if (*is_control_flow_) {
    control_flow_scheduler_->SetSubgraphForPartialNode(&partial_kernel_subgraph_index_map_,
                                                       &subgraph_index_subgraph_kernel_map_);
    ret = control_flow_scheduler_->Schedule(dst_kernels);
  }
  auto status = RuntimePass(dst_kernels, src_tensors_);
  ret = InitKernels(std::move(*dst_kernels));
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "InitKernels failed.";
    return ret;
  }
  shape_fusion_pass_->RestoreState();
  return RET_OK;
}
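As noted before the Schedule source, the CPU path drops out of delegate handling almost immediately. A hedged sketch of the nullptr short-circuit inside Scheduler::InitDelegateKernels (the delegate replacement logic itself is elided, and the body is an illustration rather than the verbatim source):
int Scheduler::InitDelegateKernels(std::vector<kernel::KernelExec *> *dst_kernels) {
  // CPU-only sessions carry no delegate, so there is nothing to replace: return at once.
  if (delegate_ == nullptr) {
    return RET_OK;
  }
  // NNRT sessions reach this point: the scheduled kernels are offered to the delegate created by
  // LiteSession::CreateNNRTDelegate(), and supported segments are replaced by delegate kernels.
  ...
  return RET_OK;
}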
To be continued.