AI Medical Image Segmentation: Methods and Workflow

Published: 2025-03-17

AI Medical Image Segmentation: Methods and Workflow – Lesion Recognition on Low-Field MRI Images

– Author: coder_fang
AI framework: PaddleSeg

  1. Data preparation: use MedicalLabelMe to annotate the .dcm files, which produces a .json annotation file with the same base name for each image.

  2. Write a program that generates the training-set images, including the mask images.

  3. The code is as follows:

import os
import glob
import json
import shutil

import cv2
import numpy as np
import pydicom
import imgviz


def doCvt(labelDir):
    outdir = labelDir + "\\output"
    if os.path.exists(outdir):
        shutil.rmtree(outdir)
    os.makedirs(outdir + "\\images")    # original images
    os.makedirs(outdir + "\\masks1")    # visualization masks, values 0/255 only
    os.makedirs(outdir + "\\masks2")    # training masks, values 0/1 only
    start = 0
    files = glob.glob(labelDir + '\\*.json')

    for f in files:
        fname = os.path.basename(f)
        jsondata = json.load(open(labelDir + "\\" + fname, encoding="utf-8"))
        imagePath = os.path.join(labelDir, jsondata.get("image_path"))

        ds = pydicom.dcmread(imagePath)  # read the .dcm file
        img = ds.pixel_array             # extract the pixel data
        img = (img * (255 / img.max())).astype(np.uint8)
        pathout = "{}\\images\\{:05d}.png".format(outdir, start)
        result = cv2.bilateralFilter(img, 11, 64, 128)  # denoise the low-field image
        cv2.imwrite(pathout, result)

        label_name_to_value = {'_background_': 0, 'H': 1}
        for shape in jsondata.get("frames")[0].get("shapes"):
            label_name = shape['label']
            if label_name in label_name_to_value:
                label_value = label_name_to_value[label_name]
            else:
                label_value = len(label_name_to_value)
                label_name_to_value[label_name] = label_value

        mask = np.zeros(img.shape, dtype=np.uint8)  # the image is grayscale, so uint8 is sufficient

        # iterate over all shapes and draw them onto the mask
        for shape in jsondata.get("frames")[0].get("shapes"):
            # get the shape type and its coordinate points
            shape_type = shape['shape_type']
            points = shape['points']
            if shape_type == 'polygon':
                # draw a polygon mask
                cv2.fillPoly(mask, [np.array(points).reshape((-1, 1, 2)).astype(np.int32)], 1)
            elif shape_type == 'rectangle':
                # draw a rectangle mask (converted to a polygon)
                x, y, w, h = points
                pts = np.array([[x, y], [x + w, y], [x + w, y + h], [x, y + h]])
                cv2.fillPoly(mask, [pts.reshape((-1, 1, 2)).astype(np.int32)], 1)
            # other shape types (circles, etc.) can be handled here as needed

        # save the visualization mask (e.g. as PNG)
        pathout = "{}\\masks1\\{:05d}.png".format(outdir, start)
        cv2.imwrite(pathout, (mask * 255).astype(np.uint8))
        # save the training mask (e.g. as PNG)
        pathout = "{}\\masks2\\{:05d}.png".format(outdir, start)
        cv2.imwrite(pathout, mask.astype(np.uint8))

        # optional: colored label overlay for a quick visual check (not saved here)
        lbl_viz = imgviz.label2rgb(mask, img)

        start += 1
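For reference, doCvt only relies on a few fields of the MedicalLabelMe .json file. Below is a minimal sketch of the assumed layout, reconstructed from the fields the script reads; the values are made up and real annotation files may contain additional keys.

# Illustrative only: the annotation fields that doCvt actually reads.
example_annotation = {
    "image_path": "00001.dcm",           # the .dcm file this annotation refers to (hypothetical name)
    "frames": [
        {
            "shapes": [
                {
                    "label": "H",             # lesion label, mapped to class 1 above
                    "shape_type": "polygon",  # or "rectangle"
                    "points": [[120, 80], [140, 85], [135, 110]]
                }
            ]
        }
    ]
}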

[Figures: sample conversion results]

  4. Use PaddlePaddle 2.6.2, and download and build PaddleSeg 2.8.1 separately; versions earlier than 2.8 have issues.
  5. Generate the list files for the training and validation datasets:
import os
import random


def create_list(data_path):
    image_path = os.path.join(data_path, 'images')
    label_path = os.path.join(data_path, 'masks2')
    data_names = os.listdir(image_path)
    random.shuffle(data_names)  # shuffle the data
    with open(os.path.join(data_path, 'train_list.txt'), 'w') as tf:
        with open(os.path.join(data_path, 'val_list.txt'), 'w') as vf:
            for idx, data_name in enumerate(data_names):

                img = os.path.join('images', data_name)
                lab = os.path.join('masks2', data_name)
                if idx % 9 == 0:  # roughly 1 sample in 9 goes to validation; ~90% go to training
                    vf.write(img + ' ' + lab + '\n')
                else:
                    tf.write(img + ' ' + lab + '\n')
    print('Data lists generated')


data_path = 'work/output'
create_list(data_path)  # generate the data lists
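Each line of the resulting train_list.txt and val_list.txt pairs an image path with its mask path, relative to the dataset root and separated by a space (filenames follow the {:05d}.png naming used by the conversion script), for example:

images/00000.png masks2/00000.png
images/00003.png masks2/00003.png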
  6. Build the datasets from the lists and verify them:
import random

import paddleseg.transforms as T
from paddleseg.datasets import Dataset

# build the training set
train_transforms = [
    T.RandomHorizontalFlip(),  # horizontal flip
    T.RandomVerticalFlip(),

    T.RandomNoise(),
    T.Resize(),
    T.Normalize()  # normalization
]
train_dataset = Dataset(
    transforms=train_transforms,
    dataset_root='work/output',
    num_classes=2,
    img_channels=1,
    mode='train',
    train_path='work/output/train_list.txt',
    separator=' ',
)
# build the validation set
val_transforms = [
    T.Resize(),
    T.Normalize()
]
val_dataset = Dataset(
    transforms=val_transforms,
    dataset_root='work/output',
    num_classes=2,
    img_channels=1,
    mode='val',
    val_path='work/output/val_list.txt',
    separator=' ',
)

# randomly sample training items and visualize them as a sanity check
import matplotlib.pyplot as plt
import numpy as np

plt.figure(figsize=(10, 16))
for i in range(1, 6, 2):
    # each sample is a dict; unpack its (key, value) pairs and keep the img/label entries
    _, img, label, _ = train_dataset[random.randint(0, 100)].items()
    print(img[1].shape)
    img = np.transpose(img[1], (1, 2, 0))
    img = img * 0.5 + 0.5  # undo Normalize() for display
    plt.subplot(3, 2, i), plt.imshow(img, 'gray'), plt.title('img'), plt.xticks([]), plt.yticks([])
    plt.subplot(3, 2, i + 1), plt.imshow(label[1], 'gray'), plt.title('label'), plt.xticks([]), plt.yticks([])
plt.show()

[Figure: randomly sampled training image and its label mask]

  7. Build the segmentation network and train:
import paddle
from paddleseg.models import BiSeNetV2
from paddleseg.models.losses import CrossEntropyLoss

# Several of the segmentation models in PaddleSeg can be tried here; BiSeNetV2 currently gives a
# good balance between accuracy and training/validation speed, and its results are acceptable.
model = BiSeNetV2(num_classes=2,      # binary segmentation
                  in_channels=1,      # single channel
                  lambd=0.25,
                  align_corners=False,
                  # resume from a previously trained best model; set to None for the first run
                  pretrained='OutDir/best_model/model.pdparams'
                  )


base_lr = 0.0005
lr = paddle.optimizer.lr.PolynomialDecay(learning_rate=base_lr, decay_steps=1500, verbose=False)
# the weight_decay value was truncated in the original post; 4.0e-5 is assumed here
optimizer = paddle.optimizer.Momentum(lr, parameters=model.parameters(), momentum=0.9, weight_decay=4.0e-5)
losses = {}
# BiSeNetV2 produces 5 logits (1 main + 4 auxiliary), so 5 loss types are configured;
# note that only the first 5 coefficients of the list below are actually applied
losses['types'] = [CrossEntropyLoss()] * 5
losses['coef'] = [0.8, 0.2] * 5

from paddleseg.core import train

train(
    model=model,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    optimizer=optimizer,
    save_dir='OutDir',
    iters=3000,
    batch_size=8,
    save_interval=200,
    log_iters=20,
    num_workers=0,
    losses=losses,
    use_vdl=True)  # start training; a GPU is strongly recommended
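Once training is done, the best checkpoint is stored under OutDir/best_model. Below is a minimal sketch for re-running validation on it with PaddleSeg's evaluate helper, reusing the model and val_dataset defined above.

from paddleseg.core import evaluate

# load the best weights produced by train() and measure mIoU / accuracy on the validation set
best_params = paddle.load('OutDir/best_model/model.pdparams')
model.set_dict(best_params)
evaluate(model, val_dataset)

Because use_vdl=True, training curves are also written to the save directory and can be inspected with VisualDL (e.g. visualdl --logdir OutDir).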
  8. Run predictions on test data and blend the results into the original images:
# test the results

import glob

import cv2
import matplotlib.pyplot as plt
import numpy as np
import paddle
import paddleseg.transforms as T
from paddleseg.models import BiSeNetV2
from paddleseg.transforms import functional as F

# preprocessing pipeline used at training time (kept here for reference)
transforms = T.Compose([
    T.Resize(target_size=(512, 512)),
    T.Normalize()
])


# adjust to a suitable window width / window level for display
def wwwl(im, window_width, window_level):
    # compute the min and max display values
    min_val = window_level - window_width / 2
    max_val = window_level + window_width / 2
    # im = ((im - min_val) / (max_val - min_val)) * 255
    im = np.clip(im, min_val, max_val).astype(np.uint8)
    return im


# load the model
model = BiSeNetV2(num_classes=2,
                  in_channels=1, )

model_path = 'OutDir/best_model/model.pdparams'
para_state_dict = paddle.load(model_path)
model.set_dict(para_state_dict)
model.eval()  # switch to inference mode

# work/testimg holds a few lesion images, preferably not from the training set
files = glob.glob('work/testimg/*.png')
row = len(files)
plt.figure(figsize=(16, row * 8))
i = 1
for f in files:
    # read the data
    im = cv2.imread(f, 0).astype('float32')
    im = F.normalize(im, 0.5, 0.5)  # F is paddleseg.transforms.functional (assumed; the import was omitted in the post)

    h, w = im.shape
    data = np.expand_dims(im, axis=0).repeat(1, axis=0)  # add the channel dimension (C=1)

    data = data[np.newaxis, ...]  # add the batch dimension
    data = paddle.to_tensor(data)
    predata = model(data)

    output = predata[0].numpy()

    output = np.argmax(output, axis=1)
    output = np.squeeze(output).astype(np.uint8)
    # the model input was scaled to (512, 512); resize the prediction back to the original size
    # (note that cv2.resize expects the size as (width, height))
    output = cv2.resize(output, (w, h), interpolation=cv2.INTER_NEAREST)
    output = output * 128

    # map back to the 0-255 range
    im = (im * 0.5 + 0.5) * 255

    im = wwwl(im, 128, 60)
    max_val = np.max(im)
    im = im * (255 / max_val)

    imgori = cv2.cvtColor(im.astype(np.uint8), cv2.COLOR_GRAY2BGR)
    imgpre = cv2.cvtColor(output, cv2.COLOR_GRAY2BGR)

    # keep only the blue channel so the prediction shows up as a tinted overlay
    imgpre[:, :, 1] = 0
    imgpre[:, :, 2] = 0

    mask_img = cv2.addWeighted(imgpre, 0.2, imgori, 0.5, 0)
    plt.subplot(row, 2, i), plt.imshow(mask_img, 'gray'), plt.title('img'), plt.xticks([]), plt.yticks([])

    i += 1

plt.show()

[Figure: prediction results blended onto the original images]

  9. Keep optimizing and training the model as above. To productionize it, use export.py from PaddleSeg to convert the model; the converted model can then be integrated into Python or C++ projects. The conversion command is as follows:
python export.py --model_path OutDir/best_model/model.pdparams --input_shape 1 1 512 512 --save_dir OutDir/Saved --config work/config.yml 
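For Python-side deployment, the exported model (export.py typically writes model.pdmodel and model.pdiparams into the save_dir) can be run through the Paddle Inference API. Below is a minimal sketch, assuming those file names and a single-channel 512x512 input preprocessed the same way as during training.

import numpy as np
from paddle.inference import Config, create_predictor

# point at the files produced by export.py (names assumed; check OutDir/Saved)
config = Config('OutDir/Saved/model.pdmodel', 'OutDir/Saved/model.pdiparams')
predictor = create_predictor(config)

# one normalized single-channel 512x512 image as (N, C, H, W) float32; placeholder data here
img = np.random.rand(1, 1, 512, 512).astype('float32')

input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
input_handle.reshape(img.shape)
input_handle.copy_from_cpu(img)
predictor.run()

output_handle = predictor.get_output_handle(predictor.get_output_names()[0])
pred = output_handle.copy_to_cpu()            # class map or logits, depending on export settings
pred_mask = np.squeeze(pred).astype('uint8')  # segmentation mask for the input image

The same exported files can also be loaded from C++ through the Paddle Inference library.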

Done!