[Web Digital Human Development] Video-Driven Digital Human Pose Generation with Babylon.js + MediaPipe


0. Environment Setup

Install the required dependencies:

# Install Babylon.js
npm install --save babylonjs

# Install the loaders needed for model import
npm install babylonjs-loaders

# Install MediaPipe Pose
npm install @mediapipe/pose

1. Keypoint Detection with MediaPipe

Run MediaPipe Pose on the video to detect keypoints and output their positions in real time.
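
A minimal sketch of this step, assuming @mediapipe/pose is installed and a playing <video> element (the complete version appears in section 4):

// Minimal sketch: run MediaPipe Pose on a <video> element and log the landmarks.
import { Pose } from "@mediapipe/pose";

const pose = new Pose({
  locateFile: (file) => `https://cdn.jsdelivr.net/npm/@mediapipe/pose/${file}`,
});
pose.setOptions({ modelComplexity: 1, minDetectionConfidence: 0.5 });
pose.onResults((results) => {
  // results.poseLandmarks is an array of 33 points with normalized x/y/z and a
  // visibility score (undefined when no pose is found in the frame)
  console.log(results.poseLandmarks);
});

const video = document.getElementById("inputVideo");
async function processFrame() {
  if (!video.paused && !video.ended) {
    await pose.send({ image: video });
  }
  requestAnimationFrame(processFrame);
}
video.onplay = () => processFrame();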

2. Rotating and Translating the Model's Bones

Key code:

// Load the model
const result = await BABYLON.SceneLoader.ImportMeshAsync(
  null,
  "/",
  "man.glb",
  scene
);

// Get the first skeleton
const skeleton = result.skeletons[0];

// Get the transform node of a single bone
const leftshoulder = skeleton.bones
  .find((b) => b.name.toLowerCase().includes("leftarm"))
  .getTransformNode();

// Rotate the bone
leftshoulder.rotate(BABYLON.Axis.Y, Math.PI / 2, BABYLON.Space.LOCAL);
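
Note that rotate() is relative: each call adds to the current orientation, so calling it every frame accumulates. The heading also mentions moving bones, which the snippet does not show. A minimal sketch of both, reusing the same skeleton as above (the angle and offset values are placeholders, not from the article):

// Absolute orientation: overwrite the pose instead of accumulating rotations
// (the Euler angles below are placeholder values).
leftshoulder.rotationQuaternion = BABYLON.Quaternion.FromEulerAngles(0, Math.PI / 2, 0);

// Moving a bone works through the same transform node, e.g. shifting the hips
// (placeholder offset; the name lookup mirrors the one above).
const hips = skeleton.bones
  .find((b) => b.name.toLowerCase().includes("hips"))
  .getTransformNode();
hips.position = hips.position.add(new BABYLON.Vector3(0, 0.05, 0));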

3. Mapping MediaPipe Keypoints to the Model's Bones (not implemented)

Reference: https://blog.csdn.net/qq_58484580/article/details/132661430
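
The mapping itself is not implemented in this article, but the basic idea is to turn pairs of landmarks into bone rotations. Below is a rough sketch of one possible approach, not the implementation from the reference above; the landmark indices, rest direction, and coordinate handling are assumptions, and a complete solution also has to account for each parent bone's coordinate space.

// Convert a MediaPipe landmark to a Babylon vector.
// MediaPipe's y grows downward, so flip it; depth scaling is ignored here.
function landmarkToVector(lm) {
  return new BABYLON.Vector3(lm.x, -lm.y, lm.z);
}

// Rotate a bone's transform node so that its rest direction points from one
// landmark towards another. This works in a world-like space and ignores the
// parent bone's orientation, which a real mapping would need to handle.
function aimBoneAt(transformNode, fromLm, toLm, restDirection) {
  const target = landmarkToVector(toLm).subtract(landmarkToVector(fromLm)).normalize();
  const rest = restDirection.normalize();
  const axis = BABYLON.Vector3.Cross(rest, target);
  if (axis.length() < 1e-6) return; // vectors are parallel, nothing to rotate
  const dot = Math.max(-1, Math.min(1, BABYLON.Vector3.Dot(rest, target)));
  transformNode.rotationQuaternion = BABYLON.Quaternion.RotationAxis(axis.normalize(), Math.acos(dot));
}

// Example: drive the left upper arm from landmarks 11 (left shoulder) and
// 13 (left elbow), assuming the bone's rest pose points down -Y (rig-dependent).
// aimBoneAt(leftshoulder, landmarks[11], landmarks[13], new BABYLON.Vector3(0, -1, 0));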

4. Complete Code

index.html

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <title>MediaPipe Pose Driven Digital Human</title>
  <style>
    body {
      font-family: Arial, sans-serif;
      margin: 20px;
    }

    #topSection, #bottomSection {
      display: flex;
      justify-content: center;
      gap: 20px;
      margin-bottom: 20px;
    }

    video, canvas {
      width: 640px;
      height: 480px;
      border: 1px solid #ccc;
      background: #000;
    }

    #avatarCanvas {
      width: 640px;
      height: 480px;
      border: 1px solid #ccc;
      background: #111;
    }

    #output {
      width: 640px;
      height: 480px;
      overflow: auto;
      background: #eee;
      padding: 10px;
      white-space: pre-wrap;
      font-size: 12px;
      line-height: 1.4em;
      border: 1px solid #ccc;
    }
  </style>
</head>
<body>
  <h2>MediaPipe-Driven Digital Human</h2>
  <button id="saveBtn">Save Pose Keypoints</button>
  <input type="file" id="videoFileInput" accept="video/*" />

  <div id="topSection">
    <video id="inputVideo" controls></video>
    <canvas id="outputCanvas"></canvas>
  </div>

  <div id="bottomSection">
    <canvas id="avatarCanvas"></canvas>
  <pre id="output">Waiting for a video to load...</pre>
  </div>

  <script type="module" src="src/main.js"></script>
</body>
</html>

main.js

import { Pose, POSE_CONNECTIONS } from "@mediapipe/pose";
// Import Babylon.js
import * as BABYLON from "babylonjs";
// Import the glTF loaders (side-effect import that registers them)
import "babylonjs-loaders";

// Video input element
const videoElement = document.getElementById("inputVideo");
// Canvas on which the detected keypoints and skeleton are drawn
const canvasElement = document.getElementById("outputCanvas");
// 2D drawing context for the skeleton overlay
const canvasCtx = canvasElement.getContext("2d");
// Text output area for the keypoint coordinates
const outputDiv = document.getElementById("output");
const fileInput = document.getElementById("videoFileInput");
// Save-keypoints button
document.getElementById("saveBtn").addEventListener("click", () => {
  saveLandmarksToTxt(allLandmarkFrames);
});
// Canvas for the digital human
const avatarCanvas = document.getElementById("avatarCanvas");

const allLandmarkFrames = []; // Stores the pose landmarks of every processed frame

// === Module 1: set up the pose model and its result callback ===
function setupPoseDetection(onLandmarksDetected) {
  const pose = new Pose({
    locateFile: (file) =>
      `https://cdn.jsdelivr.net/npm/@mediapipe/pose/${file}`,
  });

  pose.setOptions({
    modelComplexity: 1,
    smoothLandmarks: true,
    minDetectionConfidence: 0.5,
    minTrackingConfidence: 0.5,
  });

  pose.onResults((results) => {
    canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height);

    if (!results.poseLandmarks) {
      outputDiv.textContent = "No pose detected";
      return;
    }
    // Deep-copy the landmarks so later mutation cannot affect the saved frames
    allLandmarkFrames.push(JSON.parse(JSON.stringify(results.poseLandmarks)));

    const landmarks = results.poseLandmarks;

    drawSkeleton(canvasCtx, landmarks);
    displayLandmarksText(landmarks);

    // Trigger custom handling (e.g. driving the digital human)
    if (onLandmarksDetected) {
      onLandmarksDetected(landmarks);
    }
  });

  return pose;
}

// === Helper 1: draw the skeleton lines and keypoints ===
function drawSkeleton(ctx, landmarks) {
  ctx.strokeStyle = "#00FF00";
  ctx.lineWidth = 2;

  POSE_CONNECTIONS.forEach(([startIdx, endIdx]) => {
    const start = landmarks[startIdx];
    const end = landmarks[endIdx];
    if (start.visibility > 0.5 && end.visibility > 0.5) {
      ctx.beginPath();
      ctx.moveTo(start.x * canvasElement.width, start.y * canvasElement.height);
      ctx.lineTo(end.x * canvasElement.width, end.y * canvasElement.height);
      ctx.stroke();
    }
  });

  landmarks.forEach((lm) => {
    if (lm.visibility > 0.5) {
      ctx.beginPath();
      ctx.arc(
        lm.x * canvasElement.width,
        lm.y * canvasElement.height,
        5,
        0,
        2 * Math.PI
      );
      ctx.fillStyle = "#FF0000";
      ctx.fill();
    }
  });
}

// === Helper 2: print the keypoints as text ===
function displayLandmarksText(landmarks) {
  outputDiv.textContent = landmarks
    .map(
      (lm, i) =>
        `Point ${i}: x=${lm.x.toFixed(3)}, y=${lm.y.toFixed(3)}, z=${lm.z.toFixed(
          3
        )}, visibility=${lm.visibility.toFixed(3)}`
    )
    .join("\n");
}

// === Helper 3: save the keypoint data to a .txt file ===
function saveLandmarksToTxt(frames, filename = "pose_data.txt") {
  if (!frames || frames.length === 0) {
    alert("No data to save");
    return;
  }

  const lines = frames.map((landmarks, frameIdx) => {
    const header = `Frame ${frameIdx + 1}`;
    const points = landmarks.map(
      (lm, i) =>
        `Point ${i}: x=${lm.x.toFixed(6)}, y=${lm.y.toFixed(6)}, z=${lm.z.toFixed(
          6
        )}, visibility=${lm.visibility.toFixed(6)}`
    );
    return [header, ...points].join("\n");
  });

  const content = lines.join("\n\n");

  const blob = new Blob([content], { type: "text/plain" });
  const url = URL.createObjectURL(blob);

  const a = document.createElement("a");
  a.href = url;
  a.download = filename;
  a.click();

  URL.revokeObjectURL(url);
}

// === Helper 4: load saved keypoint data from a .txt file ===
// utils/loadPoseTxt.js
async function loadPoseTxtFile(path) {
  const response = await fetch(path);
  if (!response.ok) {
    throw new Error(`Failed to read file: ${path}`);
  }

  const text = await response.text();
  const lines = text.split("\n");
  const frames = [];
  let currentFrame = [];

  for (const line of lines) {
    const trimmed = line.trim();

    // A frame header line marks the start of a new frame
    if (trimmed.startsWith("Frame")) {
      if (currentFrame.length > 0) {
        frames.push(currentFrame);
        currentFrame = [];
      }
    }

    // Match a point data line
    const match = trimmed.match(
      /x=([-.\d]+),\s*y=([-.\d]+),\s*z=([-.\d]+),\s*visibility=([-.\d]+)/
    );
    if (match) {
      const [_, x, y, z, visibility] = match.map(Number);
      currentFrame.push([x, y, z, visibility]);
    }
  }

  // Push the final frame
  if (currentFrame.length > 0) {
    frames.push(currentFrame);
  }

  return frames;
}

// === Module 2: load the video, play it, and send frames to Pose ===
function handleVideoInput(videoEl, fileInputEl, poseInstance) {
  fileInputEl.onchange = () => {
    const file = fileInputEl.files[0];
    if (!file) return;
    const url = URL.createObjectURL(file);
    videoEl.src = url;
    videoEl.play();
    outputDiv.textContent = "视频加载中...";
  };

  videoEl.onloadedmetadata = () => {
    canvasElement.width = videoEl.videoWidth;
    canvasElement.height = videoEl.videoHeight;
    canvasElement.style.width = videoEl.clientWidth + "px";
    canvasElement.style.height = videoEl.clientHeight + "px";
  };

  videoEl.onplay = () => {
    const processFrame = async () => {
      if (videoEl.paused || videoEl.ended) return;
      await poseInstance.send({ image: videoEl });
      requestAnimationFrame(processFrame);
    };
    processFrame();
  };
}

// === Module 3: digital human driving logic ===
// Create the engine and scene
const engine = new BABYLON.Engine(avatarCanvas, true);
const scene = new BABYLON.Scene(engine);
// Camera
const camera = new BABYLON.ArcRotateCamera(
  "camera",
  Math.PI / 2,
  Math.PI / 2,
  4,
  new BABYLON.Vector3(0, 0.9, -1),
  scene
);
// Mouse control of the camera (currently disabled)
// camera.attachControl(avatarCanvas, true);
// Light
const light = new BABYLON.DirectionalLight(
  "light",
  new BABYLON.Vector3(0, -1, -1),
  scene
);
light.intensity = 10;
// Load the model
const result = await BABYLON.SceneLoader.ImportMeshAsync(
  null,
  "/",
  "man.glb",
  scene
);

// Load the keypoint data saved to the txt file
const poseData = await loadPoseTxtFile("/pose_data.txt");
console.log(poseData[0]);

// Print the bone hierarchy
function printBoneHierarchy(skeleton) {
  const bones = skeleton.bones;
  // Find the root bones (those with no parent)
  const rootBones = bones.filter((bone) => bone.getParent() === null);
  function traverse(bone, level = 0) {
    const indent = "  ".repeat(level);
    console.log(`${indent}- ${bone.name}`);
    const children = bones.filter((b) => b.getParent() === bone);
    children.forEach((child) => traverse(child, level + 1));
  }
  rootBones.forEach((rootBone) => {
    traverse(rootBone);
  });
}
printBoneHierarchy(result.skeletons[0]);

const skeleton = result.skeletons[0]; // Get the first skeleton

// Get the bones to be driven; the trailing numbers are the matching MediaPipe landmark indices
// Left side
const leftshoulder = skeleton.bones.find((b) => b.name.toLowerCase().includes("leftarm")).getTransformNode(); // 11
const leftelbow = skeleton.bones.find((b) => b.name.toLowerCase().includes("leftforearm")).getTransformNode(); // 13
const leftwrist = skeleton.bones.find((b) => b.name.toLowerCase().includes("lefthand")).getTransformNode(); // 15
const lefthip = skeleton.bones.find((b) => b.name.toLowerCase().includes("leftupleg")).getTransformNode(); // 23
const leftknee = skeleton.bones.find((b) => b.name.toLowerCase().includes("leftleg")).getTransformNode(); // 25
const leftankle = skeleton.bones.find((b) => b.name.toLowerCase().includes("leftfoot")).getTransformNode(); // 27
// Right side
const rightshoulder = skeleton.bones.find((b) => b.name.toLowerCase().includes("rightarm")).getTransformNode(); // 12
const rightelbow = skeleton.bones.find((b) => b.name.toLowerCase().includes("rightforearm")).getTransformNode(); // 14
const rightwrist = skeleton.bones.find((b) => b.name.toLowerCase().includes("righthand")).getTransformNode(); // 16
const righthip = skeleton.bones.find((b) => b.name.toLowerCase().includes("rightupleg")).getTransformNode(); // 24
const rightknee = skeleton.bones.find((b) => b.name.toLowerCase().includes("rightleg")).getTransformNode(); // 26
const rightankle = skeleton.bones.find((b) => b.name.toLowerCase().includes("rightfoot")).getTransformNode(); // 28

// Center
const spine = skeleton.bones.find((b) => b.name.toLowerCase().includes("hips")).getTransformNode();

// Rotate a bone (quick test)
leftwrist.rotate(BABYLON.Axis.Y, Math.PI / 2, BABYLON.Space.LOCAL);

// Render loop
engine.runRenderLoop(() => {
  scene.render();
});

// === Initialization ===
const pose = setupPoseDetection((landmarks) => {
  // Frames are already saved inside the onResults handler of setupPoseDetection,
  // so pushing them again here would store every frame twice.
  // ✅ Add the digital-human driving logic here
  // updateAvatarWithLandmarks(landmarks);
});

handleVideoInput(videoElement, fileInput, pose);
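
The initialization callback leaves updateAvatarWithLandmarks commented out. One possible shape for that hook, reusing the aimBoneAt sketch from section 3 (the bone/landmark pairing and the rest direction are assumptions that depend on the rig):

// A possible sketch of the updateAvatarWithLandmarks hook referenced above,
// reusing the aimBoneAt helper sketched in section 3. The indices are MediaPipe
// Pose landmark indices; the rest direction is rig-dependent and assumed here.
function updateAvatarWithLandmarks(landmarks) {
  const down = new BABYLON.Vector3(0, -1, 0);
  // Upper arms: shoulder -> elbow
  aimBoneAt(leftshoulder, landmarks[11], landmarks[13], down);
  aimBoneAt(rightshoulder, landmarks[12], landmarks[14], down);
  // Forearms: elbow -> wrist
  aimBoneAt(leftelbow, landmarks[13], landmarks[15], down);
  aimBoneAt(rightelbow, landmarks[14], landmarks[16], down);
}

Uncommenting the updateAvatarWithLandmarks(landmarks) call in the initialization callback would then re-pose these bones on every processed frame.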