Flutter Development (Part 2): Detecting Silence in Media

Published: 2025-08-16

Analyzing silence in media takes a few steps:

  1. Extract the audio from the media;
  2. Convert the audio to raw PCM data;
  3. Analyze the PCM data and output the silence results.

Adding FFmpeg

We can use FFmpeg to extract the audio from the media.

To use FFmpeg in Flutter, we can use the ffmpeg_kit_flutter_new package.

flutter pub add ffmpeg_kit_flutter_new

Or add it to pubspec.yaml:

dependencies:
  flutter:
    sdk: flutter
  ……
  ffmpeg_kit_flutter_new: ^3.1.0

Then run flutter pub get.

After that, classes such as FFmpegKit are available to implement the functionality.

For example, the following code extracts the audio from the video file a.mp4 into a.mp3:

import 'package:ffmpeg_kit_flutter_new/ffmpeg_kit.dart';
import 'package:ffmpeg_kit_flutter_new/return_code.dart';
import 'package:logging/logging.dart';

final _logger = Logger('AudioExtraction');

final command = '-y -i a.mp4 a.mp3';

try {
  final session = await FFmpegKit.execute(command);
  final returnCode = await session.getReturnCode();

  if (!ReturnCode.isSuccess(returnCode)) {
    final logs = await session.getLogsAsString();
    _logger.warning(
        'FFmpeg execution failed for a.mp4. Return code: $returnCode\n$logs');
  }
} catch (e, s) {
  _logger.severe('Error during audio processing for a.mp4', e, s);
}
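In this command, -y tells FFmpeg to overwrite the output without prompting, -i names the input, and the MP3 encoding is inferred from the .mp3 extension of the output path.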

Pipes

The code above extracts the audio from the media, but it has to write the result to a file. On embedded devices, frequent I/O like this is unfriendly to the hardware.

We can use a pipe instead: FFmpeg writes into the pipe, while another asynchronous method reads the data from the pipe and parses it.

Using a pipe with FFmpeg is straightforward: call FFmpegKitConfig.registerNewFFmpegPipe() to obtain a pipe path, then pass that path to FFmpegKit.execute() as if it were an ordinary file path.

For example:

    // Register a pipe
    final pipePath = await FFmpegKitConfig.registerNewFFmpegPipe();
    if (pipePath == null) {
      _logger.severe('Failed to create an FFmpeg pipe.');
      return null;
    }

    // -f s16le: raw 16-bit little-endian integer samples
    // -ar 16000: 16,000 samples per second (matches the analysis below)
    // -ac 1: single channel
    // The pipe path is appended as the output path
    final command = '-y -i a.mp4 -f s16le -ar 16000 -ac 1 $pipePath';

    try {
      // onData is another async method that reads and parses the pipe's
      // data; it is defined later in this article.
      final processingFuture = onData(File(pipePath).openRead(), duration);

      final session = await FFmpegKit.execute(command);
      final returnCode = await session.getReturnCode();

      if (ReturnCode.isSuccess(returnCode)) {
        return await processingFuture;
      } else {
        final logs = await session.getLogsAsString();
        _logger.warning(
            'FFmpeg execution failed for a.mp4. Return code: $returnCode\n$logs');
      }
    } catch (e, s) {
      _logger.severe('Error during audio processing for a.mp4', e, s);
    } finally {
      // Release the pipe when done
      FFmpegKitConfig.closeFFmpegPipe(pipePath);
    }
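Note that the reader (onData) is started before FFmpegKit.execute() is awaited. With a named pipe this ordering matters: the writing side blocks until a reader has opened the pipe, so running FFmpeg to completion first and only reading afterwards would deadlock.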

Detecting silence

We have used FFmpeg to extract the audio from the media as raw PCM: 16,000 samples per second, each sample a 16-bit integer. With the decibel formula for sound levels, we can now judge, sample by sample, whether the audio is silent.
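Concretely, taking the 16-bit full scale of 32767 as the 0 dBFS reference, a sample s has a level of 20 · log10(|s| / 32767) dBFS. Inverting this gives the linear sample value corresponding to a threshold of T dB, which is exactly what the code below computes:

    threshold = 32767 · 10^(T / 20)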

    // Silence threshold, set to -30 dB for now.
    // Samples whose level falls below this value count as silent.
    final silenceThreshold = -30;

    // Minimum silence duration, set to 2 seconds.
    // Silent stretches shorter than this are ignored.
    final minSilenceDuration = Duration(seconds: 2);

    // Convert the decibel threshold to a 16-bit sample value
    final linearThreshold = pow(10, silenceThreshold / 20) * 32767;

    // Convert the minimum silence duration to a sample count
    final minSilenceSamples =
        (minSilenceDuration.inMilliseconds / 1000.0 * 16000).round();

    // Total sample count derived from the audio duration
    final totalSamples =
        (audioDuration.inMilliseconds / 1000.0 * 16000).round();

    int samplesProcessed = 0;
    int silenceStartSample = -1;

    await for (var chunk in pcmStream) {
      // Reinterpret the raw bytes as 16-bit samples; this assumes each
      // chunk contains a whole number of samples.
      final samples = Int16List.view(Uint8List.fromList(chunk).buffer);

      for (int i = 0; i < samples.length; i++) {
        final currentSampleIndex = samplesProcessed + i;
        final isSilent = samples[i].abs() < linearThreshold;

        // A silent sample after non-silent ones: silence starts here
        if (isSilent && silenceStartSample == -1) {
          silenceStartSample = currentSampleIndex;

        // A non-silent sample after silent ones: a silence segment ends
        } else if (!isSilent && silenceStartSample != -1) {
          final silentSamples = currentSampleIndex - silenceStartSample;

          // Record the segment if it exceeds the duration threshold
          if (silentSamples >= minSilenceSamples) {
            final startTime =
                _calculateTime(silenceStartSample, totalSamples, audioDuration);
            final endTime =
                _calculateTime(currentSampleIndex, totalSamples, audioDuration);
            silenceSegments.add((startTime, endTime));
          }
          silenceStartSample = -1;
        }
      }
      samplesProcessed += samples.length;
    }
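One edge case is still missing here: a silence segment that runs all the way to the end of the stream is never closed out. The full service below adds a check after the loop for exactly this case.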

Sample code

Putting the code above together, we write two services.

  • An FFmpegProcessingService, which invokes FFmpeg with the given parameters.
import 'dart:async';  
import 'dart:io';  
  
import 'package:ffmpeg_kit_flutter_new/ffmpeg_kit.dart';  
import 'package:ffmpeg_kit_flutter_new/ffmpeg_kit_config.dart';  
import 'package:ffmpeg_kit_flutter_new/return_code.dart';  
import 'package:logging/logging.dart';  
  
class FFmpegPcmConversionOptions {  
  final int sampleRate;  
  final String format;  
  final int channels;  
  
  FFmpegPcmConversionOptions({  
    required this.sampleRate,  
    required this.format,  
    this.channels = 1,  
  });  
  
  String toArgs() {  
    return '-f $format -ar $sampleRate -ac $channels';  
  }  
}  
  
class FFmpegProcessingService {  
  final _logger = Logger('FFmpegProcessingService');  
  
  Future<T?> processAudio<T>({  
    required String filePath,  
    required Duration duration,  
    required FFmpegPcmConversionOptions options,  
    required Future<T> Function(  
            Stream<List<int>> pcmStream, Duration audioDuration)  
        onData,  
  }) async {  
    final pipePath = await FFmpegKitConfig.registerNewFFmpegPipe();  
    if (pipePath == null) {  
      _logger.severe('Failed to create an FFmpeg pipe.');  
      return null;  
    }  
  
    final command = '-y -i "$filePath" ${options.toArgs()} $pipePath';  
  
    try {  
      final processingFuture = onData(File(pipePath).openRead(), duration);  
  
      final session = await FFmpegKit.execute(command);  
      final returnCode = await session.getReturnCode();  
  
      if (ReturnCode.isSuccess(returnCode)) {  
        return await processingFuture;  
      } else {  
        final logs = await session.getLogsAsString();  
        _logger.warning(  
            'FFmpeg execution failed for $filePath. Return code: $returnCode\n$logs');  
        return null;  
      }  
    } catch (e, s) {  
      _logger.severe('Error during audio processing for $filePath', e, s);  
      return null;  
    } finally {  
      FFmpegKitConfig.closeFFmpegPipe(pipePath);  
    }  
  }  
}
  • A SilenceDetectionService, which calls FFmpegProcessingService, registers the async callback, and produces the list of silence segments.
import 'dart:async';  
import 'dart:math';  
import 'dart:typed_data';  
  
import 'package:example/app/services/ffmpeg_processing_service.dart';
import 'package:logging/logging.dart';

class SilenceDetectionService {
  final _logger = Logger('SilenceDetectionService');
  final FFmpegProcessingService _ffmpegProcessingService =
      FFmpegProcessingService();

  Future<List<(Duration, Duration)>> findSilenceSegments(
    String filePath,
    Duration duration, {
    required double silenceThreshold,
    required Duration minSilenceDuration,
  }) async {
    try {
      final options =
          FFmpegPcmConversionOptions(sampleRate: 16000, format: 's16le');

      final result = await _ffmpegProcessingService.processAudio(
        filePath: filePath,
        duration: duration,
        options: options,
        onData: (pcmStream, audioDuration) => _performSilenceDetection(
          pcmStream: pcmStream,
          audioDuration: audioDuration,
          options: options,
          silenceThreshold: silenceThreshold,
          minSilenceDuration: minSilenceDuration,
        ),
      );
      return result ?? [];
    } catch (e, s) {
      _logger.severe(
          'Failed to analyze audio file for silence: $filePath', e, s);
      return [];
    }
  }

  Future<List<(Duration, Duration)>> _performSilenceDetection({
    required Stream<List<int>> pcmStream,
    required Duration audioDuration,
    required FFmpegPcmConversionOptions options,
    required double silenceThreshold,
    required Duration minSilenceDuration,
  }) async {
    final silenceSegments = <(Duration, Duration)>[];
    final linearThreshold = pow(10, silenceThreshold / 20) * 32767;
    final minSilenceSamples =
        (minSilenceDuration.inMilliseconds / 1000.0 * options.sampleRate)
            .round();
    final totalSamples =
        (audioDuration.inMilliseconds / 1000.0 * options.sampleRate).round();

    int samplesProcessed = 0;
    int silenceStartSample = -1;

    await for (var chunk in pcmStream) {
      // Reinterpret the raw bytes as 16-bit samples; this assumes each
      // chunk contains a whole number of samples.
      final samples = Int16List.view(Uint8List.fromList(chunk).buffer);

      for (int i = 0; i < samples.length; i++) {
        final currentSampleIndex = samplesProcessed + i;
        final isSilent = samples[i].abs() < linearThreshold;

        if (isSilent && silenceStartSample == -1) {
          silenceStartSample = currentSampleIndex;
        } else if (!isSilent && silenceStartSample != -1) {
          final silentSamples = currentSampleIndex - silenceStartSample;

          if (silentSamples >= minSilenceSamples) {
            final startTime =
                _calculateTime(silenceStartSample, totalSamples, audioDuration);
            final endTime =
                _calculateTime(currentSampleIndex, totalSamples, audioDuration);
            silenceSegments.add((startTime, endTime));
          }
          silenceStartSample = -1;
        }
      }
      samplesProcessed += samples.length;
    }

    if (silenceStartSample != -1) {
      final silentSamples = totalSamples - silenceStartSample;
      if (silentSamples >= minSilenceSamples) {
        final startTime =
            _calculateTime(silenceStartSample, totalSamples, audioDuration);
        silenceSegments.add((startTime, audioDuration));
      }
    }

    _logger.info(
        'Silence analysis complete, found ${silenceSegments.length} segments.');
    return silenceSegments;
  }

  // 通过采样点计算时间
  Duration _calculateTime(
      int sampleIndex, int totalSamples, Duration audioDuration) {
    if (totalSamples == 0) return Duration.zero;
    final ratio = sampleIndex / totalSamples;
    return Duration(
        milliseconds: (audioDuration.inMilliseconds * ratio).round());
  }
}
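Finally, a minimal usage sketch. The file path and duration below are hypothetical placeholders; in a real app they would come from your media picker or a metadata probe, and the call would run inside a Flutter app where the ffmpeg_kit_flutter_new platform channels are available:

Future<void> runSilenceDetection() async {
  final detector = SilenceDetectionService();

  // Hypothetical input; substitute a real media file and its duration.
  final segments = await detector.findSilenceSegments(
    '/storage/media/a.mp4',
    const Duration(minutes: 3),
    silenceThreshold: -30.0,
    minSilenceDuration: const Duration(seconds: 2),
  );

  for (final (start, end) in segments) {
    print('Silence from $start to $end');
  }
}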

