【android bluetooth 协议分析 02】【bluetooth hal 层详解 7】【高通蓝牙hal-读流程介绍】

发布于:2025-06-06 ⋅ 阅读:(14) ⋅ 点赞:(0)

1. 问题

今天遇到一个 hal 进程被杀掉的问题。借这个机会梳理一下:高通的 hal 侧在读到串口数据后,是如何上报给 bt.server 的。

先来看一下这个问题日志:

在这里插入图片描述


05-29 15:01:47.825621  1220  2976 E vendor.randroid.bluetooth@1.0-uart_controller: RxThreadTimeOut:Rx thread stuck detected and callback scheduled in TS:3000.00 ms

05-29 15:01:47.825674  1220  2976 E vendor.randroid.bluetooth@1.0-uart_controller: RxThreadTimeOut:reporting SSR.

高通的 hal 里面 报了 Rx thread stuck detected and callback scheduled in TS:3000.00 ms 错误。


// hidl_hci/1.0/default/uart_controller.cpp
void UartController::RxThreadTimeOut(union sigval sig)
{
  {
    std::unique_lock<std::mutex> guard(rx_thread_state_mutex_);
    UartController *uart_controller = static_cast<UartController*>(sig.sival_ptr);
    uart_controller->rx_timer_state_machine_.timer_state = TIMER_OVERFLOW;
    RxTimerSchedTs = std::chrono::steady_clock::now();
    ALOGE("%s:Rx thread stuck detected and callback scheduled in TS:%0.2lf ms",
          __func__, uart_controller->GetRxThreadSchedTSDiff());
  }

  {
    std::unique_lock<std::mutex> guard(rx_thread_timer_mutex_);
    UartController *uart_controller = static_cast<UartController*>(sig.sival_ptr);
    if (uart_controller->GetRxThreadTimerState() != TIMER_OVERFLOW) {
      ALOGI("%s: Rx thread is unblocked resuming back", __func__);
      return;
    }

    ALOGE("%s:reporting SSR.", __func__);
    uart_controller->SsrCleanup(BT_HOST_REASON_RX_THREAD_STUCK);
  }
}
  • 从函数中可以看到是 定时器超时导致的。

看到这里 请思考如下几个问题?

  • 当前定时器 是干什么的?
  • 超时时间多少?
  • 报这个错意味着什么?
  • 这里先剧透一下,这个定时器和 hal 从uart 中读出数据,上报链路 有关。那hal 是如何从 /dev/ttyHS0 中读出数据,上报给 bt.server 的呢?

2. RxThreadTime 定时器

1. 启动定时器

// hidl_hci/1.0/default/uart_controller.cpp

#define RX_THREAD_USAGE_TIMEOUT (3000)

// Arms the one-shot Rx-thread watchdog timer.
// Creates the POSIX timer on first use (state TIMER_NOT_CREATED), then, when
// the state is TIMER_CREATED or TIMER_OVERFLOW, programs it to fire
// RxThreadTimeOut after RX_THREAD_USAGE_TIMEOUT milliseconds and moves the
// state to TIMER_ACTIVE. RxTimerStartTs is refreshed on every call.
void UartController::StartRxThreadTimer()
{
  int status;
  // Zero-initialise so no field is handed to the kernel with stack garbage.
  struct itimerspec ts = {};
  struct sigevent se = {};

  ALOGV("%s", __func__);
  if (GetRxThreadTimerState() == TIMER_NOT_CREATED) {
    // 1. Register the expiry callback; it receives `this` through sival_ptr.
    se.sigev_notify_function = (void (*)(union sigval))RxThreadTimeOut;
    se.sigev_notify = SIGEV_THREAD;
    se.sigev_value.sival_ptr = this;
    se.sigev_notify_attributes = NULL;

    // 2. Create the timer.
    status = timer_create(CLOCK_MONOTONIC, &se, &rx_timer_state_machine_.timer_id);
    if (status == 0)
      SetRxThreadTimerState(TIMER_CREATED);
    else
      // Previously silent: without this log a missing watchdog is invisible.
      ALOGE("%s:Failed to create RxThread Usage timer", __func__);
  }

  if ((GetRxThreadTimerState() == TIMER_CREATED) ||
      (GetRxThreadTimerState() == TIMER_OVERFLOW)) {

    // 3. Split the millisecond timeout into seconds + nanoseconds.
    rx_timer_state_machine_.timeout_ms = RX_THREAD_USAGE_TIMEOUT;
    ts.it_value.tv_sec = rx_timer_state_machine_.timeout_ms / 1000;
    ts.it_value.tv_nsec = 1000000 * (rx_timer_state_machine_.timeout_ms % 1000);
    // A zero interval makes the timer one-shot.
    ts.it_interval.tv_sec = 0;
    ts.it_interval.tv_nsec = 0;

    // 4. Arm the timer; the callback fires when it expires.
    status = timer_settime(rx_timer_state_machine_.timer_id, 0, &ts, 0);
    if (status == -1)
      ALOGE("%s:Failed to set RxThread Usage timer", __func__);
    else
      SetRxThreadTimerState(TIMER_ACTIVE);
  }

  RxTimerStartTs = std::chrono::steady_clock::now();
}
  • UartController::StartRxThreadTimer 函数 创建了一个定时器 ,并在 3s 后触发 超时函数:RxThreadTimeOut

2. 超时处理


// hidl_hci/1.0/default/uart_controller.cpp
void UartController::RxThreadTimeOut(union sigval sig)
{
  {
    std::unique_lock<std::mutex> guard(rx_thread_state_mutex_);
    UartController *uart_controller = static_cast<UartController*>(sig.sival_ptr);
    uart_controller->rx_timer_state_machine_.timer_state = TIMER_OVERFLOW;
    RxTimerSchedTs = std::chrono::steady_clock::now();
    ALOGE("%s:Rx thread stuck detected and callback scheduled in TS:%0.2lf ms",
          __func__, uart_controller->GetRxThreadSchedTSDiff());
  }

  {
    std::unique_lock<std::mutex> guard(rx_thread_timer_mutex_);
    UartController *uart_controller = static_cast<UartController*>(sig.sival_ptr);
    if (uart_controller->GetRxThreadTimerState() != TIMER_OVERFLOW) {
      ALOGI("%s: Rx thread is unblocked resuming back", __func__);
      return;
    }

    ALOGE("%s:reporting SSR.", __func__);
    uart_controller->SsrCleanup(BT_HOST_REASON_RX_THREAD_STUCK);
  }
}

3. 停止定时器

// hidl_hci/1.0/default/uart_controller.cpp

// Disarms the Rx-thread watchdog timer, if one has been created.
// On success the timer state returns to TIMER_CREATED; if disarming fails
// the state is left unchanged and an error is logged.
void UartController::StopRxThreadTimer()
{
  // No timer exists yet, so there is nothing to disarm.
  if (GetRxThreadTimerState() == TIMER_NOT_CREATED)
    return;

  // All-zero value and interval: passing this to timer_settime disarms it.
  struct itimerspec disarm_spec = {};

  const int rc = timer_settime(rx_timer_state_machine_.timer_id, 0, &disarm_spec, 0);
  if (rc == -1) {
    ALOGE("%s:Failed to stop Rx thread timer",__func__);
    return;
  }

  ALOGV("%s: Rx thread timer Stopped",__func__);
  SetRxThreadTimerState(TIMER_CREATED);
}

4. 实际使用定时器

hal 进程每次从 /dev/ttyHSx 读到数据后,就会回调 DataHandler::InternalOnPacketReady 函数。至于为什么会回调到这里,在本篇其他小节阐述。


// hidl_hci/1.0/default/data_handler.cpp
// Dispatches one received packet to the callbacks registered for `ptype`.
// Looks up the ProtocolCallbacksType entry in protocol_info_; when an entry
// and a controller exist and the init callback is no longer pending, the
// packet is handed to data_read_cb, bracketed by StartRxThreadTimer() and
// StopRxThreadTimer().
// NOTE: this listing is an excerpt; the "..." line marks code omitted here.
void DataHandler::InternalOnPacketReady(ProtocolType ptype, HciPacketType type,
                          const hidl_vec<uint8_t>*hidl_data, bool from_soc) {
  UNUSED(from_soc);
  uint16_t len = hidl_data->size();
  const uint8_t* data = hidl_data->data();
  ProtocolCallbacksType *cb_data = nullptr;
  static bool reset_rxthread_stuck_prop = true;
  std::map<ProtocolType, ProtocolCallbacksType *>::iterator it;

  // update the pending Init cb and other callbacks
  it = protocol_info_.find(ptype); 
  if (it != protocol_info_.end()) {
    cb_data = (ProtocolCallbacksType*)it->second; // callbacks registered for ptype in protocol_info_
  } else {
    ALOGE("%s: Didnt get the callbacks", __func__);
  }

  // execute callbacks here
  if (cb_data != nullptr && controller_ != nullptr) {
    if (!cb_data->is_pending_init_cb ) {
      // The timer is skipped only when SSR is triggered AND BQR RIE is enabled.
      if (!diag_interface_.isSsrTriggered() ||
          !((UartController *)controller_)->IsBqrRieEnabled()) {
            // 1. Start the Rx-thread timer.
            controller_->StartRxThreadTimer();
      }
      // 2. Hand the received packet to the registered data_read_cb callback.
      cb_data->data_read_cb(type, hidl_data);

	  // 3. Stop the Rx-thread timer once the callback has returned.
      controller_->StopRxThreadTimer();
    }

...

}

1. data_read_cb 调用的是谁

这里 我们 先来看一下 cb_data->data_read_cb(type, hidl_data); 这个回调是在哪里:

void DataHandler::InternalOnPacketReady(ProtocolType ptype, HciPacketType type,
                          const hidl_vec<uint8_t>*hidl_data, bool from_soc) {

  // update the pending Init cb and other callbacks
  it = protocol_info_.find(ptype);  // 这里的 ptype == TYPE_BT
  if (it != protocol_info_.end()) {
    cb_data = (ProtocolCallbacksType*)it->second; // 根据  ptype 从 protocol_info_ 找到对应的回调
  } else {
    ALOGE("%s: Didnt get the callbacks", __func__);
  }

下面是 cb_data->data_read_cb(type, hidl_data);

  • data_read_cb 的 初始化流程
// hidl_hci/1.0/default/bluetooth_hci.cpp

// HIDL entry point that registers the client's callback object.
// Rejects a null `cb`; otherwise keeps a strong reference in event_cb_tmp and
// calls DataHandler::Init for TYPE_BT.
// NOTE: this listing is an excerpt; ".A." and ".B." stand for the two callback
// arguments and "..." marks code omitted here.
Return<void> BluetoothHci::initialize(
  const ::android::sp<IBluetoothHciCallbacks>& cb /* callback object passed down by bt.server */)
{
  bool rc = false;

  ALOGW("BluetoothHci::initialize(), slot%d", hw_slot_);
  if (cb == nullptr) {
    ALOGE("%s: Received NULL callback from BT client", __func__);
    return Void();
  }
  ::android::sp<IBluetoothHciCallbacks> event_cb_tmp;
  event_cb_tmp = cb;

    rc = DataHandler::Init( TYPE_BT, .A. , .B.); // the .B. argument is what becomes data_read_cb

...

}



// hidl_hci/1.0/default/data_handler.cpp


// Forwards the protocol type and both callbacks to data_handler->Open() and
// returns its result. data_read_cb is passed through unchanged.
bool DataHandler::Init(ProtocolType type, InitializeCallback init_cb,
                       DataReadCallback data_read_cb)
{
  const bool opened = data_handler->Open(type, init_cb, data_read_cb);
  return opened;
}

// Registers the callbacks for `type` in protocol_info_ when no entry exists
// yet. The new entry starts with is_pending_init_cb set to true.
// NOTE: this listing is an excerpt; the "..." lines mark code omitted here.
bool DataHandler::Open(ProtocolType type, InitializeCallback init_cb,
                       DataReadCallback data_read_cb /* the callback of interest here */)
{
	...
  it = protocol_info_.find(type);
  if (it == protocol_info_.end()) {
    ProtocolCallbacksType *cb_data  = new (ProtocolCallbacksType);
    cb_data->type = type;
    cb_data->is_pending_init_cb = true;
    cb_data->init_cb = init_cb;
    cb_data->data_read_cb = data_read_cb; // stored here; invoked later via cb_data->data_read_cb(...)
    protocol_info_[type] = cb_data;
  }

	...
}



// 此时我们在来看这段函数 此时是不是就能清楚的看到   cb_data->data_read_cb(type, hidl_data);  调用的是谁了。
// hidl_hci/1.0/default/data_handler.cpp
// Dispatches one received packet to the callbacks registered for `ptype`.
// Looks up the ProtocolCallbacksType entry in protocol_info_; when an entry
// and a controller exist and the init callback is no longer pending, the
// packet is handed to data_read_cb, bracketed by StartRxThreadTimer() and
// StopRxThreadTimer().
// NOTE: this listing is an excerpt; the "..." line marks code omitted here.
void DataHandler::InternalOnPacketReady(ProtocolType ptype, HciPacketType type,
                          const hidl_vec<uint8_t>*hidl_data, bool from_soc) {
  UNUSED(from_soc);
  uint16_t len = hidl_data->size();
  const uint8_t* data = hidl_data->data();
  ProtocolCallbacksType *cb_data = nullptr;
  static bool reset_rxthread_stuck_prop = true;
  std::map<ProtocolType, ProtocolCallbacksType *>::iterator it;

  // update the pending Init cb and other callbacks
  it = protocol_info_.find(ptype); 
  if (it != protocol_info_.end()) {
    cb_data = (ProtocolCallbacksType*)it->second; // callbacks registered for ptype in protocol_info_
  } else {
    ALOGE("%s: Didnt get the callbacks", __func__);
  }

  // execute callbacks here
  if (cb_data != nullptr && controller_ != nullptr) {
    if (!cb_data->is_pending_init_cb ) {
      // The timer is skipped only when SSR is triggered AND BQR RIE is enabled.
      if (!diag_interface_.isSsrTriggered() ||
          !((UartController *)controller_)->IsBqrRieEnabled()) {
            // 1. Start the Rx-thread timer.
            controller_->StartRxThreadTimer();
      }
      // 2. Hand the received packet to the registered data_read_cb callback.
      cb_data->data_read_cb(type, hidl_data);

	  // 3. Stop the Rx-thread timer once the callback has returned.
      controller_->StopRxThreadTimer();
    }

...

}

BluetoothHci::initialize 函数是什么时候触发调用的请 参照 :


我将 .B. 单独摘出来了。 cb_data->data_read_cb(type, hidl_data); 将回调到这里


      // Data-read callback: forwards a received packet to the client's
      // IBluetoothHciCallbacks object. Events go to hciEventReceived and ACL
      // data to aclDataReceived; any other packet type is only logged.
      // The client is marked dead when the callback object is null or when a
      // callback call does not return OK.
      [this, event_cb_tmp](HciPacketType type, const hidl_vec<uint8_t> *packet) {
        DataHandler *handler = DataHandler::Get();

        // Flags the BT client as dead, provided a handler instance exists.
        auto mark_client_dead = [handler]() {
          if (handler)
            handler->SetClientStatus(false, TYPE_BT);
        };

        if (event_cb_tmp == nullptr) {
          ALOGE("BluetoothHci: event_cb_tmp is null");
          mark_client_dead();
          return;
        }

        /* Skip calling client callback when client is dead */
        if (handler && !handler->GetClientStatus(TYPE_BT)) {
          ALOGI("%s: Skip calling client callback when client is dead", __func__);
          return;
        }

        Logger::Get()->UpdateRxTimeStamp();

        if (type == HCI_PACKET_TYPE_EVENT) {
          // HCI event: deliver through the client's hciEventReceived callback.
          auto event_result = event_cb_tmp->hciEventReceived(*packet);
          if (!event_result.isOk()) {
            ALOGE("Client dead, callback hciEventReceived failed");
            mark_client_dead();
          }
        } else if (type == HCI_PACKET_TYPE_ACL_DATA) {
          // ACL data: deliver through the client's aclDataReceived callback.
          auto acl_result = event_cb_tmp->aclDataReceived(*packet);
          if (!acl_result.isOk()) {
            ALOGE("Client dead, callback aclDataReceived failed");
            mark_client_dead();
          }
        } else {
          ALOGE("%s Unexpected event type %d", __func__, type);
        }
      }

5. 小结

当每次 从 /dev/ttyHSx 中读到 串口上的数据时,最终会回调到 DataHandler::InternalOnPacketReady 函数中:

  1. 在向 bt.server 传递 hci event、acl 数据之前,会先启动一个 3 s 的定时器。
  2. 再向 bt.server 回调数据。当 bt.server 处理完成后,回调返回。
  3. 此时取消之前启动的定时器。

从这个过程可以清楚地看到,RxThreadTime 定时器就是为了保证 hal -> bt.server 链路始终处于正常运转状态。如果超时,说明当前链路出现了问题,有可能是协议栈没有及时处理。此时 hal 定时器超时,杀死 hal 进程。


3. hal 如何从串口读数据并回调

现在 还有一个问题就是 如何从 /dev/ttyHSx 读到数据, 最终为何会回调到 DataHandler::InternalOnPacketReady 函数? 这也是本篇 另外一个重要的议题。

1. 相关函数讲解

1. DataHandler::Open



// Initialises the controller, retrying while Init() reports failure, for up
// to INIT_MAX_RETRY_TIMES attempts.
// NOTE: this listing is an excerpt; the "..." lines mark code omitted here.
bool DataHandler::Open(ProtocolType type, InitializeCallback init_cb,
                       DataReadCallback data_read_cb)
{
...

    if (controller_) {
      int retry_count = 0;
      while (retry_count < INIT_MAX_RETRY_TIMES) {

        // The callback handed to controller_->Init forwards every packet to
        // DataHandler::OnPacketReady.
        status = controller_->Init([this](ProtocolType ptype, HciPacketType type,
                                          const hidl_vec<uint8_t> *hidl_data)   {
                                     OnPacketReady(ptype, type, hidl_data);
                                   });
        if (status)
          break;
        ++retry_count;
      }
    }

...

}


// 每次调用  DataHandler::OnPacketReady 就会触发 InternalOnPacketReady 调用

// Entry point for a completed packet; passes all four arguments on to
// InternalOnPacketReady.
// NOTE: this listing is an excerpt; the "..." line marks code omitted here.
void DataHandler::OnPacketReady(ProtocolType ptype, HciPacketType type,
                                const hidl_vec<uint8_t>*hidl_data, bool from_soc)
{
...
  InternalOnPacketReady(ptype, type, hidl_data, from_soc);
  return;
}

2. UartController::Init

status = controller_->Init([this](ProtocolType ptype, HciPacketType type,
                                          const hidl_vec<uint8_t> *hidl_data)   {
                                     OnPacketReady(ptype, type, hidl_data);
                                   });

// Stores the caller-supplied packet callback in read_cb_.
// NOTE: this listing is an excerpt; the "..." lines mark code omitted here.
bool UartController::Init(PacketReadCallback pkt_read_cb){
...

  ALOGI("soc need reload patch = %d", soc_need_reload_patch);
  read_cb_ = pkt_read_cb; // the callback is registered in UartController::read_cb_

...
}

那这里的 read_cb_ 是在哪里回调的?

// Invokes read_cb_, when one is set, with the protocol derived from
// hci_packet_type_, the packet type itself, and the packet data.
// NOTE: this listing is an excerpt; the "..." lines mark code omitted here.
void UartController::OnPacketReady(hidl_vec<uint8_t> *data)
{
...
  if (read_cb_) {
    read_cb_(GetProtocol(hci_packet_type_), hci_packet_type_, data);
  }
 ...
}

3. UartController::UartController

那谁来回调 UartController::OnPacketReady

在UartController 构造函数里面,我们将 UartController::OnPacketReady 传递给了 HciPacketizer 对象


HciPacketizer hci_packetizer_;

// Constructs the controller for the given SoC type. The packetizer is given
// a callback that routes each completed packet to OnPacketReady().
UartController::UartController(BluetoothSocType soc_type)
  : soc_crashed(false),
    soc_type_(soc_type),
    hci_packetizer_([this](hidl_vec<uint8_t> *packet) { OnPacketReady(packet); })
{
}

4. HciPacketizer

class HciPacketizer {
 public:
  HciPacketizer(_HciPacketReadyCallback packet_cb)
      : _packet_ready_cb_(packet_cb){ HciPacketReadyCallback_flag = 1; };

在 HciPacketizer 构造中,将 UartController::OnPacketReady 保存到了 _packet_ready_cb_ 变量中

_packet_ready_cb_ 在哪里回调



// Reads payload bytes for the packet currently being assembled.
// In state HCI_PAYLOAD it reads up to bytes_remaining_ bytes from `fd` into
// the packet buffer, after the preamble and the bytes already received.
// When the payload is complete, the registered packet-ready callback is
// invoked and the state returns to HCI_PREAMBLE.
// Both branches treat a read() result of 0 as unexpected EOF and a negative
// result as fatal, so the byte counters are never updated with an invalid
// length.
void HciPacketizer::OnDataReady(int fd, HciPacketType packet_type)
{
  switch (state_) {
    case HCI_PAYLOAD:
    if (HciPacketReadyCallback_flag)
    {
      // Read from the fd into the packet_new_ buffer.
      ssize_t bytes_read = TEMP_FAILURE_RETRY(read(
                           fd, packet_new_->data() + GetPreambleSizeForType(packet_type)
                           + bytes_read_, bytes_remaining_));
      if (bytes_read == 0) {
        // This is only expected if the UART got closed when shutting down.
        ALOGE("%s: Unexpected EOF reading the payload!", __func__);
        sleep(5);  // Expect to be shut down within 5 seconds.
        return;
      }
      if (bytes_read < 0) {
        // Without this check a -1 would grow bytes_remaining_ and shrink
        // bytes_read_, making the next read target the wrong offset.
        LOG_ALWAYS_FATAL("%s: Read payload error: %s", __func__,
                        strerror(errno));
      }
      retry_read_ = 0;
      bytes_remaining_ -= bytes_read;
      bytes_read_ += bytes_read;
      if (bytes_remaining_ == 0) {
        _packet_ready_cb_(packet_new_); // invoke the registered callback
        state_ = HCI_PREAMBLE;
        bytes_read_ = 0;
      }
      break;
    } else {
      ssize_t bytes_read = TEMP_FAILURE_RETRY(read(
                        fd, packet_.data() + GetPreambleSizeForType(packet_type) + bytes_read_,
                        bytes_remaining_));
      if (bytes_read == 0) {
        // This is only expected if the UART got closed when shutting down.
        ALOGE("%s: Unexpected EOF reading the payload!", __func__);
        sleep(5);  // Expect to be shut down within 5 seconds.
        return;
      }
      if (bytes_read < 0) {
        LOG_ALWAYS_FATAL("%s: Read payload error: %s", __func__,
                        strerror(errno));
      }
      bytes_remaining_ -= bytes_read;
      bytes_read_ += bytes_read;
      if (bytes_remaining_ == 0) {
        packet_ready_cb_();
        state_ = HCI_PREAMBLE;
        bytes_read_ = 0;
      }
      break;
    }
  }
}

HciPacketizer::OnDataReady 在哪里回调的?

5. UartController::OnDataReady

// Passes the readable fd and the current hci_packet_type_ to the packetizer.
// NOTE: this listing is an excerpt; the "..." lines mark code omitted here.
void UartController::OnDataReady(int fd)
{
	...
	hci_packetizer_.OnDataReady(fd, hci_packet_type_);
	...
}

6. UartController::Init

// Registers the UART control fd with fd_watcher_, so that OnDataReady(fd) is
// the callback associated with that fd.
// NOTE: this listing is an excerpt; the "..." lines mark code omitted here.
bool UartController::Init(PacketReadCallback pkt_read_cb)
{
...

  // set up the fd watcher now
  ret = fd_watcher_.WatchFdForNonBlockingReads(
              uart_transport->GetCtrlFd(),
                [this](int fd) { OnDataReady(fd); });

...

}

7. AsyncFdWatcher::WatchFdForNonBlockingReads

// Registers `on_read_fd_ready_callback` for `file_descriptor` in watched_fds_
// (replacing any existing entry for that fd), then makes sure the watcher
// thread is started. Returns the result of TryStartThread().
int AsyncFdWatcher::WatchFdForNonBlockingReads(
  int file_descriptor, const ReadCallback& on_read_fd_ready_callback)
{
  // Update the fd -> callback table under the internal mutex only.
  {
    std::lock_guard<std::mutex> table_lock(internal_mutex_);
    watched_fds_[file_descriptor] = on_read_fd_ready_callback;
  }

  // The lock is released before the thread is (possibly) started.
  const int start_result = TryStartThread();
  return start_result;
}



// Starts the watcher thread unless it is already marked as running.
// Returns 0 when the thread is running (already or newly started), and -1
// when the notification pipe cannot be created or the thread is not joinable.
int AsyncFdWatcher::TryStartThread()
{
  // Atomically claim the "running" flag; a previous value of true means
  // another caller has already started the thread.
  if (std::atomic_exchange(&running_, true)) return 0;

  // Set up the communication channel
  int pipe_fds[2];
  if (pipe2(pipe_fds, O_NONBLOCK)) {
    // Release the flag again: no thread was started, and leaving it set
    // would make every later call return 0 without ever starting one.
    running_ = false;
    return -1;
  }

  notification_listen_fd_ = pipe_fds[0];
  notification_write_fd_ = pipe_fds[1];

  thread_ = std::thread([this]() { ThreadRoutine(); });
  if (!thread_.joinable()) return -1;

  return 0;
}

8. AsyncFdWatcher::ThreadRoutine

// hidl_hci/1.0/default/async_fd_watcher.cpp
// Body of the watcher thread. While running_ is true it:
//   1. builds an fd_set from the notification pipe and every fd in watched_fds_,
//   2. blocks in select() (with a timeout only when timeout_ms_ > 0),
//   3. drains one byte from the notification pipe if it is readable and
//      restarts the loop,
//   4. otherwise invokes the callback of every watched fd that is readable.
// NOTE: the timeout branch is empty in this listing.
void AsyncFdWatcher::ThreadRoutine()
{

  while (running_) {
    fd_set read_fds;
    FD_ZERO(&read_fds);
    FD_SET(notification_listen_fd_, &read_fds);
    int max_read_fd = INVALID_FD;

    // watched_fds_ holds the fds registered through WatchFdForNonBlockingReads.
    // NOTE(review): this loop reads watched_fds_ without taking internal_mutex_,
    // unlike the callback-collection loop further down - confirm this is safe.
    for (auto& it : watched_fds_) {
      FD_SET(it.first, &read_fds); // add each watched fd to the read set
      max_read_fd = std::max(max_read_fd, it.first);
    }

    struct timeval timeout;
    struct timeval* timeout_ptr = NULL;
    if (timeout_ms_ > std::chrono::milliseconds(0)) {
      timeout.tv_sec = timeout_ms_.count() / 1000;
      timeout.tv_usec = (timeout_ms_.count() % 1000) * 1000;
      timeout_ptr = &timeout;
    }

    // Wait until there is data available to read on some FD.
    int nfds = std::max(notification_listen_fd_, max_read_fd);

    // select() returns when a watched fd becomes readable or the timeout expires.
    /*
	    int select(int nfds, fd_set *readfds, fd_set *writefds,
                  fd_set *exceptfds, struct timeval *timeout);
    
    */
	int retval = select(nfds + 1, &read_fds/* only readability is monitored */, NULL, NULL, timeout_ptr);

    // There was some error.
    if (retval < 0) continue;

    // Timeout.
    if (retval == 0) {
		// Timeout handling is left out of this listing.
    }

    // Read data from the notification FD.
    if (FD_ISSET(notification_listen_fd_, &read_fds)) {
      char buffer[] = { 0 };
      TEMP_FAILURE_RETRY(read(notification_listen_fd_, buffer, 1));
      continue;
    }

    // Invoke the data ready callbacks if appropriate.
    std::vector<decltype(watched_fds_) ::value_type> saved_callbacks;
    {
      std::unique_lock<std::mutex> guard(internal_mutex_);
      for (auto& it : watched_fds_) {
        // Walk the registered fds and pick the readable ones.
        if (FD_ISSET(it.first, &read_fds)) {
          // Copy the (fd, callback) pair so the callback runs after the lock is released.
          saved_callbacks.push_back(it);
        }
      }
    }

    for (auto& it : saved_callbacks) {
      if (it.second) {
        it.second(it.first); // call each saved callback with its fd
      }
    }
  }
  ALOGE("%s: End of AsyncFdWatcher::ThreadRoutine", __func__);
}

2. 回调路径总结


DataHandler::Open()
	controller_->Init(携带 DataHandler::OnPacketReady 回调 )
		启动 AsyncFdWatcher::ThreadRoutine 监听 /dev/ttyHSx 是否可读


UartController::OnPacketReady 的回调会触发 DataHandler::OnPacketReady 回调

在 UartController 构造中,将 UartController::OnPacketReady 传入  HciPacketizer 中


最终 在 HciPacketizer::OnDataReady 中会触发 UartController::OnPacketReady 回调


UartController::OnDataReady 会触发 HciPacketizer::OnDataReady



AsyncFdWatcher::ThreadRoutine 中监听 /dev/ttyHSx fd,如果可读,此时会回调 UartController::OnDataReady

所以当 AsyncFdWatcher::ThreadRoutine 监听到 /dev/ttyHSx 有数据可读:
-> UartController::OnDataReady -> HciPacketizer::OnDataReady -> UartController::OnPacketReady -> DataHandler::OnPacketReady -> DataHandler::InternalOnPacketReady


希望你没有学废, 反正我已经废了。 欢迎评论。一起进步!!!