1. 问题
今天遇到一个 hal 进程被干掉的问题。借这个机会我来梳理一下:高通的 hal 侧在读到串口数据后,是如何上报给 bt.server 的。
先来看一下这个问题日志:
05-29 15:01:47.825621 1220 2976 E vendor.randroid.bluetooth@1.0-uart_controller: RxThreadTimeOut:Rx thread stuck detected and callback scheduled in TS:3000.00 ms
05-29 15:01:47.825674 1220 2976 E vendor.randroid.bluetooth@1.0-uart_controller: RxThreadTimeOut:reporting SSR.
高通的 hal 里面 报了 Rx thread stuck detected and callback scheduled in TS:3000.00 ms 错误。
// hidl_hci/1.0/default/uart_controller.cpp
void UartController::RxThreadTimeOut(union sigval sig)
{
{
std::unique_lock<std::mutex> guard(rx_thread_state_mutex_);
UartController *uart_controller = static_cast<UartController*>(sig.sival_ptr);
uart_controller->rx_timer_state_machine_.timer_state = TIMER_OVERFLOW;
RxTimerSchedTs = std::chrono::steady_clock::now();
ALOGE("%s:Rx thread stuck detected and callback scheduled in TS:%0.2lf ms",
__func__, uart_controller->GetRxThreadSchedTSDiff());
}
{
std::unique_lock<std::mutex> guard(rx_thread_timer_mutex_);
UartController *uart_controller = static_cast<UartController*>(sig.sival_ptr);
if (uart_controller->GetRxThreadTimerState() != TIMER_OVERFLOW) {
ALOGI("%s: Rx thread is unblocked resuming back", __func__);
return;
}
ALOGE("%s:reporting SSR.", __func__);
uart_controller->SsrCleanup(BT_HOST_REASON_RX_THREAD_STUCK);
}
}
- 从函数中可以看到是 定时器超时导致的。
看到这里,请思考如下几个问题:
- 当前定时器 是干什么的?
- 超时时间多少?
- 报这个错意味着什么?
- 这里先剧透一下,这个定时器和 hal 从uart 中读出数据,上报链路 有关。那hal 是如何从 /dev/ttyHS0 中读出数据,上报给 bt.server 的呢?
2. RxThreadTime 定时器
1. 启动定时器
// hidl_hci/1.0/default/uart_controller.cpp
#define RX_THREAD_USAGE_TIMEOUT (3000)
// Arms the Rx-thread watchdog timer, creating the POSIX timer on first use.
// The timer is one-shot (it_interval is zero) and expires after
// RX_THREAD_USAGE_TIMEOUT milliseconds, at which point RxThreadTimeOut is
// invoked on a notification thread with `this` in sigval.sival_ptr.
void UartController::StartRxThreadTimer()
{
  ALOGV("%s", __func__);

  if (GetRxThreadTimerState() == TIMER_NOT_CREATED) {
    // Zero-initialise so that no sigevent field is left indeterminate.
    struct sigevent se = {};
    // 1. Register the expiry handler.
    se.sigev_notify_function = (void (*)(union sigval))RxThreadTimeOut;
    se.sigev_notify = SIGEV_THREAD;
    se.sigev_value.sival_ptr = this;
    se.sigev_notify_attributes = NULL;

    // 2. Create the timer. A failure leaves the state at TIMER_NOT_CREATED,
    //    so the arming step below is skipped; log it instead of failing
    //    silently.
    const int create_status =
        timer_create(CLOCK_MONOTONIC, &se, &rx_timer_state_machine_.timer_id);
    if (create_status == 0) {
      SetRxThreadTimerState(TIMER_CREATED);
    } else {
      ALOGE("%s:Failed to create RxThread Usage timer", __func__);
    }
  }

  if ((GetRxThreadTimerState() == TIMER_CREATED) ||
      (GetRxThreadTimerState() == TIMER_OVERFLOW)) {
    // 3. Program the timeout (RX_THREAD_USAGE_TIMEOUT ms, i.e. 3 s).
    rx_timer_state_machine_.timeout_ms = RX_THREAD_USAGE_TIMEOUT;

    struct itimerspec ts = {};  // it_interval stays zero: fire once only
    ts.it_value.tv_sec = rx_timer_state_machine_.timeout_ms / 1000;
    ts.it_value.tv_nsec = 1000000 * (rx_timer_state_machine_.timeout_ms % 1000);

    // 4. Arm the timer; the expiry handler runs when it_value elapses.
    const int arm_status =
        timer_settime(rx_timer_state_machine_.timer_id, 0, &ts, 0);
    if (arm_status == -1) {
      ALOGE("%s:Failed to set RxThread Usage timer", __func__);
    } else {
      SetRxThreadTimerState(TIMER_ACTIVE);
    }
  }

  // The start timestamp is recorded on every call, whether or not the timer
  // was armed above.
  RxTimerStartTs = std::chrono::steady_clock::now();
}
- UartController::StartRxThreadTimer 函数 创建了一个定时器 ,并在 3s 后触发 超时函数:RxThreadTimeOut
2. 超时处理
// hidl_hci/1.0/default/uart_controller.cpp
void UartController::RxThreadTimeOut(union sigval sig)
{
{
std::unique_lock<std::mutex> guard(rx_thread_state_mutex_);
UartController *uart_controller = static_cast<UartController*>(sig.sival_ptr);
uart_controller->rx_timer_state_machine_.timer_state = TIMER_OVERFLOW;
RxTimerSchedTs = std::chrono::steady_clock::now();
ALOGE("%s:Rx thread stuck detected and callback scheduled in TS:%0.2lf ms",
__func__, uart_controller->GetRxThreadSchedTSDiff());
}
{
std::unique_lock<std::mutex> guard(rx_thread_timer_mutex_);
UartController *uart_controller = static_cast<UartController*>(sig.sival_ptr);
if (uart_controller->GetRxThreadTimerState() != TIMER_OVERFLOW) {
ALOGI("%s: Rx thread is unblocked resuming back", __func__);
return;
}
ALOGE("%s:reporting SSR.", __func__);
uart_controller->SsrCleanup(BT_HOST_REASON_RX_THREAD_STUCK);
}
}
3. 停止定时器
// hidl_hci/1.0/default/uart_controller.cpp
// Disarms the Rx-thread watchdog timer if one has been created, and moves the
// timer state back to TIMER_CREATED on success.
void UartController::StopRxThreadTimer()
{
  // Nothing to stop when the timer was never created.
  if (GetRxThreadTimerState() == TIMER_NOT_CREATED) {
    return;
  }

  // An all-zero itimerspec (it_value == 0) disarms the timer.
  struct itimerspec disarm = {};

  if (timer_settime(rx_timer_state_machine_.timer_id, 0, &disarm, 0) == -1) {
    ALOGE("%s:Failed to stop Rx thread timer",__func__);
    return;
  }

  ALOGV("%s: Rx thread timer Stopped",__func__);
  SetRxThreadTimerState(TIMER_CREATED);
}
4. 实际使用定时器
hal 进程每次从 /dev/ttyHSx 读到数据后,就会回调 DataHandler::InternalOnPacketReady 函数。至于为什么会回调到这里,在本篇的其他小节阐述。
// hidl_hci/1.0/default/data_handler.cpp
void DataHandler::InternalOnPacketReady(ProtocolType ptype, HciPacketType type,
const hidl_vec<uint8_t>*hidl_data, bool from_soc) {
UNUSED(from_soc);
uint16_t len = hidl_data->size();
const uint8_t* data = hidl_data->data();
ProtocolCallbacksType *cb_data = nullptr;
static bool reset_rxthread_stuck_prop = true;
std::map<ProtocolType, ProtocolCallbacksType *>::iterator it;
// update the pending Init cb and other callbacks
it = protocol_info_.find(ptype);
if (it != protocol_info_.end()) {
cb_data = (ProtocolCallbacksType*)it->second; // 根据 ptype 从 protocol_info_ 找到对应的回调
} else {
ALOGE("%s: Didnt get the callbacks", __func__);
}
// execute callbacks here
if (cb_data != nullptr && controller_ != nullptr) {
if (!cb_data->is_pending_init_cb ) {
if (!diag_interface_.isSsrTriggered() ||
!((UartController *)controller_)->IsBqrRieEnabled()) {
// 1. 启动 定时器
controller_->StartRxThreadTimer();
}
// 2. 将 从串口读取到的 数据通过 data_read_cb 回调 传递给 bt.server
cb_data->data_read_cb(type, hidl_data);
// 3. 停止 定时器
controller_->StopRxThreadTimer();
}
...
}
1. data_read_cb 调用的是谁
这里我们先来看一下 cb_data->data_read_cb(type, hidl_data); 这个回调是在哪里注册的:
void DataHandler::InternalOnPacketReady(ProtocolType ptype, HciPacketType type,
const hidl_vec<uint8_t>*hidl_data, bool from_soc) {
// update the pending Init cb and other callbacks
it = protocol_info_.find(ptype); // 这里的 ptype == TYPE_BT
if (it != protocol_info_.end()) {
cb_data = (ProtocolCallbacksType*)it->second; // 根据 ptype 从 protocol_info_ 找到对应的回调
} else {
ALOGE("%s: Didnt get the callbacks", __func__);
}
下面是 cb_data->data_read_cb(type, hidl_data);
- data_read_cb 的 初始化流程
// hidl_hci/1.0/default/bluetooth_hci.cpp
Return<void> BluetoothHci::initialize(
const ::android::sp<IBluetoothHciCallbacks>& cb /*这个回调是 bt.server 传递下来的.*/)
{
bool rc = false;
ALOGW("BluetoothHci::initialize(), slot%d", hw_slot_);
if (cb == nullptr) {
ALOGE("%s: Received NULL callback from BT client", __func__);
return Void();
}
::android::sp<IBluetoothHciCallbacks> event_cb_tmp;
event_cb_tmp = cb;
rc = DataHandler::Init( TYPE_BT, .A. , .B.); // 这里的 .B. 的内容就是 data_read_cb
...
}
// hidl_hci/1.0/default/data_handler.cpp
// Forwards the protocol type and both callbacks to data_handler->Open() and
// returns its result.
bool DataHandler::Init(ProtocolType type, InitializeCallback init_cb,
DataReadCallback data_read_cb)
{
return data_handler->Open(type, init_cb, data_read_cb /* note: the data-read callback is handed on here */);
}
bool DataHandler::Open(ProtocolType type, InitializeCallback init_cb,
DataReadCallback data_read_cb /*关注这里*/)
{
...
it = protocol_info_.find(type);
if (it == protocol_info_.end()) {
ProtocolCallbacksType *cb_data = new (ProtocolCallbacksType);
cb_data->type = type;
cb_data->is_pending_init_cb = true;
cb_data->init_cb = init_cb;
cb_data->data_read_cb = data_read_cb; // 主要关注这里的 赋值
protocol_info_[type] = cb_data;
}
...
}
// 此时我们再来看这段函数,是不是就能清楚地看到 cb_data->data_read_cb(type, hidl_data); 调用的是谁了。
// hidl_hci/1.0/default/data_handler.cpp
void DataHandler::InternalOnPacketReady(ProtocolType ptype, HciPacketType type,
const hidl_vec<uint8_t>*hidl_data, bool from_soc) {
UNUSED(from_soc);
uint16_t len = hidl_data->size();
const uint8_t* data = hidl_data->data();
ProtocolCallbacksType *cb_data = nullptr;
static bool reset_rxthread_stuck_prop = true;
std::map<ProtocolType, ProtocolCallbacksType *>::iterator it;
// update the pending Init cb and other callbacks
it = protocol_info_.find(ptype);
if (it != protocol_info_.end()) {
cb_data = (ProtocolCallbacksType*)it->second; // 根据 ptype 从 protocol_info_ 找到对应的回调
} else {
ALOGE("%s: Didnt get the callbacks", __func__);
}
// execute callbacks here
if (cb_data != nullptr && controller_ != nullptr) {
if (!cb_data->is_pending_init_cb ) {
if (!diag_interface_.isSsrTriggered() ||
!((UartController *)controller_)->IsBqrRieEnabled()) {
// 1. 启动 定时器
controller_->StartRxThreadTimer();
}
// 2. 将 从串口读取到的 数据通过 data_read_cb 回调 传递给 bt.server
cb_data->data_read_cb(type, hidl_data);
// 3. 停止 定时器
controller_->StopRxThreadTimer();
}
...
}
BluetoothHci::initialize 函数是什么时候触发调用的请 参照 :
我将 .B. 单独摘出来了。 cb_data->data_read_cb(type, hidl_data); 将回调到这里
// Data-read callback: delivers one received HCI packet to the client callbacks
// object captured in event_cb_tmp. If the client is missing or already marked
// dead the packet is dropped; if a client call fails, the client is marked dead.
[this, event_cb_tmp](HciPacketType type, const hidl_vec<uint8_t> *packet) {
DataHandler *data_handler = DataHandler::Get();
// Without a client callback object there is nobody to deliver to; record the
// client as not alive and drop the packet.
if (event_cb_tmp == nullptr) {
ALOGE("BluetoothHci: event_cb_tmp is null");
if (data_handler)
data_handler->SetClientStatus(false, TYPE_BT);
return;
}
/* Skip calling client callback when client is dead */
if(data_handler && (data_handler->GetClientStatus(TYPE_BT) == false)) {
ALOGI("%s: Skip calling client callback when client is dead", __func__);
return;
}
Logger::Get()->UpdateRxTimeStamp();
switch (type) {
case HCI_PACKET_TYPE_EVENT: // HCI event packet
{
auto hidl_client_status = event_cb_tmp->hciEventReceived(*packet); // hand the event to the stack through the callback supplied by bt.server
if(!hidl_client_status.isOk()) {
ALOGE("Client dead, callback hciEventReceived failed");
if (data_handler)
data_handler->SetClientStatus(false, TYPE_BT);
}
}
break;
case HCI_PACKET_TYPE_ACL_DATA: // ACL data packet
{
auto hidl_client_status = event_cb_tmp->aclDataReceived(*packet); // hand the ACL data to the stack through the callback supplied by bt.server
if(!hidl_client_status.isOk()) {
ALOGE("Client dead, callback aclDataReceived failed");
if (data_handler)
data_handler->SetClientStatus(false, TYPE_BT);
}
}
break;
default:
// Any other packet type is only logged, not forwarded.
ALOGE("%s Unexpected event type %d", __func__, type);
break;
}
}
5. 小结
当每次 从 /dev/ttyHSx 中读到 串口上的数据时,最终会回调到 DataHandler::InternalOnPacketReady 函数中:
- 在向 bt.server 传递 hci event、acl 数据之前,会先启动一个 3 s 的定时器。
- 再向 bt.server 回调数据,bt.server 处理完成后返回。
- 此时取消之前的定时器。
从这个过程可以清楚地看到,RxThreadTime 定时器就是为了保证 hal -> bt.server 链路始终处于正常运转状态。如果超时,说明当前链路出现了问题,有可能是协议栈没有及时处理数据。此时 hal 定时器超时,上报 SSR,hal 进程被杀死。
3. hal 如何从串口读数据并回调
现在还有一个问题:hal 是如何从 /dev/ttyHSx 读到数据的,最终又为何会回调到 DataHandler::InternalOnPacketReady 函数?这也是本篇另外一个重要的议题。
1. 相关函数讲解
1. DataHandler::Open
bool DataHandler::Open(ProtocolType type, InitializeCallback init_cb,
DataReadCallback data_read_cb)
{
...
if (controller_) {
int retry_count = 0;
while (retry_count < INIT_MAX_RETRY_TIMES) {
// 在调用 controller_->Init 中传入了一个回调函数 ,这个回调函数中 调用了 DataHandler::OnPacketReady
status = controller_->Init([this](ProtocolType ptype, HciPacketType type,
const hidl_vec<uint8_t> *hidl_data) {
OnPacketReady(ptype, type, hidl_data);
});
if (status)
break;
++retry_count;
}
}
...
}
// 每次调用 DataHandler::OnPacketReady 就会触发 InternalOnPacketReady 调用
void DataHandler::OnPacketReady(ProtocolType ptype, HciPacketType type,
const hidl_vec<uint8_t>*hidl_data, bool from_soc)
{
...
InternalOnPacketReady(ptype, type, hidl_data, from_soc);
return;
}
2. UartController::Init
status = controller_->Init([this](ProtocolType ptype, HciPacketType type,
const hidl_vec<uint8_t> *hidl_data) {
OnPacketReady(ptype, type, hidl_data);
});
bool UartController::Init(PacketReadCallback pkt_read_cb){
...
ALOGI("soc need reload patch = %d", soc_need_reload_patch);
read_cb_ = pkt_read_cb; // 会将 回调注册到 UartController::read_cb_ 中
...
}
那这里的 read_cb_ 是在哪里回调的?
void UartController::OnPacketReady(hidl_vec<uint8_t> *data)
{
...
if (read_cb_) {
read_cb_(GetProtocol(hci_packet_type_), hci_packet_type_, data);
}
...
}
3. UartController::UartController
那谁来回调 UartController::OnPacketReady
在UartController 构造函数里面,我们将 UartController::OnPacketReady 传递给了 HciPacketizer 对象
HciPacketizer hci_packetizer_;
// Constructor as shown in this excerpt: initialises soc_crashed and soc_type_,
// and hands hci_packetizer_ a lambda that forwards its argument to
// this->OnPacketReady(data).
UartController::UartController(BluetoothSocType soc_type)
: soc_crashed(false), soc_type_(soc_type),
hci_packetizer_([this](hidl_vec<uint8_t> *data) { OnPacketReady(data); })
{
}
4. HciPacketizer
class HciPacketizer {
public:
HciPacketizer(_HciPacketReadyCallback packet_cb)
: _packet_ready_cb_(packet_cb){ HciPacketReadyCallback_flag = 1; };
在 HciPacketizer 构造中,将 UartController::OnPacketReady 保存到了 _packet_ready_cb_ 变量中。
那 _packet_ready_cb_ 是在哪里回调的?
// Reads packet payload bytes from fd while in the HCI_PAYLOAD state and, once
// the whole payload has arrived (bytes_remaining_ == 0), fires the packet-ready
// callback and goes back to HCI_PREAMBLE. Only the HCI_PAYLOAD case is shown in
// this excerpt.
void HciPacketizer::OnDataReady(int fd, HciPacketType packet_type)
{
switch (state_) {
case HCI_PAYLOAD:
// Two variants: with HciPacketReadyCallback_flag set, data goes into
// packet_new_ and is reported through _packet_ready_cb_; otherwise data goes
// into packet_ and is reported through packet_ready_cb_.
if (HciPacketReadyCallback_flag)
{
// Read payload bytes from fd (in this walkthrough: the /dev/ttyHSx UART),
// appending after the preamble and the bytes already received.
// NOTE(review): as excerpted here, bytes_read is not checked for 0 or a
// negative value before the counters are updated — confirm against the
// full source.
ssize_t bytes_read = TEMP_FAILURE_RETRY(read(
fd, packet_new_->data() + GetPreambleSizeForType(packet_type)
+ bytes_read_, bytes_remaining_));
retry_read_ = 0;
bytes_remaining_ -= bytes_read;
bytes_read_ += bytes_read;
if (bytes_remaining_ == 0) {
_packet_ready_cb_(packet_new_); // invoke the registered packet-ready callback
state_ = HCI_PREAMBLE;
bytes_read_ = 0;
}
break;
} else {
ssize_t bytes_read = TEMP_FAILURE_RETRY(read(
fd, packet_.data() + GetPreambleSizeForType(packet_type) + bytes_read_,
bytes_remaining_));
if (bytes_read == 0) {
// This is only expected if the UART got closed when shutting down.
ALOGE("%s: Unexpected EOF reading the payload!", __func__);
sleep(5); // Expect to be shut down within 5 seconds.
return;
}
// A read error in this branch is fatal.
if (bytes_read < 0) {
LOG_ALWAYS_FATAL("%s: Read payload error: %s", __func__,
strerror(errno));
}
bytes_remaining_ -= bytes_read;
bytes_read_ += bytes_read;
if (bytes_remaining_ == 0) {
packet_ready_cb_();
state_ = HCI_PREAMBLE;
bytes_read_ = 0;
}
break;
}
}
}
HciPacketizer::OnDataReady 在哪里回调的?
5. UartController::OnDataReady
void UartController::OnDataReady(int fd)
{
...
hci_packetizer_.OnDataReady(fd, hci_packet_type_);
...
}
6. UartController::Init
bool UartController::Init(PacketReadCallback pkt_read_cb)
{
...
// set up the fd watcher now
ret = fd_watcher_.WatchFdForNonBlockingReads(
uart_transport->GetCtrlFd(),
[this](int fd) { OnDataReady(fd); });
...
}
7. AsyncFdWatcher::WatchFdForNonBlockingReads
// Registers on_read_fd_ready_callback for file_descriptor and makes sure the
// watcher thread is running. Returns whatever TryStartThread() returns.
// In this walkthrough the descriptor is the opened UART (/dev/ttyHSx) and the
// callback ends up calling UartController::OnDataReady.
int AsyncFdWatcher::WatchFdForNonBlockingReads(
    int file_descriptor, const ReadCallback& on_read_fd_ready_callback)
{
  {
    // The map update is done under internal_mutex_; the lock is released
    // before the thread is started.
    std::lock_guard<std::mutex> map_lock(internal_mutex_);
    watched_fds_[file_descriptor] = on_read_fd_ready_callback;
  }

  // Spawns the watcher thread on the first call; later calls are a no-op.
  const int start_result = TryStartThread();
  return start_result;
}
// Starts the watcher thread unless it is already running.
// Returns 0 if the thread is (now) running, -1 if the notification pipe or the
// thread could not be set up.
int AsyncFdWatcher::TryStartThread()
{
  // atomic_exchange returns the previous value: true means a thread was
  // already started, so there is nothing to do.
  if (std::atomic_exchange(&running_, true)) return 0;

  // Set up the communication channel
  int pipe_fds[2];
  if (pipe2(pipe_fds, O_NONBLOCK)) {
    // Roll the flag back; otherwise every later call would see running_ == true
    // and return 0 even though no thread was ever started.
    running_ = false;
    return -1;
  }
  notification_listen_fd_ = pipe_fds[0];
  notification_write_fd_ = pipe_fds[1];

  // Launch the thread that runs ThreadRoutine().
  thread_ = std::thread([this]() { ThreadRoutine(); });
  if (!thread_.joinable()) return -1;
  return 0;
}
8. AsyncFdWatcher::ThreadRoutine
// hidl_hci/1.0/default/async_fd_watcher.cpp
// Body of the watcher thread: while running_ is true, waits with select() for
// the notification pipe or any watched fd to become readable, then invokes the
// callbacks registered for the readable fds.
void AsyncFdWatcher::ThreadRoutine()
{
  while (running_) {
    fd_set read_fds;
    FD_ZERO(&read_fds);
    FD_SET(notification_listen_fd_, &read_fds);
    int max_read_fd = INVALID_FD;

    // watched_fds_ holds the registered fds (in this walkthrough: the UART
    // /dev/ttyHSx). It is written under internal_mutex_ by
    // WatchFdForNonBlockingReads, so it is read under the same mutex here,
    // exactly as the callback-collection loop further down does.
    {
      std::unique_lock<std::mutex> guard(internal_mutex_);
      for (auto& it : watched_fds_) {
        FD_SET(it.first, &read_fds);
        max_read_fd = std::max(max_read_fd, it.first);
      }
    }

    // A positive timeout_ms_ bounds the wait; otherwise select() blocks
    // until an fd is readable.
    struct timeval timeout;
    struct timeval* timeout_ptr = NULL;
    if (timeout_ms_ > std::chrono::milliseconds(0)) {
      timeout.tv_sec = timeout_ms_.count() / 1000;
      timeout.tv_usec = (timeout_ms_.count() % 1000) * 1000;
      timeout_ptr = &timeout;
    }

    // Wait until there is data available to read on some FD.
    // Only readability is monitored (write/except sets are NULL); select()
    // returns when an fd is readable or the timeout elapses.
    int nfds = std::max(notification_listen_fd_, max_read_fd);
    int retval = select(nfds + 1, &read_fds, NULL, NULL, timeout_ptr);

    // There was some error.
    if (retval < 0) continue;

    // Timeout.
    if (retval == 0) {
      // Timeout handling is left out of this walkthrough.
    }

    // Read data from the notification FD.
    if (FD_ISSET(notification_listen_fd_, &read_fds)) {
      char buffer[] = { 0 };
      TEMP_FAILURE_RETRY(read(notification_listen_fd_, buffer, 1));
      continue;
    }

    // Invoke the data ready callbacks if appropriate.
    // The readable entries are copied out under the lock and the callbacks
    // are invoked after it has been released.
    std::vector<decltype(watched_fds_) ::value_type> saved_callbacks;
    {
      std::unique_lock<std::mutex> guard(internal_mutex_);
      for (auto& it : watched_fds_) {
        if (FD_ISSET(it.first, &read_fds)) {
          saved_callbacks.push_back(it);
        }
      }
    }

    // Call each saved callback with its fd; for the UART fd this is the
    // callback that leads to UartController::OnDataReady.
    for (auto& it : saved_callbacks) {
      if (it.second) {
        it.second(it.first);
      }
    }
  }
  ALOGE("%s: End of AsyncFdWatcher::ThreadRoutine", __func__);
}
2. 回调路径总结
DataHandler::Open()
controller_->Init(携带 DataHandler::OnPacketReady 回调 )
启动 AsyncFdWatcher::ThreadRoutine 监听 /dev/ttyHSx 是否可读
UartController::OnPacketReady 的回调会触发 DataHandler::OnPacketReady 回调
在 UartController 构造中,将 UartController::OnPacketReady 传入 HciPacketizer 中
最终 在 HciPacketizer::OnDataReady 中会触发 UartController::OnPacketReady 回调
UartController::OnDataReady 会触发 HciPacketizer::OnDataReady
AsyncFdWatcher::ThreadRoutine 中监听 /dev/ttyHSx fd,如果可读,此时会回调 UartController::OnDataReady
所以当 AsyncFdWatcher::ThreadRoutine 监听到 /dev/ttyHSx 有数据可读:
-> UartController::OnDataReady -> HciPacketizer::OnDataReady -> UartController::OnPacketReady -> DataHandler::OnPacketReady -> DataHandler::InternalOnPacketReady
希望你没有学废, 反正我已经废了。 欢迎评论。一起进步!!!