根据 OpenJDK 17 源码,安全点轮询的信号处理流程如下(重点分析与安全点轮询相关的部分):
核心信号处理流程
信号触发:
当线程的轮询指令读取处于保护状态(不可读)的安全点轮询内存页时,会触发 SIGSEGV(信号处理函数随后通过
SafepointMechanism::is_poll_address
判断故障地址是否位于轮询页内)
信号触发位置在
MacroAssembler::safepoint_poll
生成的汇编指令中
信号入口:
cpp
extern "C" JNIEXPORT int JVM_HANDLE_XXX_SIGNAL(int sig, siginfo_t* info, void* ucVoid, int abort_if_unrecognized)
这是 JVM 的全局信号处理入口
调用
PosixSignals::pd_hotspot_signal_handler
进行平台相关处理
安全点轮询识别:
cpp
if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { stub = SharedRuntime::get_poll_stub(pc); }
关键检查:信号必须是
SIGSEGV
且访问地址是安全点轮询页
获取处理桩:
cpp
address SharedRuntime::get_poll_stub(address pc) { bool at_poll_return = ((CompiledMethod*)cb)->is_at_poll_return(pc); if (at_poll_return) { stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); } else { stub = SharedRuntime::polling_page_safepoint_handler_blob()->entry_point(); } }
区分两种轮询类型:
POLL_AT_RETURN
:方法返回前的轮询
POLL_AT_LOOP
:循环中的普通轮询
返回对应的处理桩入口地址(完整实现中还有第三种情况:含宽向量的方法在普通轮询时使用 polling_page_vectors_safepoint_handler_blob,见下方源码)
处理桩跳转:
cpp
if (stub != NULL) { if (thread != NULL) thread->set_saved_exception_pc(pc); os::Posix::ucontext_set_pc(uc, stub); return true; }
保存原始 PC 到线程状态(用于后续恢复)
修改上下文中的 PC 寄存器指向处理桩代码
信号处理返回后,CPU 会跳转到桩代码执行
处理桩执行流程
桩代码准备:
cpp
_polling_page_return_handler_blob = generate_handler_blob( CAST_FROM_FN_PTR(address, SafepointSynchronize::handle_polling_page_exception), POLL_AT_RETURN);
JVM 启动时生成两个处理桩:
返回轮询桩:
polling_page_return_handler_blob
普通轮询桩:
polling_page_safepoint_handler_blob
桩代码操作(以返回轮询为例):
cpp
__ bind(entry->_stub_label);
__ lea(rscratch1, safepoint_pc); // 计算安全点返回地址
__ movptr(Address(r15_thread, JavaThread::saved_exception_pc_offset()), rscratch1);
__ jump(callback_addr); // 跳转到 handle_polling_page_exception
保存原始返回地址到线程状态
跳转到统一的处理函数
安全点处理核心:
cpp
void SafepointSynchronize::handle_polling_page_exception(JavaThread *thread) { ThreadSafepointState* state = thread->safepoint_state(); state->handle_polling_page_exception(); }
调用线程安全点状态的处理方法
最终会使线程阻塞,直到安全点操作完成
关键设计要点
两级跳转设计:
text
信号处理 → 桩代码 → handle_polling_page_exception
信号处理只负责重定向到桩代码
桩代码负责保存精确上下文
C++ 函数实现阻塞逻辑
上下文保存:
信号处理中保存
saved_exception_pc
桩代码中保存
safepoint_pc
确保安全点操作后能精确恢复执行
类型区分:
普通轮询:
POLL_AT_LOOP
返回轮询:
POLL_AT_RETURN
不同轮询类型使用不同处理桩
异步信号安全:
使用预生成的汇编桩代码(blob)
避免在信号处理中调用复杂 C++ 函数
仅通过修改 PC 寄存器实现控制流转移
执行流程图
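text
线程读取轮询页 → SIGSEGV → JVM_HANDLE_XXX_SIGNAL → PosixSignals::pd_hotspot_signal_handler → SharedRuntime::get_poll_stub → 保存 saved_exception_pc 并把上下文中的 PC 改为处理桩入口 → 处理桩 → SafepointSynchronize::handle_polling_page_exception → ThreadSafepointState::handle_polling_page_exception → 线程阻塞,直到安全点操作完成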
特别注意事项
栈水印检查:
返回轮询(
POLL_AT_RETURN
)包含额外的栈水印检查;该检查使用
cmpptr(rsp/rbp, polling_word)
指令,确保栈收缩时的内存安全
日志追踪:
代码中添加的日志点(如
tty->print_cr
)有助于调试,可以追踪线程进入安全点的完整路径
平台适配:
通过模板元编程实现条件编译(
select_emit_stub
),仅在支持栈水印屏障的平台启用相关逻辑
这种设计实现了高效的安全点协作机制,在保证正确性的同时最小化性能开销,通过信号处理+桩代码+C++函数的组合,实现了从硬件异常到JVM安全点管理的无缝衔接。
## 源码
// Slow-path handler for a Java thread that hit the safepoint poll; its address
// is what the polling-page handler blobs are generated to call.
//
// Prints a trace line carrying the java.lang.Thread name, bumps the
// polling-page hit counter when safepoint statistics logging is enabled, and
// then delegates to the thread's ThreadSafepointState.
//
// thread: the JavaThread that hit the poll; asserted to be in _thread_in_Java.
void SafepointSynchronize::handle_polling_page_exception(JavaThread *thread) {
  // yym-gaizao: trace entry into handle_polling_page_exception together with
  // the name of the polling thread.
  //
  // The name is decoded into a stack buffer on purpose. The one-argument
  // java_lang_String::as_utf8_string(oop) overload returns resource-area
  // memory, and no ResourceMark is established on this path, so using it here
  // would leak one string per poll (debug builds are expected to flag
  // resource allocation without a ResourceMark).
  char name_buf[256];  // longer thread names are truncated in the trace line
  const char* thread_name = "UNKNOWN";
  oop thread_obj = thread->threadObj();
  if (thread_obj != nullptr) {
    oop name_oop = java_lang_Thread::name(thread_obj);
    if (name_oop != nullptr) {
      thread_name = java_lang_String::as_utf8_string(name_oop, name_buf, (int)sizeof(name_buf));
    }
  }
  tty->print_cr("Entering handle_polling_page_exception for thread: %s", thread_name);

  assert(thread->thread_state() == _thread_in_Java, "should come from Java code");

  // Enable WXWrite: the function is called implicitly from java code.
  MACOS_AARCH64_ONLY(ThreadWXEnable wx(WXWrite, thread));

  // Count the hit only when safepoint statistics are being logged.
  if (log_is_enabled(Info, safepoint, stats)) {
    Atomic::inc(&_nof_threads_hit_polling_page);
  }

  ThreadSafepointState* state = thread->safepoint_state();
  state->handle_polling_page_exception();
}
// Accessor returning the cached handler blob for loop (non-return) safepoint polls.
// NOTE(review): presumably excerpted from the SharedRuntime class declaration,
// since get_poll_stub calls it as SharedRuntime::polling_page_safepoint_handler_blob().
static SafepointBlob* polling_page_safepoint_handler_blob() { return _polling_page_safepoint_handler_blob; }
// Initialization of that blob: generate_handler_blob() is given the address of
// SafepointSynchronize::handle_polling_page_exception and the POLL_AT_LOOP poll type.
// NOTE(review): this is a statement lifted out of some enclosing function — confirm which one.
_polling_page_safepoint_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address, SafepointSynchronize::handle_polling_page_exception), POLL_AT_LOOP);
// NOTE(review): exact repeat of the accessor excerpt above; looks like a duplicated paste.
static SafepointBlob* polling_page_safepoint_handler_blob() { return _polling_page_safepoint_handler_blob; }
// Selects the handler stub a thread should be redirected to after faulting on
// the safepoint polling page at `pc`.
//
// pc: address of the faulting instruction; it must lie inside a compiled
//     method (enforced with guarantee()).
// Returns the entry point of the return-poll handler blob, the wide-vector
// safepoint handler blob, or the plain safepoint handler blob.
address SharedRuntime::get_poll_stub(address pc) {
  // yym-gaizao: trace every poll stub lookup.
  tty->print_cr("Fetching poll stub for pc " INTPTR_FORMAT, p2i(pc));

  // The faulting pc has to resolve to a compiled method in the code cache.
  CodeBlob* const blob = CodeCache::find_blob(pc);
  guarantee(blob != NULL && blob->is_compiled(), "safepoint polling: pc must refer to an nmethod");
  CompiledMethod* const method = (CompiledMethod*)blob;

  // The relocation information must classify this pc as a poll.
  assert(method->is_at_poll_or_poll_return(pc),
         "safepoint polling: type must be poll");

#ifdef ASSERT
  // Debug builds additionally check the instruction at pc itself.
  if (!((NativeInstruction*)pc)->is_safepoint_poll()) {
    tty->print_cr("bad pc: " PTR_FORMAT, p2i(pc));
    Disassembler::decode(blob);
    fatal("Only polling locations are used for safepoint");
  }
#endif

  const bool is_return_poll   = method->is_at_poll_return(pc);
  const bool uses_wide_vector = method->has_wide_vectors();

  // Return polls take precedence; wide vectors only matter for loop polls.
  address stub;
  if (is_return_poll) {
    assert(SharedRuntime::polling_page_return_handler_blob() != NULL,
           "polling page return stub not created yet");
    stub = SharedRuntime::polling_page_return_handler_blob()->entry_point();
  } else if (uses_wide_vector) {
    assert(SharedRuntime::polling_page_vectors_safepoint_handler_blob() != NULL,
           "polling page vectors safepoint stub not created yet");
    stub = SharedRuntime::polling_page_vectors_safepoint_handler_blob()->entry_point();
  } else {
    assert(SharedRuntime::polling_page_safepoint_handler_blob() != NULL,
           "polling page safepoint stub not created yet");
    stub = SharedRuntime::polling_page_safepoint_handler_blob()->entry_point();
  }

  const char* const poll_kind = is_return_poll ? "return" : "loop";
  log_debug(safepoint)("... found polling page %s exception at pc = "
                       INTPTR_FORMAT ", stub =" INTPTR_FORMAT,
                       poll_kind,
                       (intptr_t)pc, (intptr_t)stub);
  return stub;
}
// Emits "[SIGNAL] ThreadID=<tid> accessing poll page\n" on stderr.
// The line is formatted by hand and sent with a single write(2): stdio
// (fprintf) is not async-signal-safe and must not be used in a signal handler.
static void trace_signal_thread_id(int tid) {
  static const char prefix[] = "[SIGNAL] ThreadID=";
  static const char suffix[] = " accessing poll page\n";

  // Decimal digits of |tid|, least significant digit first.
  char digits[16];
  size_t ndigits = 0;
  unsigned int magnitude = (tid < 0) ? (0u - (unsigned int)tid) : (unsigned int)tid;
  do {
    digits[ndigits++] = (char)('0' + (magnitude % 10));
    magnitude /= 10;
  } while (magnitude != 0);

  char line[sizeof(prefix) + sizeof(suffix) + sizeof(digits) + 1];
  size_t len = 0;
  for (size_t i = 0; i + 1 < sizeof(prefix); i++) line[len++] = prefix[i];
  if (tid < 0) line[len++] = '-';
  while (ndigits > 0) line[len++] = digits[--ndigits];
  for (size_t i = 0; i + 1 < sizeof(suffix); i++) line[len++] = suffix[i];
  write(STDERR_FILENO, line, len);
}

// Platform-dependent part of the HotSpot signal handler.
//
// Decides whether the signal can be handled by resuming the interrupted thread
// at a continuation ("stub") address. The cases triaged here are: stack
// overflow, the AVX cpuinfo probe, safepoint polls, SIGBUS from unsafe memory
// accesses, implicit divide-by-zero and null checks, and faults in the JNI
// fast field accessors. Non-AMD64 builds can additionally unguard a page after
// an execution-protection fault.
//
// sig, info, uc: signal number, siginfo and ucontext as delivered to the handler.
// thread:        the current JavaThread, or NULL if the signal did not hit one.
// Returns true if the signal was handled (when a stub was chosen, the pc in
// the ucontext has been redirected to it), false otherwise.
bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
                                             ucontext_t* uc, JavaThread* thread) {
  // gaizao (local modification): debug tracing.
  // NOTE: this runs for every signal that reaches this function, not only for
  // polling-page faults, even though the first line says "accessing poll page".
  trace_signal_thread_id((int)syscall(SYS_gettid));
  const char* msg = "@@@@yym---pd_hotspot_signal_handler----\n";
  write(STDERR_FILENO, msg, strlen(msg)); // write straight to the file descriptor

/*
  NOTE: does not seem to work on linux.
  if (info == NULL || info->si_code <= 0 || info->si_code == SI_NOINFO) {
    // can't decode this kind of signal
    info = NULL;
  } else {
    assert(sig == info->si_signo, "bad siginfo");
  }
*/
  // decide if this trap can be handled by a stub
  address stub = NULL;

  address pc = NULL;

  //%note os_trap_1
  if (info != NULL && uc != NULL && thread != NULL) {
    pc = (address) os::Posix::ucontext_get_pc(uc);

#ifndef AMD64
    // Halt if SI_KERNEL before more crashes get misdiagnosed as Java bugs
    // This can happen in any running code (currently more frequently in
    // interpreter code but has been seen in compiled code)
    if (sig == SIGSEGV && info->si_addr == 0 && info->si_code == SI_KERNEL) {
      fatal("An irrecoverable SI_KERNEL SIGSEGV has occurred due "
            "to unstable signal handling in this distribution.");
    }
#endif // AMD64

    // Handle ALL stack overflow variations here
    if (sig == SIGSEGV) {
      address addr = (address) info->si_addr;

      // check if fault address is within thread stack
      if (thread->is_in_full_stack(addr)) {
        // stack overflow
        if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) {
          return true; // continue
        }
      }
    }

    if ((sig == SIGSEGV) && VM_Version::is_cpuinfo_segv_addr(pc)) {
      // Verify that OS save/restore AVX registers.
      stub = VM_Version::cpuinfo_cont_addr();
    }

    if (thread->thread_state() == _thread_in_Java) {
      // Java thread running in Java code => find exception handler if any
      // a fault inside compiled code, the interpreter, or a stub

      if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) {
        stub = SharedRuntime::get_poll_stub(pc);
      } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) {
        // BugId 4454115: A read from a MappedByteBuffer can fault
        // here if the underlying file has been truncated.
        // Do not crash the VM in such a case.
        CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
        CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
        bool is_unsafe_arraycopy = thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc);
        if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) {
          address next_pc = Assembler::locate_next_instruction(pc);
          if (is_unsafe_arraycopy) {
            next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
          }
          stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
        }
      }
      else

#ifdef AMD64
      if (sig == SIGFPE &&
          (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) {
        stub =
          SharedRuntime::
          continuation_for_implicit_exception(thread,
                                              pc,
                                              SharedRuntime::
                                              IMPLICIT_DIVIDE_BY_ZERO);
#else
      if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) {
        // HACK: si_code does not work on linux 2.2.12-20!!!
        int op = pc[0];
        if (op == 0xDB) {
          // FIST
          // TODO: The encoding of D2I in x86_32.ad can cause an exception
          // prior to the fist instruction if there was an invalid operation
          // pending. We want to dismiss that exception. From the win_32
          // side it also seems that if it really was the fist causing
          // the exception that we do the d2i by hand with different
          // rounding. Seems kind of weird.
          // NOTE: that we take the exception at the NEXT floating point instruction.
          assert(pc[0] == 0xDB, "not a FIST opcode");
          assert(pc[1] == 0x14, "not a FIST opcode");
          assert(pc[2] == 0x24, "not a FIST opcode");
          return true;
        } else if (op == 0xF7) {
          // IDIV
          stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO);
        } else {
          // TODO: handle more cases if we are using other x86 instructions
          // that can generate SIGFPE signal on linux.
          tty->print_cr("unknown opcode 0x%X with SIGFPE.", op);
          fatal("please update this code.");
        }
#endif // AMD64
      } else if (sig == SIGSEGV &&
                 MacroAssembler::uses_implicit_null_check(info->si_addr)) {
        // Determination of interpreter/vtable stub/compiled code null exception
        stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
      }
    } else if ((thread->thread_state() == _thread_in_vm ||
                thread->thread_state() == _thread_in_native) &&
               (sig == SIGBUS && /* info->si_code == BUS_OBJERR && */
                thread->doing_unsafe_access())) {
      address next_pc = Assembler::locate_next_instruction(pc);
      if (UnsafeCopyMemory::contains_pc(pc)) {
        next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
      }
      stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
    }

    // jni_fast_Get<Primitive>Field can trap at certain pc's if a GC kicks in
    // and the heap gets shrunk before the field access.
    if ((sig == SIGSEGV) || (sig == SIGBUS)) {
      address addr = JNI_FastGetField::find_slowcase_pc(pc);
      if (addr != (address)-1) {
        stub = addr;
      }
    }
  }

#ifndef AMD64
  // Execution protection violation
  //
  // This should be kept as the last step in the triage.  We don't
  // have a dedicated trap number for a no-execute fault, so be
  // conservative and allow other handlers the first shot.
  //
  // Note: We don't test that info->si_code == SEGV_ACCERR here.
  // this si_code is so generic that it is almost meaningless; and
  // the si_code for this condition may change in the future.
  // Furthermore, a false-positive should be harmless.
  //
  // info and uc are checked for NULL before use, matching the guard on the
  // main triage above: this section runs even when that guard failed.
  if (UnguardOnExecutionViolation > 0 &&
      stub == NULL &&
      (sig == SIGSEGV || sig == SIGBUS) &&
      info != NULL && uc != NULL &&
      uc->uc_mcontext.gregs[REG_TRAPNO] == trap_page_fault) {
    int page_size = os::vm_page_size();
    address addr = (address) info->si_addr;
    address pc = os::Posix::ucontext_get_pc(uc);
    // Make sure the pc and the faulting address are sane.
    //
    // If an instruction spans a page boundary, and the page containing
    // the beginning of the instruction is executable but the following
    // page is not, the pc and the faulting address might be slightly
    // different - we still want to unguard the 2nd page in this case.
    //
    // 15 bytes seems to be a (very) safe value for max instruction size.
    bool pc_is_near_addr =
      (pointer_delta((void*) addr, (void*) pc, sizeof(char)) < 15);
    bool instr_spans_page_boundary =
      (align_down((intptr_t) pc ^ (intptr_t) addr,
                  (intptr_t) page_size) > 0);

    if (pc == addr || (pc_is_near_addr && instr_spans_page_boundary)) {
      static volatile address last_addr =
        (address) os::non_memory_address_word();

      // In conservative mode, don't unguard unless the address is in the VM
      if (addr != last_addr &&
          (UnguardOnExecutionViolation > 1 || os::address_is_in_vm(addr))) {

        // Set memory to RWX and retry
        address page_start = align_down(addr, page_size);
        bool res = os::protect_memory((char*) page_start, page_size,
                                      os::MEM_PROT_RWX);

        log_debug(os)("Execution protection violation "
                      "at " INTPTR_FORMAT
                      ", unguarding " INTPTR_FORMAT ": %s, errno=%d", p2i(addr),
                      p2i(page_start), (res ? "success" : "failed"), errno);
        stub = pc;

        // Set last_addr so if we fault again at the same address, we don't end
        // up in an endless loop.
        //
        // There are two potential complications here.  Two threads trapping at
        // the same address at the same time could cause one of the threads to
        // think it already unguarded, and abort the VM.  Likely very rare.
        //
        // The other race involves two threads alternately trapping at
        // different addresses and failing to unguard the page, resulting in
        // an endless loop.  This condition is probably even more unlikely than
        // the first.
        //
        // Although both cases could be avoided by using locks or thread local
        // last_addr, these solutions are unnecessary complication: this
        // handler is a best-effort safety net, not a complete solution.  It is
        // disabled by default and should only be used as a workaround in case
        // we missed any no-execute-unsafe VM code.

        last_addr = addr;
      }
    }
  }
#endif // !AMD64

  if (stub != NULL) {
    // save all thread context in case we need to restore it
    if (thread != NULL) thread->set_saved_exception_pc(pc);

    // Resume the interrupted thread at the stub once the handler returns.
    os::Posix::ucontext_set_pc(uc, stub);
    return true;
  }

  return false;
}
#define JVM_HANDLE_XXX_SIGNAL JVM_handle_linux_signal
#else
#error who are you?
#endif
// Common entry point of the HotSpot signal handler (JVM_HANDLE_XXX_SIGNAL is
// #defined to the platform-specific symbol name, e.g. JVM_handle_linux_signal).
//
// Triage order: assertion poison page, SafeFetch faults, SIGPIPE/SIGXFSZ
// (always treated as handled), the platform-dependent handler, then any
// chained handler. If nothing handled the signal and abort_if_unrecognized is
// set, fatal error reporting is invoked and is not expected to return.
//
// sig, info, ucVoid:     as delivered by the OS to the signal handler.
// abort_if_unrecognized: non-zero to treat an unhandled signal as fatal.
// Returns non-zero if the signal was handled, zero otherwise.
extern "C" JNIEXPORT
int JVM_HANDLE_XXX_SIGNAL(int sig, siginfo_t* info,
                          void* ucVoid, int abort_if_unrecognized)
{
  assert(info != NULL && ucVoid != NULL, "sanity");

  // Note: it's not uncommon that JNI code uses signal/sigset to install,
  // then restore certain signal handler (e.g. to temporarily block SIGPIPE,
  // or have a SIGILL handler when detecting CPU type). When that happens,
  // this handler might be invoked with junk info/ucVoid. To avoid unnecessary
  // crash when libjsig is not preloaded, try handle signals that do not require
  // siginfo/ucontext first.

  // Preserve errno value over signal handler.
  //  (note: RAII ok here, even with JFR thread crash protection, see below).
  ErrnoPreserver ep;

  // yym-gaizao: debug trace, written straight to the file descriptor.
  // This must stay after the ErrnoPreserver above: a failing write() sets
  // errno, and if that happened before errno was saved, the clobbered value
  // would be handed back to the interrupted code.
  const char* msg = "@@@@yym---JVM_HANDLE_XXX_SIGNAL----\n";
  write(STDERR_FILENO, msg, strlen(msg));

  // Unblock all synchronous error signals (see JDK-8252533)
  PosixSignals::unblock_error_signals();

  ucontext_t* const uc = (ucontext_t*) ucVoid;
  Thread* const t = Thread::current_or_null_safe();

  // Handle JFR thread crash protection.
  //  Note: this may cause us to longjmp away. Do not use any code before this
  //  point which really needs any form of epilogue code running, eg RAII objects.
  os::ThreadCrashProtection::check_crash_protection(sig, t);

  bool signal_was_handled = false;

  // Handle assertion poison page accesses.
#ifdef CAN_SHOW_REGISTERS_ON_ASSERT
  if (!signal_was_handled &&
      ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison)) {
    signal_was_handled = handle_assert_poison_fault(ucVoid, info->si_addr);
  }
#endif

  if (!signal_was_handled) {
    // Handle SafeFetch access.
#ifndef ZERO
    if (uc != NULL) {
      address pc = os::Posix::ucontext_get_pc(uc);
      if (StubRoutines::is_safefetch_fault(pc)) {
        os::Posix::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc));
        signal_was_handled = true;
      }
    }
#else
    // See JDK-8076185
    if (sig == SIGSEGV || sig == SIGBUS) {
      sigjmp_buf* const pjb = get_jmp_buf_for_continuation();
      if (pjb) {
        siglongjmp(*pjb, 1);
      }
    }
#endif // ZERO
  }

  // Ignore SIGPIPE and SIGXFSZ (4229104, 6499219).
  if (!signal_was_handled &&
      (sig == SIGPIPE || sig == SIGXFSZ)) {
    PosixSignals::chained_handler(sig, info, ucVoid);
    signal_was_handled = true; // unconditionally.
  }

  // Call platform dependent signal handler.
  if (!signal_was_handled) {
    // Only a JavaThread is passed down; any other thread is passed as NULL.
    JavaThread* const jt = (t != NULL && t->is_Java_thread()) ? (JavaThread*) t : NULL;
    signal_was_handled = PosixSignals::pd_hotspot_signal_handler(sig, info, uc, jt);
  }

  // From here on, if the signal had not been handled, it is a fatal error.

  // Give the chained signal handler - should it exist - a shot.
  if (!signal_was_handled) {
    signal_was_handled = PosixSignals::chained_handler(sig, info, ucVoid);
  }

  // Invoke fatal error handling.
  if (!signal_was_handled && abort_if_unrecognized) {
    // Extract pc from context for the error handler to display.
    address pc = NULL;
    if (uc != NULL) {
      // prepare fault pc address for error reporting.
      if (S390_ONLY(sig == SIGILL || sig == SIGFPE) NOT_S390(false)) {
        pc = (address)info->si_addr;
      } else if (ZERO_ONLY(true) NOT_ZERO(false)) {
        // Non-arch-specific Zero code does not really know the pc.
        // This can be alleviated by making arch-specific os::Posix::ucontext_get_pc
        // available for Zero for known architectures. But for generic Zero
        // code, it would still remain unknown.
        pc = NULL;
      } else {
        pc = os::Posix::ucontext_get_pc(uc);
      }
    }
    // For Zero, we ignore the crash context, because:
    //  a) The crash would be in C++ interpreter code, so context is not really relevant;
    //  b) Generic Zero code would not be able to parse it, so when generic error
    //     reporting code asks e.g. about frames on stack, Zero would experience
    //     a secondary ShouldNotCallThis() crash.
    VMError::report_and_die(t, sig, pc, info, NOT_ZERO(ucVoid) ZERO_ONLY(NULL));
    // VMError should not return.
    ShouldNotReachHere();
  }
  return signal_was_handled;
}