Index: jdk17u-jdk-17.0.5-8/make/autoconf/jvm-features.m4 =================================================================== --- jdk17u-jdk-17.0.5-8.orig/make/autoconf/jvm-features.m4 +++ jdk17u-jdk-17.0.5-8/make/autoconf/jvm-features.m4 @@ -308,7 +308,8 @@ AC_DEFUN_ONCE([JVM_FEATURES_CHECK_SHENAN AC_MSG_CHECKING([if platform is supported by Shenandoah]) if test "x$OPENJDK_TARGET_CPU_ARCH" = "xx86" || \ test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \ - test "x$OPENJDK_TARGET_CPU" = "xppc64le"; then + test "x$OPENJDK_TARGET_CPU" = "xppc64le" || \ + test "x$OPENJDK_TARGET_CPU" = "xriscv64"; then AC_MSG_RESULT([yes]) else AC_MSG_RESULT([no, $OPENJDK_TARGET_CPU]) @@ -358,7 +359,8 @@ AC_DEFUN_ONCE([JVM_FEATURES_CHECK_ZGC], AC_MSG_RESULT([no, $OPENJDK_TARGET_OS-$OPENJDK_TARGET_CPU]) AVAILABLE=false fi - elif test "x$OPENJDK_TARGET_CPU" = "xppc64le"; then + elif test "x$OPENJDK_TARGET_CPU" = "xppc64le" || \ + test "x$OPENJDK_TARGET_CPU" = "xriscv64"; then if test "x$OPENJDK_TARGET_OS" = "xlinux"; then AC_MSG_RESULT([yes]) else Index: jdk17u-jdk-17.0.5-8/make/autoconf/libraries.m4 =================================================================== --- jdk17u-jdk-17.0.5-8.orig/make/autoconf/libraries.m4 +++ jdk17u-jdk-17.0.5-8/make/autoconf/libraries.m4 @@ -1,5 +1,5 @@ # -# Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -148,6 +148,12 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], fi fi + # Because RISC-V only has word-sized atomics, it requires libatomic where + # other common architectures do not. So link libatomic by default. 
+ if test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xriscv64; then + BASIC_JVM_LIBS="$BASIC_JVM_LIBS -latomic" + fi + # perfstat lib if test "x$OPENJDK_TARGET_OS" = xaix; then BASIC_JVM_LIBS="$BASIC_JVM_LIBS -lperfstat" Index: jdk17u-jdk-17.0.5-8/make/autoconf/platform.m4 =================================================================== --- jdk17u-jdk-17.0.5-8.orig/make/autoconf/platform.m4 +++ jdk17u-jdk-17.0.5-8/make/autoconf/platform.m4 @@ -1,5 +1,5 @@ # -# Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -561,6 +561,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HEL HOTSPOT_$1_CPU_DEFINE=PPC64 elif test "x$OPENJDK_$1_CPU" = xppc64le; then HOTSPOT_$1_CPU_DEFINE=PPC64 + elif test "x$OPENJDK_$1_CPU" = xriscv64; then + HOTSPOT_$1_CPU_DEFINE=RISCV64 # The cpu defines below are for zero, we don't support them directly. elif test "x$OPENJDK_$1_CPU" = xsparc; then @@ -571,8 +573,6 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HEL HOTSPOT_$1_CPU_DEFINE=S390 elif test "x$OPENJDK_$1_CPU" = xs390x; then HOTSPOT_$1_CPU_DEFINE=S390 - elif test "x$OPENJDK_$1_CPU" = xriscv64; then - HOTSPOT_$1_CPU_DEFINE=RISCV elif test "x$OPENJDK_$1_CPU" != x; then HOTSPOT_$1_CPU_DEFINE=$(echo $OPENJDK_$1_CPU | tr a-z A-Z) fi Index: jdk17u-jdk-17.0.5-8/make/hotspot/gensrc/GensrcAdlc.gmk =================================================================== --- jdk17u-jdk-17.0.5-8.orig/make/hotspot/gensrc/GensrcAdlc.gmk +++ jdk17u-jdk-17.0.5-8/make/hotspot/gensrc/GensrcAdlc.gmk @@ -1,5 +1,5 @@ # -# Copyright (c) 2013, 2021, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # This code is free software; you can redistribute it and/or modify it @@ -149,6 +149,13 @@ ifeq ($(call check-jvm-feature, compiler ))) endif + ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_v.ad \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_b.ad \ + ))) + endif + ifeq ($(call check-jvm-feature, shenandoahgc), true) AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -1590,7 +1590,9 @@ void LIR_Assembler::emit_compare_and_swa } -void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { + assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on aarch64"); Assembler::Condition acond, ncond; switch (condition) { Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1417,7 +1417,10 @@ void LIR_Assembler::emit_compare_and_swa } -void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { + assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on arm"); + AsmCondition acond = al; AsmCondition ncond = nv; if (opr1 != opr2) { Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2021, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2021 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -1554,8 +1554,10 @@ inline void load_to_reg(LIR_Assembler *l } } +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { + assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on ppc"); -void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { if (opr1->is_equal(opr2) || opr1->is_same_register(opr2)) { load_to_reg(this, opr1, result); // Condition doesn't matter. return; Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/constMethod.hpp" +#include "oops/klass.inline.hpp" +#include "oops/method.hpp" +#include "runtime/frame.inline.hpp" +#include "utilities/align.hpp" +#include "utilities/debug.hpp" +#include "utilities/macros.hpp" + +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { + case T_BOOLEAN: i = 0; break; + case T_CHAR : i = 1; break; + case T_BYTE : i = 2; break; + case T_SHORT : i = 3; break; + case T_INT : i = 4; break; + case T_LONG : i = 5; break; + case T_VOID : i = 6; break; + case T_FLOAT : i = 7; break; + case T_DOUBLE : i = 8; break; + case T_OBJECT : i = 9; break; + case T_ARRAY : i = 9; break; + default : ShouldNotReachHere(); + } + assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, + "index out of bounds"); + return i; +} + +// How much stack a method activation needs in words. +int AbstractInterpreter::size_top_interpreter_activation(Method* method) { + const int entry_size = frame::interpreter_frame_monitor_size(); + + // total overhead size: entry_size + (saved fp thru expr stack + // bottom). 
be sure to change this if you add/subtract anything + // to/from the overhead area + const int overhead_size = + -(frame::interpreter_frame_initial_sp_offset) + entry_size; + + const int stub_code = frame::entry_frame_after_call_words; + assert_cond(method != NULL); + const int method_stack = (method->max_locals() + method->max_stack()) * + Interpreter::stackElementWords; + return (overhead_size + method_stack + stub_code); +} + +// asm based interpreter deoptimization helpers +int AbstractInterpreter::size_activation(int max_stack, + int temps, + int extra_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame) { + // Note: This calculation must exactly parallel the frame setup + // in TemplateInterpreterGenerator::generate_method_entry. + + // fixed size of an interpreter frame: + int overhead = frame::sender_sp_offset - + frame::interpreter_frame_initial_sp_offset; + // Our locals were accounted for by the caller (or last_frame_adjust + // on the transition). Since the callee parameters already account + // for the callee's params we only need to account for the extra + // locals. + int size = overhead + + (callee_locals - callee_params) + + monitors * frame::interpreter_frame_monitor_size() + + // On the top frame, at all times SP <= ESP, and SP is + // 16-aligned. We ensure this by adjusting SP on method + // entry and re-entry to allow room for the maximum size of + // the expression stack. When we call another method we bump + // SP so that no stack space is wasted. So, only on the top + // frame do we need to allow max_stack words. + (is_top_frame ? max_stack : temps + extra_args); + + // On riscv we always keep the stack pointer 16-aligned, so we + // must round up here. 
+ size = align_up(size, 2); + + return size; +} + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_locals, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + // The frame interpreter_frame is guaranteed to be the right size, + // as determined by a previous call to the size_activation() method. + // It is also guaranteed to be walkable even though it is in a + // skeletal state + assert_cond(method != NULL && caller != NULL && interpreter_frame != NULL); + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = (method->max_locals() - method->size_of_parameters()) * + Interpreter::stackElementWords; + +#ifdef ASSERT + assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable"); +#endif + + interpreter_frame->interpreter_frame_set_method(method); + // NOTE the difference in using sender_sp and interpreter_frame_sender_sp + // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) + // and sender_sp is fp + intptr_t* locals = NULL; + if (caller->is_interpreted_frame()) { + locals = caller->interpreter_frame_last_sp() + caller_actual_parameters - 1; + } else { + locals = interpreter_frame->sender_sp() + max_locals - 1; + } + +#ifdef ASSERT + if (caller->is_interpreted_frame()) { + assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); + } +#endif + + interpreter_frame->interpreter_frame_set_locals(locals); + BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); + BasicObjectLock* monbot = montop - moncount; + interpreter_frame->interpreter_frame_set_monitor_end(monbot); + + // Set last_sp + intptr_t* last_sp = (intptr_t*) monbot - + tempcount*Interpreter::stackElementWords - + popframe_extra_args; + 
interpreter_frame->interpreter_frame_set_last_sp(last_sp); + + // All frames but the initial (oldest) interpreter frame we fill in have + // a value for sender_sp that allows walking the stack but isn't + // truly correct. Correct the value here. + if (extra_locals != 0 && + interpreter_frame->sender_sp() == + interpreter_frame->interpreter_frame_sender_sp()) { + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + + extra_locals); + } + + *interpreter_frame->interpreter_frame_cache_addr() = + method->constants()->cache(); + *interpreter_frame->interpreter_frame_mirror_addr() = + method->method_holder()->java_mirror(); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/assembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/assembler_riscv.cpp @@ -0,0 +1,372 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include <stdio.h> +#include <sys/types.h> + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "compiler/disassembler.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/sharedRuntime.hpp" + +int AbstractAssembler::code_fill_byte() { + return 0; +} + +void Assembler::add(Register Rd, Register Rn, int64_t increment, Register temp) { + if (is_imm_in_range(increment, 12, 0)) { + addi(Rd, Rn, increment); + } else { + assert_different_registers(Rn, temp); + li(temp, increment); + add(Rd, Rn, temp); + } +} + +void Assembler::addw(Register Rd, Register Rn, int64_t increment, Register temp) { + if (is_imm_in_range(increment, 12, 0)) { + addiw(Rd, Rn, increment); + } else { + assert_different_registers(Rn, temp); + li(temp, increment); + addw(Rd, Rn, temp); + } +} + +void Assembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) { + if (is_imm_in_range(-decrement, 12, 0)) { + addi(Rd, Rn, -decrement); + } else { + assert_different_registers(Rn, temp); + li(temp, decrement); + sub(Rd, Rn, temp); + } +} + +void Assembler::subw(Register Rd, Register Rn, int64_t decrement, Register temp) { + if (is_imm_in_range(-decrement, 12, 0)) { + addiw(Rd, Rn, -decrement); + } else { + assert_different_registers(Rn, temp); + li(temp, decrement); + subw(Rd, Rn, temp); + } +} + +void Assembler::zext_w(Register Rd, Register Rs) { + add_uw(Rd, Rs, zr); +} + +void Assembler::_li(Register Rd, int64_t imm) { + // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff + int shift = 12; + int64_t upper = imm, lower = imm; + // Split imm to a lower 12-bit sign-extended part and the remainder, + // because addi will sign-extend the lower imm. 
+ lower = ((int32_t)imm << 20) >> 20; + upper -= lower; + + // Test whether imm is a 32-bit integer. + if (!(((imm) & ~(int64_t)0x7fffffff) == 0 || + (((imm) & ~(int64_t)0x7fffffff) == ~(int64_t)0x7fffffff))) { + while (((upper >> shift) & 1) == 0) { shift++; } + upper >>= shift; + li(Rd, upper); + slli(Rd, Rd, shift); + if (lower != 0) { + addi(Rd, Rd, lower); + } + } else { + // 32-bit integer + Register hi_Rd = zr; + if (upper != 0) { + lui(Rd, (int32_t)upper); + hi_Rd = Rd; + } + if (lower != 0 || hi_Rd == zr) { + addiw(Rd, hi_Rd, lower); + } + } +} + +void Assembler::li64(Register Rd, int64_t imm) { + // Load upper 32 bits. upper = imm[63:32], but if imm[31] == 1 or + // (imm[31:28] == 0x7ff && imm[19] == 1), upper = imm[63:32] + 1. + int64_t lower = imm & 0xffffffff; + lower -= ((lower << 44) >> 44); + int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; + int32_t upper = (tmp_imm - (int32_t)lower) >> 32; + + // Load upper 32 bits + int64_t up = upper, lo = upper; + lo = (lo << 52) >> 52; + up -= lo; + up = (int32_t)up; + lui(Rd, up); + addi(Rd, Rd, lo); + + // Load the rest 32 bits. 
+ slli(Rd, Rd, 12); + addi(Rd, Rd, (int32_t)lower >> 20); + slli(Rd, Rd, 12); + lower = ((int32_t)imm << 12) >> 20; + addi(Rd, Rd, lower); + slli(Rd, Rd, 8); + lower = imm & 0xff; + addi(Rd, Rd, lower); +} + +void Assembler::li32(Register Rd, int32_t imm) { + // int32_t is in range 0x8000 0000 ~ 0x7fff ffff, and imm[31] is the sign bit + int64_t upper = imm, lower = imm; + lower = (imm << 20) >> 20; + upper -= lower; + upper = (int32_t)upper; + // lui Rd, imm[31:12] + imm[11] + lui(Rd, upper); + // use addiw to distinguish li32 to li64 + addiw(Rd, Rd, lower); +} + +#define INSN(NAME, REGISTER) \ + void Assembler::NAME(const address &dest, Register temp) { \ + assert_cond(dest != NULL); \ + int64_t distance = dest - pc(); \ + if (is_imm_in_range(distance, 20, 1)) { \ + jal(REGISTER, distance); \ + } else { \ + assert(temp != noreg, "temp must not be empty register!"); \ + int32_t offset = 0; \ + movptr_with_offset(temp, dest, offset); \ + jalr(REGISTER, temp, offset); \ + } \ + } \ + void Assembler::NAME(Label &l, Register temp) { \ + jal(REGISTER, l, temp); \ + } \ + + INSN(j, x0); + INSN(jal, x1); + +#undef INSN + +#define INSN(NAME, REGISTER) \ + void Assembler::NAME(Register Rs) { \ + jalr(REGISTER, Rs, 0); \ + } + + INSN(jr, x0); + INSN(jalr, x1); + +#undef INSN + +void Assembler::ret() { + jalr(x0, x1, 0); +} + +#define INSN(NAME, REGISTER) \ + void Assembler::NAME(const address &dest, Register temp) { \ + assert_cond(dest != NULL); \ + assert(temp != noreg, "temp must not be empty register!"); \ + int64_t distance = dest - pc(); \ + if (is_offset_in_range(distance, 32)) { \ + auipc(temp, distance + 0x800); \ + jalr(REGISTER, temp, ((int32_t)distance << 20) >> 20); \ + } else { \ + int32_t offset = 0; \ + movptr_with_offset(temp, dest, offset); \ + jalr(REGISTER, temp, offset); \ + } \ + } + + INSN(call, x1); + INSN(tail, x0); + +#undef INSN + +#define INSN(NAME, REGISTER) \ + void Assembler::NAME(const Address &adr, Register temp) { \ + switch (adr.getMode()) 
{ \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(adr.target(), temp); \ + break; \ + } \ + case Address::base_plus_offset: { \ + int32_t offset = 0; \ + baseOffset(temp, adr, offset); \ + jalr(REGISTER, temp, offset); \ + break; \ + } \ + default: \ + ShouldNotReachHere(); \ + } \ + } + + INSN(j, x0); + INSN(jal, x1); + INSN(call, x1); + INSN(tail, x0); + +#undef INSN + +void Assembler::wrap_label(Register r1, Register r2, Label &L, compare_and_branch_insn insn, + compare_and_branch_label_insn neg_insn, bool is_far) { + if (is_far) { + Label done; + (this->*neg_insn)(r1, r2, done, /* is_far */ false); + j(L); + bind(done); + } else { + if (L.is_bound()) { + (this->*insn)(r1, r2, target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(r1, r2, pc()); + } + } +} + +void Assembler::wrap_label(Register Rt, Label &L, Register tmp, load_insn_by_temp insn) { + if (L.is_bound()) { + (this->*insn)(Rt, target(L), tmp); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(Rt, pc(), tmp); + } +} + +void Assembler::wrap_label(Register Rt, Label &L, jal_jalr_insn insn) { + if (L.is_bound()) { + (this->*insn)(Rt, target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(Rt, pc()); + } +} + +void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { + uintptr_t imm64 = (uintptr_t)addr; +#ifndef PRODUCT + { + char buffer[64]; + snprintf(buffer, sizeof(buffer), "0x%" PRIx64, imm64); + block_comment(buffer); + } +#endif + assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (uintptr_t)-1), + "48-bit overflow in address constant"); + // Load upper 32 bits + int32_t imm = imm64 >> 16; + int64_t upper = imm, lower = imm; + lower = (lower << 52) >> 52; + upper -= lower; + upper = (int32_t)upper; + lui(Rd, upper); + addi(Rd, Rd, lower); + + // Load the rest 16 bits. 
+ slli(Rd, Rd, 11); + addi(Rd, Rd, (imm64 >> 5) & 0x7ff); + slli(Rd, Rd, 5); + + // This offset will be used by following jalr/ld. + offset = imm64 & 0x1f; +} + +void Assembler::movptr(Register Rd, uintptr_t imm64) { + movptr(Rd, (address)imm64); +} + +void Assembler::movptr(Register Rd, address addr) { + int offset = 0; + movptr_with_offset(Rd, addr, offset); + addi(Rd, Rd, offset); +} + +void Assembler::ifence() { + fence_i(); + if (UseConservativeFence) { + fence(ir, ir); + } +} + +#define INSN(NAME, NEG_INSN) \ + void Assembler::NAME(Register Rs, Register Rt, const address &dest) { \ + NEG_INSN(Rt, Rs, dest); \ + } \ + void Assembler::NAME(Register Rs, Register Rt, Label &l, bool is_far) { \ + NEG_INSN(Rt, Rs, l, is_far); \ + } + + INSN(bgt, blt); + INSN(ble, bge); + INSN(bgtu, bltu); + INSN(bleu, bgeu); +#undef INSN + +#undef __ + +Address::Address(address target, relocInfo::relocType rtype) : _base(noreg), _offset(0), _mode(literal) { + _target = target; + switch (rtype) { + case relocInfo::oop_type: + case relocInfo::metadata_type: + // Oops are a special case. Normally they would be their own section + // but in cases like icBuffer they are literals in the code stream that + // we don't have a section for. We use none so that we get a literal address + // which is always patchable. 
+ break; + case relocInfo::external_word_type: + _rspec = external_word_Relocation::spec(target); + break; + case relocInfo::internal_word_type: + _rspec = internal_word_Relocation::spec(target); + break; + case relocInfo::opt_virtual_call_type: + _rspec = opt_virtual_call_Relocation::spec(); + break; + case relocInfo::static_call_type: + _rspec = static_call_Relocation::spec(); + break; + case relocInfo::runtime_call_type: + _rspec = runtime_call_Relocation::spec(); + break; + case relocInfo::poll_type: + case relocInfo::poll_return_type: + _rspec = Relocation::spec_simple(rtype); + break; + case relocInfo::none: + _rspec = RelocationHolder::none; + break; + default: + ShouldNotReachHere(); + } +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/assembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/assembler_riscv.hpp @@ -0,0 +1,3049 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_ASSEMBLER_RISCV_HPP +#define CPU_RISCV_ASSEMBLER_RISCV_HPP + +#include "asm/register.hpp" +#include "assembler_riscv.inline.hpp" +#include "metaprogramming/enableIf.hpp" + +#define XLEN 64 + +// definitions of various symbolic names for machine registers + +// First intercalls between C and Java which use 8 general registers +// and 8 floating registers + +class Argument { + public: + enum { + n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) + n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) + + n_int_register_parameters_j = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...) + n_float_register_parameters_j = 8 // f10, f11, ... f17 (j_farg0, j_farg1, ...) + }; +}; + +// function argument(caller-save registers) +REGISTER_DECLARATION(Register, c_rarg0, x10); +REGISTER_DECLARATION(Register, c_rarg1, x11); +REGISTER_DECLARATION(Register, c_rarg2, x12); +REGISTER_DECLARATION(Register, c_rarg3, x13); +REGISTER_DECLARATION(Register, c_rarg4, x14); +REGISTER_DECLARATION(Register, c_rarg5, x15); +REGISTER_DECLARATION(Register, c_rarg6, x16); +REGISTER_DECLARATION(Register, c_rarg7, x17); + +REGISTER_DECLARATION(FloatRegister, c_farg0, f10); +REGISTER_DECLARATION(FloatRegister, c_farg1, f11); +REGISTER_DECLARATION(FloatRegister, c_farg2, f12); +REGISTER_DECLARATION(FloatRegister, c_farg3, f13); +REGISTER_DECLARATION(FloatRegister, c_farg4, f14); +REGISTER_DECLARATION(FloatRegister, c_farg5, f15); +REGISTER_DECLARATION(FloatRegister, c_farg6, f16); +REGISTER_DECLARATION(FloatRegister, c_farg7, f17); + +// Symbolically name the register arguments used by the Java calling convention. +// We have control over the convention for java so we can do what we please. 
+// What pleases us is to offset the java calling convention so that when +// we call a suitable jni method the arguments are lined up and we don't +// have to do much shuffling. A suitable jni method is non-static and a +// small number of arguments. +// +// |------------------------------------------------------------------------| +// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7 | +// |------------------------------------------------------------------------| +// | x10 x11 x12 x13 x14 x15 x16 x17 | +// |------------------------------------------------------------------------| +// | j_rarg7 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6 | +// |------------------------------------------------------------------------| + +REGISTER_DECLARATION(Register, j_rarg0, c_rarg1); +REGISTER_DECLARATION(Register, j_rarg1, c_rarg2); +REGISTER_DECLARATION(Register, j_rarg2, c_rarg3); +REGISTER_DECLARATION(Register, j_rarg3, c_rarg4); +REGISTER_DECLARATION(Register, j_rarg4, c_rarg5); +REGISTER_DECLARATION(Register, j_rarg5, c_rarg6); +REGISTER_DECLARATION(Register, j_rarg6, c_rarg7); +REGISTER_DECLARATION(Register, j_rarg7, c_rarg0); + +// Java floating args are passed as per C + +REGISTER_DECLARATION(FloatRegister, j_farg0, f10); +REGISTER_DECLARATION(FloatRegister, j_farg1, f11); +REGISTER_DECLARATION(FloatRegister, j_farg2, f12); +REGISTER_DECLARATION(FloatRegister, j_farg3, f13); +REGISTER_DECLARATION(FloatRegister, j_farg4, f14); +REGISTER_DECLARATION(FloatRegister, j_farg5, f15); +REGISTER_DECLARATION(FloatRegister, j_farg6, f16); +REGISTER_DECLARATION(FloatRegister, j_farg7, f17); + +// zero register +REGISTER_DECLARATION(Register, zr, x0); +// global pointer +REGISTER_DECLARATION(Register, gp, x3); +// thread pointer +REGISTER_DECLARATION(Register, tp, x4); + +// registers used to hold VM data either temporarily within a method +// or across method calls + +// volatile (caller-save) registers + +// current method -- must be in a call-clobbered 
register +REGISTER_DECLARATION(Register, xmethod, x31); +// return address +REGISTER_DECLARATION(Register, ra, x1); + +// non-volatile (callee-save) registers + +// stack pointer +REGISTER_DECLARATION(Register, sp, x2); +// frame pointer +REGISTER_DECLARATION(Register, fp, x8); +// base of heap +REGISTER_DECLARATION(Register, xheapbase, x27); +// constant pool cache +REGISTER_DECLARATION(Register, xcpool, x26); +// monitors allocated on stack +REGISTER_DECLARATION(Register, xmonitors, x25); +// locals on stack +REGISTER_DECLARATION(Register, xlocals, x24); + +// java thread pointer +REGISTER_DECLARATION(Register, xthread, x23); +// bytecode pointer +REGISTER_DECLARATION(Register, xbcp, x22); +// Dispatch table base +REGISTER_DECLARATION(Register, xdispatch, x21); +// Java stack pointer +REGISTER_DECLARATION(Register, esp, x20); + +// temporary register(caller-save registers) +REGISTER_DECLARATION(Register, t0, x5); +REGISTER_DECLARATION(Register, t1, x6); +REGISTER_DECLARATION(Register, t2, x7); + +const Register g_INTArgReg[Argument::n_int_register_parameters_c] = { + c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 +}; + +const FloatRegister g_FPArgReg[Argument::n_float_register_parameters_c] = { + c_farg0, c_farg1, c_farg2, c_farg3, c_farg4, c_farg5, c_farg6, c_farg7 +}; + +#define assert_cond(ARG1) assert(ARG1, #ARG1) + +// Addressing modes +class Address { + public: + + enum mode { no_mode, base_plus_offset, pcrel, literal }; + + private: + Register _base; + Register _index; + int64_t _offset; + enum mode _mode; + + RelocationHolder _rspec; + + // If the target is far we'll need to load the ea of this to a + // register to reach it. Otherwise if near we can do PC-relative + // addressing. 
+ address _target; + + public: + Address() + : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { } + Address(Register r) + : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } + + template<typename T, ENABLE_IF(std::is_integral<T>::value)> + Address(Register r, T o) + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) {} + + Address(Register r, ByteSize disp) + : Address(r, in_bytes(disp)) {} + Address(address target, RelocationHolder const& rspec) + : _base(noreg), + _index(noreg), + _offset(0), + _mode(literal), + _rspec(rspec), + _target(target) { } + Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type); + + const Register base() const { + guarantee((_mode == base_plus_offset || _mode == pcrel || _mode == literal), "wrong mode"); + return _base; + } + long offset() const { + return _offset; + } + Register index() const { + return _index; + } + mode getMode() const { + return _mode; + } + + bool uses(Register reg) const { return _base == reg; } + const address target() const { return _target; } + const RelocationHolder& rspec() const { return _rspec; } + ~Address() { + _target = NULL; + _base = NULL; + } +}; + +// Convenience classes +class RuntimeAddress: public Address { + + public: + + RuntimeAddress(address target) : Address(target, relocInfo::runtime_call_type) {} + ~RuntimeAddress() {} +}; + +class OopAddress: public Address { + + public: + + OopAddress(address target) : Address(target, relocInfo::oop_type) {} + ~OopAddress() {} +}; + +class ExternalAddress: public Address { + private: + static relocInfo::relocType reloc_for_target(address target) { + // Sometimes ExternalAddress is used for values which aren't + // exactly addresses, like the card table base. + // external_word_type can't be used for values in the first page + // so just skip the reloc in that case. + return external_word_Relocation::can_be_relocated(target) ? 
relocInfo::external_word_type : relocInfo::none; + } + + public: + + ExternalAddress(address target) : Address(target, reloc_for_target(target)) {} + ~ExternalAddress() {} +}; + +class InternalAddress: public Address { + + public: + + InternalAddress(address target) : Address(target, relocInfo::internal_word_type) {} + ~InternalAddress() {} +}; + +class Assembler : public AbstractAssembler { +public: + + enum { instruction_size = 4 }; + + //---< calculate length of instruction >--- + // We just use the values set above. + // instruction must start at passed address + static unsigned int instr_len(unsigned char *instr) { return instruction_size; } + + //---< longest instructions >--- + static unsigned int instr_maxlen() { return instruction_size; } + + enum RoundingMode { + rne = 0b000, // round to Nearest, ties to Even + rtz = 0b001, // round towards Zero + rdn = 0b010, // round Down (towards negative infinity) + rup = 0b011, // round Up (towards infinity) + rmm = 0b100, // round to Nearest, ties to Max Magnitude + rdy = 0b111, // in instruction's rm field, selects dynamic rounding mode. In Rounding Mode register, Invalid. 
+ }; + + void baseOffset32(Register Rd, const Address &adr, int32_t &offset) { + assert(Rd != noreg, "Rd must not be empty register!"); + guarantee(Rd != adr.base(), "should use different registers!"); + if (is_offset_in_range(adr.offset(), 32)) { + int32_t imm = adr.offset(); + int32_t upper = imm, lower = imm; + lower = (imm << 20) >> 20; + upper -= lower; + lui(Rd, upper); + offset = lower; + } else { + movptr_with_offset(Rd, (address)(uintptr_t)adr.offset(), offset); + } + add(Rd, Rd, adr.base()); + } + + void baseOffset(Register Rd, const Address &adr, int32_t &offset) { + if (is_offset_in_range(adr.offset(), 12)) { + assert(Rd != noreg, "Rd must not be empty register!"); + addi(Rd, adr.base(), adr.offset()); + offset = 0; + } else { + baseOffset32(Rd, adr, offset); + } + } + + void _li(Register Rd, int64_t imm); // optimized load immediate + void li32(Register Rd, int32_t imm); + void li64(Register Rd, int64_t imm); + void movptr(Register Rd, address addr); + void movptr_with_offset(Register Rd, address addr, int32_t &offset); + void movptr(Register Rd, uintptr_t imm64); + void ifence(); + void j(const address &dest, Register temp = t0); + void j(const Address &adr, Register temp = t0); + void j(Label &l, Register temp = t0); + void jal(Label &l, Register temp = t0); + void jal(const address &dest, Register temp = t0); + void jal(const Address &adr, Register temp = t0); + void jr(Register Rs); + void jalr(Register Rs); + void ret(); + void call(const address &dest, Register temp = t0); + void call(const Address &adr, Register temp = t0); + void tail(const address &dest, Register temp = t0); + void tail(const Address &adr, Register temp = t0); + void call(Label &l, Register temp) { + call(target(l), temp); + } + void tail(Label &l, Register temp) { + tail(target(l), temp); + } + + static inline uint32_t extract(uint32_t val, unsigned msb, unsigned lsb) { + assert_cond(msb >= lsb && msb <= 31); + unsigned nbits = msb - lsb + 1; + uint32_t mask = (1U << nbits) - 
1; + uint32_t result = val >> lsb; + result &= mask; + return result; + } + + static inline int32_t sextract(uint32_t val, unsigned msb, unsigned lsb) { + assert_cond(msb >= lsb && msb <= 31); + int32_t result = val << (31 - msb); + result >>= (31 - msb + lsb); + return result; + } + + static void patch(address a, unsigned msb, unsigned lsb, unsigned val) { + assert_cond(a != NULL); + assert_cond(msb >= lsb && msb <= 31); + unsigned nbits = msb - lsb + 1; + guarantee(val < (1U << nbits), "Field too big for insn"); + unsigned mask = (1U << nbits) - 1; + val <<= lsb; + mask <<= lsb; + unsigned target = *(unsigned *)a; + target &= ~mask; + target |= val; + *(unsigned *)a = target; + } + + static void patch(address a, unsigned bit, unsigned val) { + patch(a, bit, bit, val); + } + + static void patch_reg(address a, unsigned lsb, Register reg) { + patch(a, lsb + 4, lsb, reg->encoding_nocheck()); + } + + static void patch_reg(address a, unsigned lsb, FloatRegister reg) { + patch(a, lsb + 4, lsb, reg->encoding_nocheck()); + } + + static void patch_reg(address a, unsigned lsb, VectorRegister reg) { + patch(a, lsb + 4, lsb, reg->encoding_nocheck()); + } + + void emit(unsigned insn) { + emit_int32((jint)insn); + } + + void _halt() { + emit_int32(0); + } + +// Register Instruction +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + emit(insn); \ + } + + INSN(_add, 0b0110011, 0b000, 0b0000000); + INSN(_sub, 0b0110011, 0b000, 0b0100000); + INSN(_andr, 0b0110011, 0b111, 0b0000000); + INSN(_orr, 0b0110011, 0b110, 0b0000000); + INSN(_xorr, 0b0110011, 0b100, 0b0000000); + INSN(sll, 0b0110011, 0b001, 0b0000000); + INSN(sra, 0b0110011, 0b101, 0b0100000); + INSN(srl, 0b0110011, 
0b101, 0b0000000); + INSN(slt, 0b0110011, 0b010, 0b0000000); + INSN(sltu, 0b0110011, 0b011, 0b0000000); + INSN(_addw, 0b0111011, 0b000, 0b0000000); + INSN(_subw, 0b0111011, 0b000, 0b0100000); + INSN(sllw, 0b0111011, 0b001, 0b0000000); + INSN(sraw, 0b0111011, 0b101, 0b0100000); + INSN(srlw, 0b0111011, 0b101, 0b0000000); + INSN(mul, 0b0110011, 0b000, 0b0000001); + INSN(mulh, 0b0110011, 0b001, 0b0000001); + INSN(mulhsu,0b0110011, 0b010, 0b0000001); + INSN(mulhu, 0b0110011, 0b011, 0b0000001); + INSN(mulw, 0b0111011, 0b000, 0b0000001); + INSN(div, 0b0110011, 0b100, 0b0000001); + INSN(divu, 0b0110011, 0b101, 0b0000001); + INSN(divw, 0b0111011, 0b100, 0b0000001); + INSN(divuw, 0b0111011, 0b101, 0b0000001); + INSN(rem, 0b0110011, 0b110, 0b0000001); + INSN(remu, 0b0110011, 0b111, 0b0000001); + INSN(remw, 0b0111011, 0b110, 0b0000001); + INSN(remuw, 0b0111011, 0b111, 0b0000001); + +#undef INSN + +#define INSN_ENTRY_RELOC(result_type, header) \ + result_type header { \ + InstructionMark im(this); \ + guarantee(rtype == relocInfo::internal_word_type, \ + "only internal_word_type relocs make sense here"); \ + code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); + + // Load/store register (all modes) +#define INSN(NAME, op, funct3) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + unsigned insn = 0; \ + int32_t val = offset & 0xfff; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 15, Rs); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 31, 20, val); \ + emit(insn); \ + } + + INSN(lb, 0b0000011, 0b000); + INSN(lbu, 0b0000011, 0b100); + INSN(lh, 0b0000011, 0b001); + INSN(lhu, 0b0000011, 0b101); + INSN(_lw, 0b0000011, 0b010); + INSN(lwu, 0b0000011, 0b110); + INSN(_ld, 0b0000011, 0b011); + +#undef INSN + +#define INSN(NAME) \ + void NAME(Register Rd, address dest) { \ + assert_cond(dest != NULL); \ + 
int64_t distance = (dest - pc()); \ + if (is_offset_in_range(distance, 32)) { \ + auipc(Rd, (int32_t)distance + 0x800); \ + NAME(Rd, Rd, ((int32_t)distance << 20) >> 20); \ + } else { \ + int32_t offset = 0; \ + movptr_with_offset(Rd, dest, offset); \ + NAME(Rd, Rd, offset); \ + } \ + } \ + INSN_ENTRY_RELOC(void, NAME(Register Rd, address dest, relocInfo::relocType rtype)) \ + NAME(Rd, dest); \ + } \ + void NAME(Register Rd, const Address &adr, Register temp = t0) { \ + switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rd, adr.target()); \ + break; \ + } \ + case Address::base_plus_offset: { \ + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rd, adr.base(), adr.offset()); \ + } else { \ + int32_t offset = 0; \ + if (Rd == adr.base()) { \ + baseOffset32(temp, adr, offset); \ + NAME(Rd, temp, offset); \ + } else { \ + baseOffset32(Rd, adr, offset); \ + NAME(Rd, Rd, offset); \ + } \ + } \ + break; \ + } \ + default: \ + ShouldNotReachHere(); \ + } \ + } \ + void NAME(Register Rd, Label &L) { \ + wrap_label(Rd, L, &Assembler::NAME); \ + } + + INSN(lb); + INSN(lbu); + INSN(lh); + INSN(lhu); + INSN(lw); + INSN(lwu); + INSN(ld); + +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + unsigned insn = 0; \ + uint32_t val = offset & 0xfff; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 15, Rs); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 31, 20, val); \ + emit(insn); \ + } + + INSN(flw, 0b0000111, 0b010); + INSN(_fld, 0b0000111, 0b011); + +#undef INSN + +#define INSN(NAME) \ + void NAME(FloatRegister Rd, address dest, Register temp = t0) { \ + assert_cond(dest != NULL); \ + int64_t distance = (dest - pc()); \ + if (is_offset_in_range(distance, 32)) { \ + auipc(temp, (int32_t)distance + 0x800); 
\ + NAME(Rd, temp, ((int32_t)distance << 20) >> 20); \ + } else { \ + int32_t offset = 0; \ + movptr_with_offset(temp, dest, offset); \ + NAME(Rd, temp, offset); \ + } \ + } \ + INSN_ENTRY_RELOC(void, NAME(FloatRegister Rd, address dest, relocInfo::relocType rtype, Register temp = t0)) \ + NAME(Rd, dest, temp); \ + } \ + void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) { \ + switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rd, adr.target(), temp); \ + break; \ + } \ + case Address::base_plus_offset: { \ + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rd, adr.base(), adr.offset()); \ + } else { \ + int32_t offset = 0; \ + baseOffset32(temp, adr, offset); \ + NAME(Rd, temp, offset); \ + } \ + break; \ + } \ + default: \ + ShouldNotReachHere(); \ + } \ + } + + INSN(flw); + INSN(fld); +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ + guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid."); \ + unsigned insn = 0; \ + uint32_t val = offset & 0x1fff; \ + uint32_t val11 = (val >> 11) & 0x1; \ + uint32_t val12 = (val >> 12) & 0x1; \ + uint32_t low = (val >> 1) & 0xf; \ + uint32_t high = (val >> 5) & 0x3f; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + patch((address)&insn, 7, val11); \ + patch((address)&insn, 11, 8, low); \ + patch((address)&insn, 30, 25, high); \ + patch((address)&insn, 31, val12); \ + emit(insn); \ + } + + INSN(_beq, 0b1100011, 0b000); + INSN(_bne, 0b1100011, 0b001); + INSN(bge, 0b1100011, 0b101); + INSN(bgeu, 0b1100011, 0b111); + INSN(blt, 0b1100011, 0b100); + INSN(bltu, 0b1100011, 0b110); + +#undef INSN + +#define INSN(NAME) \ + void NAME(Register Rs1, Register Rs2, const address dest) { \ + assert_cond(dest != NULL); \ + int64_t offset = (dest - pc()); \ + 
guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid."); \ + NAME(Rs1, Rs2, offset); \ + } \ + INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype)) \ + NAME(Rs1, Rs2, dest); \ + } + + INSN(beq); + INSN(bne); + INSN(bge); + INSN(bgeu); + INSN(blt); + INSN(bltu); + +#undef INSN + +#define INSN(NAME, NEG_INSN) \ + void NAME(Register Rs1, Register Rs2, Label &L, bool is_far = false) { \ + wrap_label(Rs1, Rs2, L, &Assembler::NAME, &Assembler::NEG_INSN, is_far); \ + } + + INSN(beq, bne); + INSN(bne, beq); + INSN(blt, bge); + INSN(bge, blt); + INSN(bltu, bgeu); + INSN(bgeu, bltu); + +#undef INSN + +#define INSN(NAME, REGISTER, op, funct3) \ + void NAME(REGISTER Rs1, Register Rs2, const int32_t offset) { \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + unsigned insn = 0; \ + uint32_t val = offset & 0xfff; \ + uint32_t low = val & 0x1f; \ + uint32_t high = (val >> 5) & 0x7f; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 15, Rs2); \ + patch_reg((address)&insn, 20, Rs1); \ + patch((address)&insn, 11, 7, low); \ + patch((address)&insn, 31, 25, high); \ + emit(insn); \ + } \ + + INSN(sb, Register, 0b0100011, 0b000); + INSN(sh, Register, 0b0100011, 0b001); + INSN(_sw, Register, 0b0100011, 0b010); + INSN(_sd, Register, 0b0100011, 0b011); + INSN(fsw, FloatRegister, 0b0100111, 0b010); + INSN(_fsd, FloatRegister, 0b0100111, 0b011); + +#undef INSN + +#define INSN(NAME, REGISTER) \ + INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest, relocInfo::relocType rtype, Register temp = t0)) \ + NAME(Rs, dest, temp); \ + } + + INSN(sb, Register); + INSN(sh, Register); + INSN(sw, Register); + INSN(sd, Register); + INSN(fsw, FloatRegister); + INSN(fsd, FloatRegister); + +#undef INSN + +#define INSN(NAME) \ + void NAME(Register Rs, address dest, Register temp = t0) { \ + assert_cond(dest != NULL); \ + assert_different_registers(Rs, temp); \ + int64_t 
distance = (dest - pc()); \ + if (is_offset_in_range(distance, 32)) { \ + auipc(temp, (int32_t)distance + 0x800); \ + NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \ + } else { \ + int32_t offset = 0; \ + movptr_with_offset(temp, dest, offset); \ + NAME(Rs, temp, offset); \ + } \ + } \ + void NAME(Register Rs, const Address &adr, Register temp = t0) { \ + switch (adr.getMode()) { \ + case Address::literal: { \ + assert_different_registers(Rs, temp); \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rs, adr.target(), temp); \ + break; \ + } \ + case Address::base_plus_offset: { \ + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rs, adr.base(), adr.offset()); \ + } else { \ + int32_t offset= 0; \ + assert_different_registers(Rs, temp); \ + baseOffset32(temp, adr, offset); \ + NAME(Rs, temp, offset); \ + } \ + break; \ + } \ + default: \ + ShouldNotReachHere(); \ + } \ + } + + INSN(sb); + INSN(sh); + INSN(sw); + INSN(sd); + +#undef INSN + +#define INSN(NAME) \ + void NAME(FloatRegister Rs, address dest, Register temp = t0) { \ + assert_cond(dest != NULL); \ + int64_t distance = (dest - pc()); \ + if (is_offset_in_range(distance, 32)) { \ + auipc(temp, (int32_t)distance + 0x800); \ + NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \ + } else { \ + int32_t offset = 0; \ + movptr_with_offset(temp, dest, offset); \ + NAME(Rs, temp, offset); \ + } \ + } \ + void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) { \ + switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rs, adr.target(), temp); \ + break; \ + } \ + case Address::base_plus_offset: { \ + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rs, adr.base(), adr.offset()); \ + } else { \ + int32_t offset = 0; \ + baseOffset32(temp, adr, offset); \ + NAME(Rs, temp, offset); \ + } \ + break; \ + } \ + default: \ + ShouldNotReachHere(); \ + } \ + } + + INSN(fsw); + INSN(fsd); + +#undef INSN + +#define INSN(NAME, op, funct3) \ 
+ void NAME(Register Rd, const uint32_t csr, Register Rs1) { \ + guarantee(is_unsigned_imm_in_range(csr, 12, 0), "csr is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch((address)&insn, 31, 20, csr); \ + emit(insn); \ + } + + INSN(csrrw, 0b1110011, 0b001); + INSN(csrrs, 0b1110011, 0b010); + INSN(csrrc, 0b1110011, 0b011); + +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(Register Rd, const uint32_t csr, const uint32_t uimm) { \ + guarantee(is_unsigned_imm_in_range(csr, 12, 0), "csr is invalid"); \ + guarantee(is_unsigned_imm_in_range(uimm, 5, 0), "uimm is invalid"); \ + unsigned insn = 0; \ + uint32_t val = uimm & 0x1f; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 19, 15, val); \ + patch((address)&insn, 31, 20, csr); \ + emit(insn); \ + } + + INSN(csrrwi, 0b1110011, 0b101); + INSN(csrrsi, 0b1110011, 0b110); + INSN(csrrci, 0b1110011, 0b111); + +#undef INSN + +#define INSN(NAME, op) \ + void NAME(Register Rd, const int32_t offset) { \ + guarantee(is_imm_in_range(offset, 20, 1), "offset is invalid."); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 19, 12, (uint32_t)((offset >> 12) & 0xff)); \ + patch((address)&insn, 20, (uint32_t)((offset >> 11) & 0x1)); \ + patch((address)&insn, 30, 21, (uint32_t)((offset >> 1) & 0x3ff)); \ + patch((address)&insn, 31, (uint32_t)((offset >> 20) & 0x1)); \ + emit(insn); \ + } + + INSN(_jal, 0b1101111); + +#undef INSN + +#define INSN(NAME) \ + void NAME(Register Rd, const address dest, Register temp = t0) { \ + assert_cond(dest != NULL); \ + int64_t offset = dest - pc(); \ + if (is_imm_in_range(offset, 20, 1)) { \ + NAME(Rd, offset); \ + } else { \ + assert_different_registers(Rd, temp); \ + int32_t 
off = 0; \ + movptr_with_offset(temp, dest, off); \ + jalr(Rd, temp, off); \ + } \ + } \ + void NAME(Register Rd, Label &L, Register temp = t0) { \ + assert_different_registers(Rd, temp); \ + wrap_label(Rd, L, temp, &Assembler::NAME); \ + } + + INSN(jal); + +#undef INSN + +#undef INSN_ENTRY_RELOC + +#define INSN(NAME, op, funct) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 14, 12, funct); \ + patch_reg((address)&insn, 15, Rs); \ + int32_t val = offset & 0xfff; \ + patch((address)&insn, 31, 20, val); \ + emit(insn); \ + } + + INSN(_jalr, 0b1100111, 0b000); + +#undef INSN + + enum barrier { + i = 0b1000, o = 0b0100, r = 0b0010, w = 0b0001, + ir = i | r, ow = o | w, iorw = i | o | r | w + }; + + void fence(const uint32_t predecessor, const uint32_t successor) { + unsigned insn = 0; + guarantee(predecessor < 16, "predecessor is invalid"); + guarantee(successor < 16, "successor is invalid"); + patch((address)&insn, 6, 0, 0b001111); + patch((address)&insn, 11, 7, 0b00000); + patch((address)&insn, 14, 12, 0b000); + patch((address)&insn, 19, 15, 0b00000); + patch((address)&insn, 23, 20, successor); + patch((address)&insn, 27, 24, predecessor); + patch((address)&insn, 31, 28, 0b0000); + emit(insn); + } + +#define INSN(NAME, op, funct3, funct7) \ + void NAME() { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 11, 7, 0b00000); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 19, 15, 0b00000); \ + patch((address)&insn, 31, 20, funct7); \ + emit(insn); \ + } + + INSN(fence_i, 0b0001111, 0b001, 0b000000000000); + INSN(ecall, 0b1110011, 0b000, 0b000000000000); + INSN(_ebreak, 0b1110011, 0b000, 0b000000000001); + +#undef INSN + +enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11}; + +#define INSN(NAME, op, 
funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + patch((address)&insn, 31, 27, funct7); \ + patch((address)&insn, 26, 25, memory_order); \ + emit(insn); \ + } + + INSN(amoswap_w, 0b0101111, 0b010, 0b00001); + INSN(amoadd_w, 0b0101111, 0b010, 0b00000); + INSN(amoxor_w, 0b0101111, 0b010, 0b00100); + INSN(amoand_w, 0b0101111, 0b010, 0b01100); + INSN(amoor_w, 0b0101111, 0b010, 0b01000); + INSN(amomin_w, 0b0101111, 0b010, 0b10000); + INSN(amomax_w, 0b0101111, 0b010, 0b10100); + INSN(amominu_w, 0b0101111, 0b010, 0b11000); + INSN(amomaxu_w, 0b0101111, 0b010, 0b11100); + INSN(amoswap_d, 0b0101111, 0b011, 0b00001); + INSN(amoadd_d, 0b0101111, 0b011, 0b00000); + INSN(amoxor_d, 0b0101111, 0b011, 0b00100); + INSN(amoand_d, 0b0101111, 0b011, 0b01100); + INSN(amoor_d, 0b0101111, 0b011, 0b01000); + INSN(amomin_d, 0b0101111, 0b011, 0b10000); + INSN(amomax_d , 0b0101111, 0b011, 0b10100); + INSN(amominu_d, 0b0101111, 0b011, 0b11000); + INSN(amomaxu_d, 0b0101111, 0b011, 0b11100); +#undef INSN + +enum operand_size { int8, int16, int32, uint32, int64 }; + +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Aqrl memory_order = relaxed) { \ + unsigned insn = 0; \ + uint32_t val = memory_order & 0x3; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch((address)&insn, 25, 20, 0b00000); \ + patch((address)&insn, 31, 27, funct7); \ + patch((address)&insn, 26, 25, val); \ + emit(insn); \ + } + + INSN(lr_w, 0b0101111, 0b010, 0b00010); + INSN(lr_d, 0b0101111, 0b011, 0b00010); + +#undef INSN + +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register 
Rs2, Aqrl memory_order = relaxed) { \ + unsigned insn = 0; \ + uint32_t val = memory_order & 0x3; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs2); \ + patch_reg((address)&insn, 20, Rs1); \ + patch((address)&insn, 31, 27, funct7); \ + patch((address)&insn, 26, 25, val); \ + emit(insn); \ + } + + INSN(sc_w, 0b0101111, 0b010, 0b00011); + INSN(sc_d, 0b0101111, 0b011, 0b00011); +#undef INSN + +#define INSN(NAME, op, funct5, funct7) \ + void NAME(FloatRegister Rd, FloatRegister Rs1, RoundingMode rm = rne) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, rm); \ + patch((address)&insn, 24, 20, funct5); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(fsqrt_s, 0b1010011, 0b00000, 0b0101100); + INSN(fsqrt_d, 0b1010011, 0b00000, 0b0101101); + INSN(fcvt_s_d, 0b1010011, 0b00001, 0b0100000); + INSN(fcvt_d_s, 0b1010011, 0b00000, 0b0100001); +#undef INSN + +// Immediate Instruction +#define INSN(NAME, op, funct3) \ + void NAME(Register Rd, Register Rs1, int32_t imm) { \ + guarantee(is_imm_in_range(imm, 12, 0), "Immediate is out of validity"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 20, imm & 0x00000fff); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(_addi, 0b0010011, 0b000); + INSN(slti, 0b0010011, 0b010); + INSN(_addiw, 0b0011011, 0b000); + INSN(_and_imm12, 0b0010011, 0b111); + INSN(ori, 0b0010011, 0b110); + INSN(xori, 0b0010011, 0b100); + +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(Register Rd, Register Rs1, uint32_t imm) { \ + guarantee(is_unsigned_imm_in_range(imm, 12, 0), "Immediate is out of validity"); \ + unsigned insn = 0; \ + 
patch((address)&insn,6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 20, imm & 0x00000fff); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(sltiu, 0b0010011, 0b011); + +#undef INSN + +// Shift Immediate Instruction +#define INSN(NAME, op, funct3, funct6) \ + void NAME(Register Rd, Register Rs1, unsigned shamt) { \ + guarantee(shamt <= 0x3f, "Shamt is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 25, 20, shamt); \ + patch((address)&insn, 31, 26, funct6); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(_slli, 0b0010011, 0b001, 0b000000); + INSN(_srai, 0b0010011, 0b101, 0b010000); + INSN(_srli, 0b0010011, 0b101, 0b000000); + +#undef INSN + +// Shift Word Immediate Instruction +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, unsigned shamt) { \ + guarantee(shamt <= 0x1f, "Shamt is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 24, 20, shamt); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(slliw, 0b0011011, 0b001, 0b0000000); + INSN(sraiw, 0b0011011, 0b101, 0b0100000); + INSN(srliw, 0b0011011, 0b101, 0b0000000); + +#undef INSN + +// Upper Immediate Instruction +#define INSN(NAME, op) \ + void NAME(Register Rd, int32_t imm) { \ + int32_t upperImm = imm >> 12; \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch_reg((address)&insn, 7, Rd); \ + upperImm &= 0x000fffff; \ + patch((address)&insn, 31, 12, upperImm); \ + emit(insn); \ + } + + INSN(_lui, 0b0110111); + INSN(auipc, 0b0010111); + +#undef INSN + +// Float and Double Register Instruction +#define INSN(NAME, op, funct2) \ + void 
NAME(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm = rne) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, rm); \ + patch((address)&insn, 26, 25, funct2); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + patch_reg((address)&insn, 27, Rs3); \ + emit(insn); \ + } + + INSN(fmadd_s, 0b1000011, 0b00); + INSN(fmsub_s, 0b1000111, 0b00); + INSN(fnmsub_s, 0b1001011, 0b00); + INSN(fnmadd_s, 0b1001111, 0b00); + INSN(fmadd_d, 0b1000011, 0b01); + INSN(fmsub_d, 0b1000111, 0b01); + INSN(fnmsub_d, 0b1001011, 0b01); + INSN(fnmadd_d, 0b1001111, 0b01); + +#undef INSN + +// Float and Double Register Instruction +#define INSN(NAME, op, funct3, funct7) \ + void NAME(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + emit(insn); \ + } + + INSN(fsgnj_s, 0b1010011, 0b000, 0b0010000); + INSN(fsgnjn_s, 0b1010011, 0b001, 0b0010000); + INSN(fsgnjx_s, 0b1010011, 0b010, 0b0010000); + INSN(fmin_s, 0b1010011, 0b000, 0b0010100); + INSN(fmax_s, 0b1010011, 0b001, 0b0010100); + INSN(fsgnj_d, 0b1010011, 0b000, 0b0010001); + INSN(fsgnjn_d, 0b1010011, 0b001, 0b0010001); + INSN(fsgnjx_d, 0b1010011, 0b010, 0b0010001); + INSN(fmin_d, 0b1010011, 0b000, 0b0010101); + INSN(fmax_d, 0b1010011, 0b001, 0b0010101); + +#undef INSN + +// Float and Double Register Arith Instruction +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, FloatRegister Rs1, FloatRegister Rs2) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + 
patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + emit(insn); \ + } + + INSN(feq_s, 0b1010011, 0b010, 0b1010000); + INSN(flt_s, 0b1010011, 0b001, 0b1010000); + INSN(fle_s, 0b1010011, 0b000, 0b1010000); + INSN(feq_d, 0b1010011, 0b010, 0b1010001); + INSN(fle_d, 0b1010011, 0b000, 0b1010001); + INSN(flt_d, 0b1010011, 0b001, 0b1010001); +#undef INSN + +// Float and Double Arith Instruction +#define INSN(NAME, op, funct7) \ + void NAME(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, rm); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + emit(insn); \ + } + + INSN(fadd_s, 0b1010011, 0b0000000); + INSN(fsub_s, 0b1010011, 0b0000100); + INSN(fmul_s, 0b1010011, 0b0001000); + INSN(fdiv_s, 0b1010011, 0b0001100); + INSN(fadd_d, 0b1010011, 0b0000001); + INSN(fsub_d, 0b1010011, 0b0000101); + INSN(fmul_d, 0b1010011, 0b0001001); + INSN(fdiv_d, 0b1010011, 0b0001101); + +#undef INSN + +// Whole Float and Double Conversion Instruction +#define INSN(NAME, op, funct5, funct7) \ + void NAME(FloatRegister Rd, Register Rs1, RoundingMode rm = rne) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, rm); \ + patch((address)&insn, 24, 20, funct5); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(fcvt_s_w, 0b1010011, 0b00000, 0b1101000); + INSN(fcvt_s_wu, 0b1010011, 0b00001, 0b1101000); + INSN(fcvt_s_l, 0b1010011, 0b00010, 0b1101000); + INSN(fcvt_s_lu, 0b1010011, 0b00011, 0b1101000); + INSN(fcvt_d_w, 0b1010011, 0b00000, 0b1101001); + INSN(fcvt_d_wu, 0b1010011, 0b00001, 0b1101001); + INSN(fcvt_d_l, 0b1010011, 0b00010, 0b1101001); + INSN(fcvt_d_lu, 0b1010011, 0b00011, 0b1101001); + 
+#undef INSN + +// Float and Double Conversion Instruction +#define INSN(NAME, op, funct5, funct7) \ + void NAME(Register Rd, FloatRegister Rs1, RoundingMode rm = rtz) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, rm); \ + patch((address)&insn, 24, 20, funct5); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(fcvt_w_s, 0b1010011, 0b00000, 0b1100000); + INSN(fcvt_l_s, 0b1010011, 0b00010, 0b1100000); + INSN(fcvt_wu_s, 0b1010011, 0b00001, 0b1100000); + INSN(fcvt_lu_s, 0b1010011, 0b00011, 0b1100000); + INSN(fcvt_w_d, 0b1010011, 0b00000, 0b1100001); + INSN(fcvt_wu_d, 0b1010011, 0b00001, 0b1100001); + INSN(fcvt_l_d, 0b1010011, 0b00010, 0b1100001); + INSN(fcvt_lu_d, 0b1010011, 0b00011, 0b1100001); + +#undef INSN + +// Float and Double Move Instruction +#define INSN(NAME, op, funct3, funct5, funct7) \ + void NAME(FloatRegister Rd, Register Rs1) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 20, funct5); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(fmv_w_x, 0b1010011, 0b000, 0b00000, 0b1111000); + INSN(fmv_d_x, 0b1010011, 0b000, 0b00000, 0b1111001); + +#undef INSN + +// Float and Double Conversion Instruction +#define INSN(NAME, op, funct3, funct5, funct7) \ + void NAME(Register Rd, FloatRegister Rs1) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 20, funct5); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(fclass_s, 0b1010011, 0b001, 0b00000, 0b1110000); + INSN(fclass_d, 0b1010011, 0b001, 0b00000, 0b1110001); + INSN(fmv_x_w, 0b1010011, 0b000, 0b00000, 
0b1110000); + INSN(fmv_x_d, 0b1010011, 0b000, 0b00000, 0b1110001); + +#undef INSN + +// ========================== +// RISC-V Vector Extension +// ========================== +enum SEW { + e8, + e16, + e32, + e64, + RESERVED, +}; + +enum LMUL { + mf8 = 0b101, + mf4 = 0b110, + mf2 = 0b111, + m1 = 0b000, + m2 = 0b001, + m4 = 0b010, + m8 = 0b011, +}; + +enum VMA { + mu, // undisturbed + ma, // agnostic +}; + +enum VTA { + tu, // undisturbed + ta, // agnostic +}; + +static Assembler::SEW elembytes_to_sew(int ebytes) { + assert(ebytes > 0 && ebytes <= 8, "unsupported element size"); + return (Assembler::SEW) exact_log2(ebytes); +} + +static Assembler::SEW elemtype_to_sew(BasicType etype) { + return Assembler::elembytes_to_sew(type2aelembytes(etype)); +} + +#define patch_vtype(hsb, lsb, vlmul, vsew, vta, vma, vill) \ + if (vill == 1) { \ + guarantee(((vlmul | vsew | vta | vma) == 0), /* parenthesized: '==' binds tighter than '|' */ \ + "the other bits in vtype shall be zero"); \ + } \ + patch((address)&insn, lsb + 2, lsb, vlmul); \ + patch((address)&insn, lsb + 5, lsb + 3, vsew); \ + patch((address)&insn, lsb + 6, vta); \ + patch((address)&insn, lsb + 7, vma); \ + patch((address)&insn, hsb - 1, lsb + 8, 0); \ + patch((address)&insn, hsb, vill) + +#define INSN(NAME, op, funct3) \ + void NAME(Register Rd, Register Rs1, SEW sew, LMUL lmul = m1, \ + VMA vma = mu, VTA vta = tu, bool vill = false) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_vtype(30, 20, lmul, sew, vta, vma, vill); \ + patch((address)&insn, 31, 0); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(vsetvli, 0b1010111, 0b111); + +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(Register Rd, uint32_t imm, SEW sew, LMUL lmul = m1, \ + VMA vma = mu, VTA vta = tu, bool vill = false) { \ + unsigned insn = 0; \ + guarantee(is_unsigned_imm_in_range(imm, 5, 0), "imm is invalid"); \ + patch((address)&insn, 6, 0, op); \ + 
patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 19, 15, imm); \ + patch_vtype(29, 20, lmul, sew, vta, vma, vill); \ + patch((address)&insn, 31, 30, 0b11); \ + patch_reg((address)&insn, 7, Rd); \ + emit(insn); \ + } + + INSN(vsetivli, 0b1010111, 0b111); + +#undef INSN + +#undef patch_vtype + +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + emit(insn); \ + } + + // Vector Configuration Instruction + INSN(vsetvl, 0b1010111, 0b111, 0b1000000); + +#undef INSN + +enum VectorMask { + v0_t = 0b0, + unmasked = 0b1 +}; + +#define patch_VArith(op, Reg, funct3, Reg_or_Imm5, Vs2, vm, funct6) \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 19, 15, Reg_or_Imm5); \ + patch((address)&insn, 25, vm); \ + patch((address)&insn, 31, 26, funct6); \ + patch_reg((address)&insn, 7, Reg); \ + patch_reg((address)&insn, 20, Vs2); \ + emit(insn) + +// r2_vm +#define INSN(NAME, op, funct3, Vs1, funct6) \ + void NAME(Register Rd, VectorRegister Vs2, VectorMask vm = unmasked) { \ + patch_VArith(op, Rd, funct3, Vs1, Vs2, vm, funct6); \ + } + + // Vector Mask + INSN(vpopc_m, 0b1010111, 0b010, 0b10000, 0b010000); + INSN(vfirst_m, 0b1010111, 0b010, 0b10001, 0b010000); +#undef INSN + +#define INSN(NAME, op, funct3, Vs1, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2, VectorMask vm = unmasked) { \ + patch_VArith(op, Vd, funct3, Vs1, Vs2, vm, funct6); \ + } + + // Vector Integer Extension + INSN(vzext_vf2, 0b1010111, 0b010, 0b00110, 0b010010); + INSN(vzext_vf4, 0b1010111, 0b010, 0b00100, 0b010010); + INSN(vzext_vf8, 0b1010111, 0b010, 0b00010, 0b010010); + INSN(vsext_vf2, 0b1010111, 
0b010, 0b00111, 0b010010); + INSN(vsext_vf4, 0b1010111, 0b010, 0b00101, 0b010010); + INSN(vsext_vf8, 0b1010111, 0b010, 0b00011, 0b010010); + + // Vector Mask + INSN(vmsbf_m, 0b1010111, 0b010, 0b00001, 0b010100); + INSN(vmsif_m, 0b1010111, 0b010, 0b00011, 0b010100); + INSN(vmsof_m, 0b1010111, 0b010, 0b00010, 0b010100); + INSN(viota_m, 0b1010111, 0b010, 0b10000, 0b010100); + + // Vector Single-Width Floating-Point/Integer Type-Convert Instructions + INSN(vfcvt_xu_f_v, 0b1010111, 0b001, 0b00000, 0b010010); + INSN(vfcvt_x_f_v, 0b1010111, 0b001, 0b00001, 0b010010); + INSN(vfcvt_f_xu_v, 0b1010111, 0b001, 0b00010, 0b010010); + INSN(vfcvt_f_x_v, 0b1010111, 0b001, 0b00011, 0b010010); + INSN(vfcvt_rtz_xu_f_v, 0b1010111, 0b001, 0b00110, 0b010010); + INSN(vfcvt_rtz_x_f_v, 0b1010111, 0b001, 0b00111, 0b010010); + + // Vector Floating-Point Instruction + INSN(vfsqrt_v, 0b1010111, 0b001, 0b00000, 0b010011); + INSN(vfclass_v, 0b1010111, 0b001, 0b10000, 0b010011); + +#undef INSN + +// r2rd +#define INSN(NAME, op, funct3, simm5, vm, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2) { \ + patch_VArith(op, Vd, funct3, simm5, Vs2, vm, funct6); \ + } + + // Vector Whole Vector Register Move + INSN(vmv1r_v, 0b1010111, 0b011, 0b00000, 0b1, 0b100111); + INSN(vmv2r_v, 0b1010111, 0b011, 0b00001, 0b1, 0b100111); + INSN(vmv4r_v, 0b1010111, 0b011, 0b00011, 0b1, 0b100111); + INSN(vmv8r_v, 0b1010111, 0b011, 0b00111, 0b1, 0b100111); + +#undef INSN + +#define INSN(NAME, op, funct3, Vs1, vm, funct6) \ + void NAME(FloatRegister Rd, VectorRegister Vs2) { \ + patch_VArith(op, Rd, funct3, Vs1, Vs2, vm, funct6); \ + } + + // Vector Floating-Point Move Instruction + INSN(vfmv_f_s, 0b1010111, 0b001, 0b00000, 0b1, 0b010000); + +#undef INSN + +#define INSN(NAME, op, funct3, Vs1, vm, funct6) \ + void NAME(Register Rd, VectorRegister Vs2) { \ + patch_VArith(op, Rd, funct3, Vs1, Vs2, vm, funct6); \ + } + + // Vector Integer Scalar Move Instructions + INSN(vmv_x_s, 0b1010111, 0b010, 0b00000, 0b1, 
0b010000); + +#undef INSN + +// r_vm +#define INSN(NAME, op, funct3, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) { \ + guarantee(is_unsigned_imm_in_range(imm, 5, 0), "imm is invalid"); \ + patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \ + } + + // Vector Single-Width Bit Shift Instructions + INSN(vsra_vi, 0b1010111, 0b011, 0b101001); + INSN(vsrl_vi, 0b1010111, 0b011, 0b101000); + INSN(vsll_vi, 0b1010111, 0b011, 0b100101); + +#undef INSN + +#define INSN(NAME, op, funct3, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs1, VectorRegister Vs2, VectorMask vm = unmasked) { \ + patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Vector Single-Width Floating-Point Fused Multiply-Add Instructions + INSN(vfnmsub_vv, 0b1010111, 0b001, 0b101011); + INSN(vfmsub_vv, 0b1010111, 0b001, 0b101010); + INSN(vfnmadd_vv, 0b1010111, 0b001, 0b101001); + INSN(vfmadd_vv, 0b1010111, 0b001, 0b101000); + INSN(vfnmsac_vv, 0b1010111, 0b001, 0b101111); + INSN(vfmsac_vv, 0b1010111, 0b001, 0b101110); + INSN(vfmacc_vv, 0b1010111, 0b001, 0b101100); + INSN(vfnmacc_vv, 0b1010111, 0b001, 0b101101); + + // Vector Single-Width Integer Multiply-Add Instructions + INSN(vnmsub_vv, 0b1010111, 0b010, 0b101011); + INSN(vmadd_vv, 0b1010111, 0b010, 0b101001); + INSN(vnmsac_vv, 0b1010111, 0b010, 0b101111); + INSN(vmacc_vv, 0b1010111, 0b010, 0b101101); + +#undef INSN + +#define INSN(NAME, op, funct3, funct6) \ + void NAME(VectorRegister Vd, Register Rs1, VectorRegister Vs2, VectorMask vm = unmasked) { \ + patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Vector Single-Width Integer Multiply-Add Instructions + INSN(vnmsub_vx, 0b1010111, 0b110, 0b101011); + INSN(vmadd_vx, 0b1010111, 0b110, 0b101001); + INSN(vnmsac_vx, 0b1010111, 0b110, 0b101111); + INSN(vmacc_vx, 0b1010111, 0b110, 0b101101); + + INSN(vrsub_vx, 0b1010111, 0b100, 0b000011); + +#undef INSN + 
+#define INSN(NAME, op, funct3, funct6) \ + void NAME(VectorRegister Vd, FloatRegister Rs1, VectorRegister Vs2, VectorMask vm = unmasked) { \ + patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Vector Single-Width Floating-Point Fused Multiply-Add Instructions + INSN(vfnmsub_vf, 0b1010111, 0b101, 0b101011); + INSN(vfmsub_vf, 0b1010111, 0b101, 0b101010); + INSN(vfnmadd_vf, 0b1010111, 0b101, 0b101001); + INSN(vfmadd_vf, 0b1010111, 0b101, 0b101000); + INSN(vfnmsac_vf, 0b1010111, 0b101, 0b101111); + INSN(vfmsac_vf, 0b1010111, 0b101, 0b101110); + INSN(vfmacc_vf, 0b1010111, 0b101, 0b101100); + INSN(vfnmacc_vf, 0b1010111, 0b101, 0b101101); + +#undef INSN + +#define INSN(NAME, op, funct3, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2, VectorRegister Vs1, VectorMask vm = unmasked) { \ + patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Vector Single-Width Floating-Point Reduction Instructions + INSN(vfredsum_vs, 0b1010111, 0b001, 0b000001); + INSN(vfredosum_vs, 0b1010111, 0b001, 0b000011); + INSN(vfredmin_vs, 0b1010111, 0b001, 0b000101); + INSN(vfredmax_vs, 0b1010111, 0b001, 0b000111); + + // Vector Single-Width Integer Reduction Instructions + INSN(vredsum_vs, 0b1010111, 0b010, 0b000000); + INSN(vredand_vs, 0b1010111, 0b010, 0b000001); + INSN(vredor_vs, 0b1010111, 0b010, 0b000010); + INSN(vredxor_vs, 0b1010111, 0b010, 0b000011); + INSN(vredminu_vs, 0b1010111, 0b010, 0b000100); + INSN(vredmin_vs, 0b1010111, 0b010, 0b000101); + INSN(vredmaxu_vs, 0b1010111, 0b010, 0b000110); + INSN(vredmax_vs, 0b1010111, 0b010, 0b000111); + + // Vector Floating-Point Compare Instructions + INSN(vmfle_vv, 0b1010111, 0b001, 0b011001); + INSN(vmflt_vv, 0b1010111, 0b001, 0b011011); + INSN(vmfne_vv, 0b1010111, 0b001, 0b011100); + INSN(vmfeq_vv, 0b1010111, 0b001, 0b011000); + + // Vector Floating-Point Sign-Injection Instructions + INSN(vfsgnjx_vv, 0b1010111, 0b001, 0b001010); + INSN(vfsgnjn_vv, 0b1010111, 0b001, 
0b001001); + INSN(vfsgnj_vv, 0b1010111, 0b001, 0b001000); + + // Vector Floating-Point MIN/MAX Instructions + INSN(vfmax_vv, 0b1010111, 0b001, 0b000110); + INSN(vfmin_vv, 0b1010111, 0b001, 0b000100); + + // Vector Single-Width Floating-Point Multiply/Divide Instructions + INSN(vfdiv_vv, 0b1010111, 0b001, 0b100000); + INSN(vfmul_vv, 0b1010111, 0b001, 0b100100); + + // Vector Single-Width Floating-Point Add/Subtract Instructions + INSN(vfsub_vv, 0b1010111, 0b001, 0b000010); + INSN(vfadd_vv, 0b1010111, 0b001, 0b000000); + + // Vector Single-Width Fractional Multiply with Rounding and Saturation + INSN(vsmul_vv, 0b1010111, 0b000, 0b100111); + + // Vector Integer Divide Instructions + INSN(vrem_vv, 0b1010111, 0b010, 0b100011); + INSN(vremu_vv, 0b1010111, 0b010, 0b100010); + INSN(vdiv_vv, 0b1010111, 0b010, 0b100001); + INSN(vdivu_vv, 0b1010111, 0b010, 0b100000); + + // Vector Single-Width Integer Multiply Instructions + INSN(vmulhsu_vv, 0b1010111, 0b010, 0b100110); + INSN(vmulhu_vv, 0b1010111, 0b010, 0b100100); + INSN(vmulh_vv, 0b1010111, 0b010, 0b100111); + INSN(vmul_vv, 0b1010111, 0b010, 0b100101); + + // Vector Integer Min/Max Instructions + INSN(vmax_vv, 0b1010111, 0b000, 0b000111); + INSN(vmaxu_vv, 0b1010111, 0b000, 0b000110); + INSN(vmin_vv, 0b1010111, 0b000, 0b000101); + INSN(vminu_vv, 0b1010111, 0b000, 0b000100); + + // Vector Integer Comparison Instructions + INSN(vmsle_vv, 0b1010111, 0b000, 0b011101); + INSN(vmsleu_vv, 0b1010111, 0b000, 0b011100); + INSN(vmslt_vv, 0b1010111, 0b000, 0b011011); + INSN(vmsltu_vv, 0b1010111, 0b000, 0b011010); + INSN(vmsne_vv, 0b1010111, 0b000, 0b011001); + INSN(vmseq_vv, 0b1010111, 0b000, 0b011000); + + // Vector Single-Width Bit Shift Instructions + INSN(vsra_vv, 0b1010111, 0b000, 0b101001); + INSN(vsrl_vv, 0b1010111, 0b000, 0b101000); + INSN(vsll_vv, 0b1010111, 0b000, 0b100101); + + // Vector Bitwise Logical Instructions + INSN(vxor_vv, 0b1010111, 0b000, 0b001011); + INSN(vor_vv, 0b1010111, 0b000, 0b001010); + INSN(vand_vv, 
0b1010111, 0b000, 0b001001); + + // Vector Single-Width Integer Add and Subtract + INSN(vsub_vv, 0b1010111, 0b000, 0b000010); + INSN(vadd_vv, 0b1010111, 0b000, 0b000000); + +#undef INSN + + +#define INSN(NAME, op, funct3, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2, Register Rs1, VectorMask vm = unmasked) { \ + patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Vector Integer Divide Instructions + INSN(vrem_vx, 0b1010111, 0b110, 0b100011); + INSN(vremu_vx, 0b1010111, 0b110, 0b100010); + INSN(vdiv_vx, 0b1010111, 0b110, 0b100001); + INSN(vdivu_vx, 0b1010111, 0b110, 0b100000); + + // Vector Single-Width Integer Multiply Instructions + INSN(vmulhsu_vx, 0b1010111, 0b110, 0b100110); + INSN(vmulhu_vx, 0b1010111, 0b110, 0b100100); + INSN(vmulh_vx, 0b1010111, 0b110, 0b100111); + INSN(vmul_vx, 0b1010111, 0b110, 0b100101); + + // Vector Integer Min/Max Instructions + INSN(vmax_vx, 0b1010111, 0b100, 0b000111); + INSN(vmaxu_vx, 0b1010111, 0b100, 0b000110); + INSN(vmin_vx, 0b1010111, 0b100, 0b000101); + INSN(vminu_vx, 0b1010111, 0b100, 0b000100); + + // Vector Integer Comparison Instructions + INSN(vmsgt_vx, 0b1010111, 0b100, 0b011111); + INSN(vmsgtu_vx, 0b1010111, 0b100, 0b011110); + INSN(vmsle_vx, 0b1010111, 0b100, 0b011101); + INSN(vmsleu_vx, 0b1010111, 0b100, 0b011100); + INSN(vmslt_vx, 0b1010111, 0b100, 0b011011); + INSN(vmsltu_vx, 0b1010111, 0b100, 0b011010); + INSN(vmsne_vx, 0b1010111, 0b100, 0b011001); + INSN(vmseq_vx, 0b1010111, 0b100, 0b011000); + + // Vector Narrowing Integer Right Shift Instructions + INSN(vnsra_wx, 0b1010111, 0b100, 0b101101); + INSN(vnsrl_wx, 0b1010111, 0b100, 0b101100); + + // Vector Single-Width Bit Shift Instructions + INSN(vsra_vx, 0b1010111, 0b100, 0b101001); + INSN(vsrl_vx, 0b1010111, 0b100, 0b101000); + INSN(vsll_vx, 0b1010111, 0b100, 0b100101); + + // Vector Bitwise Logical Instructions + INSN(vxor_vx, 0b1010111, 0b100, 0b001011); + INSN(vor_vx, 0b1010111, 0b100, 0b001010); + INSN(vand_vx, 
0b1010111, 0b100, 0b001001); + + // Vector Single-Width Integer Add and Subtract + INSN(vsub_vx, 0b1010111, 0b100, 0b000010); + INSN(vadd_vx, 0b1010111, 0b100, 0b000000); + +#undef INSN + +#define INSN(NAME, op, funct3, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2, FloatRegister Rs1, VectorMask vm = unmasked) { \ + patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Vector Floating-Point Compare Instructions + INSN(vmfge_vf, 0b1010111, 0b101, 0b011111); + INSN(vmfgt_vf, 0b1010111, 0b101, 0b011101); + INSN(vmfle_vf, 0b1010111, 0b101, 0b011001); + INSN(vmflt_vf, 0b1010111, 0b101, 0b011011); + INSN(vmfne_vf, 0b1010111, 0b101, 0b011100); + INSN(vmfeq_vf, 0b1010111, 0b101, 0b011000); + + // Vector Floating-Point Sign-Injection Instructions + INSN(vfsgnjx_vf, 0b1010111, 0b101, 0b001010); + INSN(vfsgnjn_vf, 0b1010111, 0b101, 0b001001); + INSN(vfsgnj_vf, 0b1010111, 0b101, 0b001000); + + // Vector Floating-Point MIN/MAX Instructions + INSN(vfmax_vf, 0b1010111, 0b101, 0b000110); + INSN(vfmin_vf, 0b1010111, 0b101, 0b000100); + + // Vector Single-Width Floating-Point Multiply/Divide Instructions + INSN(vfdiv_vf, 0b1010111, 0b101, 0b100000); + INSN(vfmul_vf, 0b1010111, 0b101, 0b100100); + INSN(vfrdiv_vf, 0b1010111, 0b101, 0b100001); + + // Vector Single-Width Floating-Point Add/Subtract Instructions + INSN(vfsub_vf, 0b1010111, 0b101, 0b000010); + INSN(vfadd_vf, 0b1010111, 0b101, 0b000000); + INSN(vfrsub_vf, 0b1010111, 0b101, 0b100111); + +#undef INSN + +#define INSN(NAME, op, funct3, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2, int32_t imm, VectorMask vm = unmasked) { \ + guarantee(is_imm_in_range(imm, 5, 0), "imm is invalid"); \ + patch_VArith(op, Vd, funct3, (uint32_t)imm & 0x1f, Vs2, vm, funct6); \ + } + + INSN(vmsgt_vi, 0b1010111, 0b011, 0b011111); + INSN(vmsgtu_vi, 0b1010111, 0b011, 0b011110); + INSN(vmsle_vi, 0b1010111, 0b011, 0b011101); + INSN(vmsleu_vi, 0b1010111, 0b011, 0b011100); + INSN(vmsne_vi, 
0b1010111, 0b011, 0b011001); + INSN(vmseq_vi, 0b1010111, 0b011, 0b011000); + INSN(vxor_vi, 0b1010111, 0b011, 0b001011); + INSN(vor_vi, 0b1010111, 0b011, 0b001010); + INSN(vand_vi, 0b1010111, 0b011, 0b001001); + INSN(vadd_vi, 0b1010111, 0b011, 0b000000); + +#undef INSN + +#define INSN(NAME, op, funct3, funct6) \ + void NAME(VectorRegister Vd, int32_t imm, VectorRegister Vs2, VectorMask vm = unmasked) { \ + guarantee(is_imm_in_range(imm, 5, 0), "imm is invalid"); \ + patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \ + } + + INSN(vrsub_vi, 0b1010111, 0b011, 0b000011); + +#undef INSN + +#define INSN(NAME, op, funct3, vm, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2, VectorRegister Vs1) { \ + patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Vector Compress Instruction + INSN(vcompress_vm, 0b1010111, 0b010, 0b1, 0b010111); + + // Vector Mask-Register Logical Instructions + INSN(vmxnor_mm, 0b1010111, 0b010, 0b1, 0b011111); + INSN(vmornot_mm, 0b1010111, 0b010, 0b1, 0b011100); + INSN(vmnor_mm, 0b1010111, 0b010, 0b1, 0b011110); + INSN(vmor_mm, 0b1010111, 0b010, 0b1, 0b011010); + INSN(vmxor_mm, 0b1010111, 0b010, 0b1, 0b011011); + INSN(vmandnot_mm, 0b1010111, 0b010, 0b1, 0b011000); + INSN(vmnand_mm, 0b1010111, 0b010, 0b1, 0b011101); + INSN(vmand_mm, 0b1010111, 0b010, 0b1, 0b011001); + +#undef INSN + +#define INSN(NAME, op, funct3, Vs2, vm, funct6) \ + void NAME(VectorRegister Vd, int32_t imm) { \ + guarantee(is_imm_in_range(imm, 5, 0), "imm is invalid"); \ + patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \ + } + + // Vector Integer Move Instructions + INSN(vmv_v_i, 0b1010111, 0b011, v0, 0b1, 0b010111); + +#undef INSN + +#define INSN(NAME, op, funct3, Vs2, vm, funct6) \ + void NAME(VectorRegister Vd, FloatRegister Rs1) { \ + patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Floating-Point Scalar Move Instructions + INSN(vfmv_s_f, 0b1010111, 0b101, 
v0, 0b1, 0b010000); + // Vector Floating-Point Move Instruction + INSN(vfmv_v_f, 0b1010111, 0b101, v0, 0b1, 0b010111); + +#undef INSN + +#define INSN(NAME, op, funct3, Vs2, vm, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs1) { \ + patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Vector Integer Move Instructions + INSN(vmv_v_v, 0b1010111, 0b000, v0, 0b1, 0b010111); + +#undef INSN + +#define INSN(NAME, op, funct3, Vs2, vm, funct6) \ + void NAME(VectorRegister Vd, Register Rs1) { \ + patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Integer Scalar Move Instructions + INSN(vmv_s_x, 0b1010111, 0b110, v0, 0b1, 0b010000); + + // Vector Integer Move Instructions + INSN(vmv_v_x, 0b1010111, 0b100, v0, 0b1, 0b010111); + +#undef INSN +#undef patch_VArith + +#define INSN(NAME, op, funct13, funct6) \ + void NAME(VectorRegister Vd, VectorMask vm = unmasked) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 24, 12, funct13); \ + patch((address)&insn, 25, vm); \ + patch((address)&insn, 31, 26, funct6); \ + patch_reg((address)&insn, 7, Vd); \ + emit(insn); \ + } + + // Vector Element Index Instruction + INSN(vid_v, 0b1010111, 0b0000010001010, 0b010100); + +#undef INSN + +enum Nf { + g1 = 0b000, + g2 = 0b001, + g3 = 0b010, + g4 = 0b011, + g5 = 0b100, + g6 = 0b101, + g7 = 0b110, + g8 = 0b111 +}; + +#define patch_VLdSt(op, VReg, width, Rs1, Reg_or_umop, vm, mop, mew, nf) \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, width); \ + patch((address)&insn, 24, 20, Reg_or_umop); \ + patch((address)&insn, 25, vm); \ + patch((address)&insn, 27, 26, mop); \ + patch((address)&insn, 28, mew); \ + patch((address)&insn, 31, 29, nf); \ + patch_reg((address)&insn, 7, VReg); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn) + +#define INSN(NAME, op, lumop, vm, mop, nf) \ + void NAME(VectorRegister Vd, Register Rs1, uint32_t width = 
0, bool mew = false) { \ + guarantee(is_unsigned_imm_in_range(width, 3, 0), "width is invalid"); \ + patch_VLdSt(op, Vd, width, Rs1, lumop, vm, mop, mew, nf); \ + } + + // Vector Load/Store Instructions + INSN(vl1r_v, 0b0000111, 0b01000, 0b1, 0b00, g1); + +#undef INSN + +#define INSN(NAME, op, width, sumop, vm, mop, mew, nf) \ + void NAME(VectorRegister Vs3, Register Rs1) { \ + patch_VLdSt(op, Vs3, width, Rs1, sumop, vm, mop, mew, nf); \ + } + + // Vector Load/Store Instructions + INSN(vs1r_v, 0b0100111, 0b000, 0b01000, 0b1, 0b00, 0b0, g1); + +#undef INSN + +// r2_nfvm +#define INSN(NAME, op, width, umop, mop, mew) \ + void NAME(VectorRegister Vd_or_Vs3, Register Rs1, Nf nf = g1) { \ + patch_VLdSt(op, Vd_or_Vs3, width, Rs1, umop, 1, mop, mew, nf); \ + } + + // Vector Unit-Stride Instructions + INSN(vle1_v, 0b0000111, 0b000, 0b01011, 0b00, 0b0); + INSN(vse1_v, 0b0100111, 0b000, 0b01011, 0b00, 0b0); + +#undef INSN + +#define INSN(NAME, op, width, umop, mop, mew) \ + void NAME(VectorRegister Vd_or_Vs3, Register Rs1, VectorMask vm = unmasked, Nf nf = g1) { \ + patch_VLdSt(op, Vd_or_Vs3, width, Rs1, umop, vm, mop, mew, nf); \ + } + + // Vector Unit-Stride Instructions + INSN(vle8_v, 0b0000111, 0b000, 0b00000, 0b00, 0b0); + INSN(vle16_v, 0b0000111, 0b101, 0b00000, 0b00, 0b0); + INSN(vle32_v, 0b0000111, 0b110, 0b00000, 0b00, 0b0); + INSN(vle64_v, 0b0000111, 0b111, 0b00000, 0b00, 0b0); + + // Vector unit-stride fault-only-first Instructions + INSN(vle8ff_v, 0b0000111, 0b000, 0b10000, 0b00, 0b0); + INSN(vle16ff_v, 0b0000111, 0b101, 0b10000, 0b00, 0b0); + INSN(vle32ff_v, 0b0000111, 0b110, 0b10000, 0b00, 0b0); + INSN(vle64ff_v, 0b0000111, 0b111, 0b10000, 0b00, 0b0); + + INSN(vse8_v, 0b0100111, 0b000, 0b00000, 0b00, 0b0); + INSN(vse16_v, 0b0100111, 0b101, 0b00000, 0b00, 0b0); + INSN(vse32_v, 0b0100111, 0b110, 0b00000, 0b00, 0b0); + INSN(vse64_v, 0b0100111, 0b111, 0b00000, 0b00, 0b0); + +#undef INSN + +#define INSN(NAME, op, width, mop, mew) \ + void NAME(VectorRegister Vd, 
Register Rs1, VectorRegister Vs2, VectorMask vm = unmasked, Nf nf = g1) { \ + patch_VLdSt(op, Vd, width, Rs1, Vs2->encoding_nocheck(), vm, mop, mew, nf); \ + } + + // Vector unordered indexed load instructions + INSN(vluxei8_v, 0b0000111, 0b000, 0b01, 0b0); + INSN(vluxei16_v, 0b0000111, 0b101, 0b01, 0b0); + INSN(vluxei32_v, 0b0000111, 0b110, 0b01, 0b0); + INSN(vluxei64_v, 0b0000111, 0b111, 0b01, 0b0); + + // Vector ordered indexed load instructions + INSN(vloxei8_v, 0b0000111, 0b000, 0b11, 0b0); + INSN(vloxei16_v, 0b0000111, 0b101, 0b11, 0b0); + INSN(vloxei32_v, 0b0000111, 0b110, 0b11, 0b0); + INSN(vloxei64_v, 0b0000111, 0b111, 0b11, 0b0); +#undef INSN + +#define INSN(NAME, op, width, mop, mew) \ + void NAME(VectorRegister Vd, Register Rs1, Register Rs2, VectorMask vm = unmasked, Nf nf = g1) { \ + patch_VLdSt(op, Vd, width, Rs1, Rs2->encoding_nocheck(), vm, mop, mew, nf); \ + } + + // Vector Strided Instructions + INSN(vlse8_v, 0b0000111, 0b000, 0b10, 0b0); + INSN(vlse16_v, 0b0000111, 0b101, 0b10, 0b0); + INSN(vlse32_v, 0b0000111, 0b110, 0b10, 0b0); + INSN(vlse64_v, 0b0000111, 0b111, 0b10, 0b0); + +#undef INSN +#undef patch_VLdSt + +// ==================================== +// RISC-V Bit-Manipulation Extension +// ==================================== +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + emit(insn); \ + } + + INSN(add_uw, 0b0111011, 0b000, 0b0000100); + INSN(rol, 0b0110011, 0b001, 0b0110000); + INSN(rolw, 0b0111011, 0b001, 0b0110000); + INSN(ror, 0b0110011, 0b101, 0b0110000); + INSN(rorw, 0b0111011, 0b101, 0b0110000); + INSN(sh1add, 0b0110011, 0b010, 0b0010000); + INSN(sh2add, 0b0110011, 0b100, 0b0010000); + INSN(sh3add, 0b0110011, 0b110, 
0b0010000); + INSN(sh1add_uw, 0b0111011, 0b010, 0b0010000); + INSN(sh2add_uw, 0b0111011, 0b100, 0b0010000); + INSN(sh3add_uw, 0b0111011, 0b110, 0b0010000); + INSN(andn, 0b0110011, 0b111, 0b0100000); + INSN(orn, 0b0110011, 0b110, 0b0100000); + INSN(xnor, 0b0110011, 0b100, 0b0100000); + INSN(max, 0b0110011, 0b110, 0b0000101); + INSN(maxu, 0b0110011, 0b111, 0b0000101); + INSN(min, 0b0110011, 0b100, 0b0000101); + INSN(minu, 0b0110011, 0b101, 0b0000101); + +#undef INSN + +#define INSN(NAME, op, funct3, funct12) \ + void NAME(Register Rd, Register Rs1) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 20, funct12); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(rev8, 0b0010011, 0b101, 0b011010111000); + INSN(sext_b, 0b0010011, 0b001, 0b011000000100); + INSN(sext_h, 0b0010011, 0b001, 0b011000000101); + INSN(zext_h, 0b0111011, 0b100, 0b000010000000); + INSN(clz, 0b0010011, 0b001, 0b011000000000); + INSN(clzw, 0b0011011, 0b001, 0b011000000000); + INSN(ctz, 0b0010011, 0b001, 0b011000000001); + INSN(ctzw, 0b0011011, 0b001, 0b011000000001); + INSN(cpop, 0b0010011, 0b001, 0b011000000010); + INSN(cpopw, 0b0011011, 0b001, 0b011000000010); + INSN(orc_b, 0b0010011, 0b101, 0b001010000111); + +#undef INSN + +#define INSN(NAME, op, funct3, funct6) \ + void NAME(Register Rd, Register Rs1, unsigned shamt) {\ + guarantee(shamt <= 0x3f, "Shamt is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 25, 20, shamt); \ + patch((address)&insn, 31, 26, funct6); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(rori, 0b0010011, 0b101, 0b011000); + INSN(slli_uw, 0b0011011, 0b001, 0b000010); + +#undef INSN + +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, unsigned shamt) {\ + 
guarantee(shamt <= 0x1f, "Shamt is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 24, 20, shamt); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(roriw, 0b0011011, 0b101, 0b0110000); + +#undef INSN + +// ======================================== +// RISC-V Compressed Instructions Extension +// ======================================== +// Note: +// 1. When UseRVC is enabled, 32-bit instructions under 'CompressibleRegion's will be +// transformed to 16-bit instructions if compressible. +// 2. RVC instructions in Assembler always begin with 'c_' prefix, as 'c_li', +// but most of time we have no need to explicitly use these instructions. +// 3. 'CompressibleRegion' is introduced to hint instructions in this Region's RTTI range +// are qualified to be compressed with their 2-byte versions. +// An example: +// +// CompressibleRegion cr(_masm); +// __ andr(...); // this instruction could change to c.and if able to +// +// 4. Using -XX:PrintAssemblyOptions=no-aliases could distinguish RVC instructions from +// normal ones. +// + +private: + bool _in_compressible_region; +public: + bool in_compressible_region() const { return _in_compressible_region; } + void set_in_compressible_region(bool b) { _in_compressible_region = b; } +public: + + // a compressible region + class CompressibleRegion : public StackObj { + protected: + Assembler *_masm; + bool _saved_in_compressible_region; + public: + CompressibleRegion(Assembler *_masm) + : _masm(_masm) + , _saved_in_compressible_region(_masm->in_compressible_region()) { + _masm->set_in_compressible_region(true); + } + ~CompressibleRegion() { + _masm->set_in_compressible_region(_saved_in_compressible_region); + } + }; + + // patch a 16-bit instruction. 
+ static void c_patch(address a, unsigned msb, unsigned lsb, uint16_t val) { + assert_cond(a != NULL); + assert_cond(msb >= lsb && msb <= 15); + unsigned nbits = msb - lsb + 1; + guarantee(val < (1U << nbits), "Field too big for insn"); + uint16_t mask = (1U << nbits) - 1; + val <<= lsb; + mask <<= lsb; + uint16_t target = *(uint16_t *)a; + target &= ~mask; + target |= val; + *(uint16_t *)a = target; + } + + static void c_patch(address a, unsigned bit, uint16_t val) { + c_patch(a, bit, bit, val); + } + + // patch a 16-bit instruction with a general purpose register ranging [0, 31] (5 bits) + static void c_patch_reg(address a, unsigned lsb, Register reg) { + c_patch(a, lsb + 4, lsb, reg->encoding_nocheck()); + } + + // patch a 16-bit instruction with a general purpose register ranging [8, 15] (3 bits) + static void c_patch_compressed_reg(address a, unsigned lsb, Register reg) { + c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); + } + + // patch a 16-bit instruction with a float register ranging [0, 31] (5 bits) + static void c_patch_reg(address a, unsigned lsb, FloatRegister reg) { + c_patch(a, lsb + 4, lsb, reg->encoding_nocheck()); + } + + // patch a 16-bit instruction with a float register ranging [8, 15] (3 bits) + static void c_patch_compressed_reg(address a, unsigned lsb, FloatRegister reg) { + c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); + } + +// -------------- RVC Instruction Definitions -------------- + + void c_nop() { + c_addi(x0, 0); + } + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd_Rs1, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 6, 0)); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ + c_patch_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_addi, 0b000, 0b01); + INSN(c_addiw, 0b001, 0b01); + 
+#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 10, 0)); \ + assert_cond((imm & 0b1111) == 0); \ + assert_cond(imm != 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 4, 3, (imm & right_n_bits(9)) >> 7); \ + c_patch((address)&insn, 5, 5, (imm & nth_bit(6)) >> 6); \ + c_patch((address)&insn, 6, 6, (imm & nth_bit(4)) >> 4); \ + c_patch_reg((address)&insn, 7, sp); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(9)) >> 9); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_addi16sp, 0b011, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 10, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + assert_cond(uimm != 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_compressed_reg((address)&insn, 2, Rd); \ + c_patch((address)&insn, 5, 5, (uimm & nth_bit(3)) >> 3); \ + c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ + c_patch((address)&insn, 10, 7, (uimm & right_n_bits(10)) >> 6); \ + c_patch((address)&insn, 12, 11, (uimm & right_n_bits(6)) >> 4); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_addi4spn, 0b000, 0b00); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd_Rs1, uint32_t shamt) { \ + assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ + assert_cond(shamt != 0); \ + assert_cond(Rd_Rs1 != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ + c_patch_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_slli, 0b000, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, funct2, 
op) \ + void NAME(Register Rd_Rs1, uint32_t shamt) { \ + assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ + assert_cond(shamt != 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ + c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 11, 10, funct2); \ + c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_srli, 0b100, 0b00, 0b01); + INSN(c_srai, 0b100, 0b01, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, funct2, op) \ + void NAME(Register Rd_Rs1, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 6, 0)); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ + c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 11, 10, funct2); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_andi, 0b100, 0b10, 0b01); + +#undef INSN + +#define INSN(NAME, funct6, funct2, op) \ + void NAME(Register Rd_Rs1, Register Rs2) { \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_compressed_reg((address)&insn, 2, Rs2); \ + c_patch((address)&insn, 6, 5, funct2); \ + c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 15, 10, funct6); \ + emit_int16(insn); \ + } + + INSN(c_sub, 0b100011, 0b00, 0b01); + INSN(c_xor, 0b100011, 0b01, 0b01); + INSN(c_or, 0b100011, 0b10, 0b01); + INSN(c_and, 0b100011, 0b11, 0b01); + INSN(c_subw, 0b100111, 0b00, 0b01); + INSN(c_addw, 0b100111, 0b01, 0b01); + +#undef INSN + +#define INSN(NAME, funct4, op) \ + void NAME(Register Rd_Rs1, Register Rs2) { \ + assert_cond(Rd_Rs1 != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_reg((address)&insn, 2, Rs2); \ + c_patch_reg((address)&insn, 7, 
Rd_Rs1); \ + c_patch((address)&insn, 15, 12, funct4); \ + emit_int16(insn); \ + } + + INSN(c_mv, 0b1000, 0b10); + INSN(c_add, 0b1001, 0b10); + +#undef INSN + +#define INSN(NAME, funct4, op) \ + void NAME(Register Rs1) { \ + assert_cond(Rs1 != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_reg((address)&insn, 2, x0); \ + c_patch_reg((address)&insn, 7, Rs1); \ + c_patch((address)&insn, 15, 12, funct4); \ + emit_int16(insn); \ + } + + INSN(c_jr, 0b1000, 0b10); + INSN(c_jalr, 0b1001, 0b10); + +#undef INSN + + typedef void (Assembler::* j_c_insn)(address dest); + typedef void (Assembler::* compare_and_branch_c_insn)(Register Rs1, address dest); + + void wrap_label(Label &L, j_c_insn insn) { + if (L.is_bound()) { + (this->*insn)(target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(pc()); + } + } + + void wrap_label(Label &L, Register r, compare_and_branch_c_insn insn) { + if (L.is_bound()) { + (this->*insn)(r, target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(r, pc()); + } + } + +#define INSN(NAME, funct3, op) \ + void NAME(int32_t offset) { \ + assert_cond(is_imm_in_range(offset, 11, 1)); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 2, 2, (offset & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 5, 3, (offset & right_n_bits(4)) >> 1); \ + c_patch((address)&insn, 6, 6, (offset & nth_bit(7)) >> 7); \ + c_patch((address)&insn, 7, 7, (offset & nth_bit(6)) >> 6); \ + c_patch((address)&insn, 8, 8, (offset & nth_bit(10)) >> 10); \ + c_patch((address)&insn, 10, 9, (offset & right_n_bits(10)) >> 8); \ + c_patch((address)&insn, 11, 11, (offset & nth_bit(4)) >> 4); \ + c_patch((address)&insn, 12, 12, (offset & nth_bit(11)) >> 11); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } \ + void NAME(address dest) { \ + assert_cond(dest != NULL); \ + int64_t distance = dest - pc(); \ + assert_cond(is_imm_in_range(distance, 11, 1)); \ + 
c_j(distance); \ + } \ + void NAME(Label &L) { \ + wrap_label(L, &Assembler::NAME); \ + } + + INSN(c_j, 0b101, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rs1, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 8, 1)); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 4, 3, (imm & right_n_bits(3)) >> 1); \ + c_patch((address)&insn, 6, 5, (imm & right_n_bits(8)) >> 6); \ + c_patch_compressed_reg((address)&insn, 7, Rs1); \ + c_patch((address)&insn, 11, 10, (imm & right_n_bits(5)) >> 3); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(8)) >> 8); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } \ + void NAME(Register Rs1, address dest) { \ + assert_cond(dest != NULL); \ + int64_t distance = dest - pc(); \ + assert_cond(is_imm_in_range(distance, 8, 1)); \ + NAME(Rs1, distance); \ + } \ + void NAME(Register Rs1, Label &L) { \ + wrap_label(L, Rs1, &Assembler::NAME); \ + } + + INSN(c_beqz, 0b110, 0b01); + INSN(c_bnez, 0b111, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 18, 0)); \ + assert_cond((imm & 0xfff) == 0); \ + assert_cond(imm != 0); \ + assert_cond(Rd != x0 && Rd != x2); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (imm & right_n_bits(17)) >> 12); \ + c_patch_reg((address)&insn, 7, Rd); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(17)) >> 17); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_lui, 0b011, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 6, 0)); \ + assert_cond(Rd != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ + c_patch_reg((address)&insn, 7, 
Rd); \ + c_patch((address)&insn, 12, 12, (imm & right_n_bits(6)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_li, 0b010, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + assert_cond(Rd != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ + c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ + c_patch_reg((address)&insn, 7, Rd); \ + c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_ldsp, 0b011, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(FloatRegister Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ + c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ + c_patch_reg((address)&insn, 7, Rd); \ + c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_fldsp, 0b001, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op, REGISTER_TYPE) \ + void NAME(REGISTER_TYPE Rd_Rs2, Register Rs1, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \ + c_patch((address)&insn, 6, 5, (uimm & right_n_bits(8)) >> 6); \ + c_patch_compressed_reg((address)&insn, 7, Rs1); \ + c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_ld, 
0b011, 0b00, Register); + INSN(c_sd, 0b111, 0b00, Register); + INSN(c_fld, 0b001, 0b00, FloatRegister); + INSN(c_fsd, 0b101, 0b00, FloatRegister); + +#undef INSN + +#define INSN(NAME, funct3, op, REGISTER_TYPE) \ + void NAME(REGISTER_TYPE Rs2, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_reg((address)&insn, 2, Rs2); \ + c_patch((address)&insn, 9, 7, (uimm & right_n_bits(9)) >> 6); \ + c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_sdsp, 0b111, 0b10, Register); + INSN(c_fsdsp, 0b101, 0b10, FloatRegister); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rs2, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_reg((address)&insn, 2, Rs2); \ + c_patch((address)&insn, 8, 7, (uimm & right_n_bits(8)) >> 6); \ + c_patch((address)&insn, 12, 9, (uimm & right_n_bits(6)) >> 2); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_swsp, 0b110, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + assert_cond(Rd != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 3, 2, (uimm & right_n_bits(8)) >> 6); \ + c_patch((address)&insn, 6, 4, (uimm & right_n_bits(5)) >> 2); \ + c_patch_reg((address)&insn, 7, Rd); \ + c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_lwsp, 0b010, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd_Rs2, Register Rs1, uint32_t uimm) { 
\ + assert_cond(is_unsigned_imm_in_range(uimm, 7, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \ + c_patch((address)&insn, 5, 5, (uimm & nth_bit(6)) >> 6); \ + c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ + c_patch_compressed_reg((address)&insn, 7, Rs1); \ + c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_lw, 0b010, 0b00); + INSN(c_sw, 0b110, 0b00); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME() { \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 11, 2, 0x0); \ + c_patch((address)&insn, 12, 12, 0b1); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_ebreak, 0b100, 0b10); + +#undef INSN + +// -------------- RVC Transformation Functions -------------- + +// -------------------------- +// Register instructions +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + /* add -> c.add */ \ + if (do_compress()) { \ + Register src = noreg; \ + if (Rs1 != x0 && Rs2 != x0 && ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \ + c_add(Rd, src); \ + return; \ + } \ + } \ + _add(Rd, Rs1, Rs2); \ + } + + INSN(add); + +#undef INSN + +// -------------------------- +#define INSN(NAME, C_NAME, NORMAL_NAME) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + /* sub/subw -> c.sub/c.subw */ \ + if (do_compress() && \ + (Rd == Rs1 && Rd->is_compressed_valid() && Rs2->is_compressed_valid())) { \ + C_NAME(Rd, Rs2); \ + return; \ + } \ + NORMAL_NAME(Rd, Rs1, Rs2); \ + } + + INSN(sub, c_sub, _sub); + INSN(subw, c_subw, _subw); + +#undef INSN + +// -------------------------- +#define INSN(NAME, C_NAME, NORMAL_NAME) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + /* and/or/xor/addw -> 
c.and/c.or/c.xor/c.addw */ \ + if (do_compress()) { \ + Register src = noreg; \ + if (Rs1->is_compressed_valid() && Rs2->is_compressed_valid() && \ + ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \ + C_NAME(Rd, src); \ + return; \ + } \ + } \ + NORMAL_NAME(Rd, Rs1, Rs2); \ + } + + INSN(andr, c_and, _andr); + INSN(orr, c_or, _orr); + INSN(xorr, c_xor, _xorr); + INSN(addw, c_addw, _addw); + +#undef INSN + +private: +// some helper functions + bool do_compress() const { + return UseRVC && in_compressible_region(); + } + +#define FUNC(NAME, funct3, bits) \ + bool NAME(Register rs1, Register rd_rs2, int32_t imm12, bool ld) { \ + return rs1 == sp && \ + is_unsigned_imm_in_range(imm12, bits, 0) && \ + (intx(imm12) & funct3) == 0x0 && \ + (!ld || rd_rs2 != x0); \ + } \ + + FUNC(is_c_ldsdsp, 0b111, 9); + FUNC(is_c_lwswsp, 0b011, 8); + +#undef FUNC + +#define FUNC(NAME, funct3, bits) \ + bool NAME(Register rs1, int32_t imm12) { \ + return rs1 == sp && \ + is_unsigned_imm_in_range(imm12, bits, 0) && \ + (intx(imm12) & funct3) == 0x0; \ + } \ + + FUNC(is_c_fldsdsp, 0b111, 9); + +#undef FUNC + +#define FUNC(NAME, REG_TYPE, funct3, bits) \ + bool NAME(Register rs1, REG_TYPE rd_rs2, int32_t imm12) { \ + return rs1->is_compressed_valid() && \ + rd_rs2->is_compressed_valid() && \ + is_unsigned_imm_in_range(imm12, bits, 0) && \ + (intx(imm12) & funct3) == 0x0; \ + } \ + + FUNC(is_c_ldsd, Register, 0b111, 8); + FUNC(is_c_lwsw, Register, 0b011, 7); + FUNC(is_c_fldsd, FloatRegister, 0b111, 8); + +#undef FUNC + +public: +// -------------------------- +// Load/store register +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ + /* lw -> c.lwsp/c.lw */ \ + if (do_compress()) { \ + if (is_c_lwswsp(Rs, Rd, offset, true)) { \ + c_lwsp(Rd, offset); \ + return; \ + } else if (is_c_lwsw(Rs, Rd, offset)) { \ + c_lw(Rd, Rs, offset); \ + return; \ + } \ + } \ + _lw(Rd, Rs, offset); \ + } + + INSN(lw); + +#undef INSN + +// 
--------------------------
+#define INSN(NAME) \
+  void NAME(Register Rd, Register Rs, const int32_t offset) { \
+    /* ld -> c.ldsp/c.ld: the sp-relative form is tried first, then the compressed-register form */ \
+    if (do_compress()) { \
+      if (is_c_ldsdsp(Rs, Rd, offset, true)) { /* ld == true: the predicate also rejects Rd == x0 */ \
+        c_ldsp(Rd, offset); \
+        return; \
+      } else if (is_c_ldsd(Rs, Rd, offset)) { \
+        c_ld(Rd, Rs, offset); \
+        return; \
+      } \
+    } \
+    _ld(Rd, Rs, offset); /* no compressed form applies */ \
+  }
+
+  INSN(ld);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME) \
+  void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \
+    /* fld -> c.fldsp/c.fld: same selection order as ld, for a floating-point destination */ \
+    if (do_compress()) { \
+      if (is_c_fldsdsp(Rs, offset)) { \
+        c_fldsp(Rd, offset); \
+        return; \
+      } else if (is_c_fldsd(Rs, Rd, offset)) { \
+        c_fld(Rd, Rs, offset); \
+        return; \
+      } \
+    } \
+    _fld(Rd, Rs, offset); /* no compressed form applies */ \
+  }
+
+  INSN(fld);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME) \
+  void NAME(Register Rd, Register Rs, const int32_t offset) { \
+    /* sd -> c.sdsp/c.sd (here Rd is the register being stored, Rs the base) */ \
+    if (do_compress()) { \
+      if (is_c_ldsdsp(Rs, Rd, offset, false)) { /* ld == false: the x0 restriction is not applied to stores */ \
+        c_sdsp(Rd, offset); \
+        return; \
+      } else if (is_c_ldsd(Rs, Rd, offset)) { \
+        c_sd(Rd, Rs, offset); \
+        return; \
+      } \
+    } \
+    _sd(Rd, Rs, offset); /* no compressed form applies */ \
+  }
+
+  INSN(sd);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME) \
+  void NAME(Register Rd, Register Rs, const int32_t offset) { \
+    /* sw -> c.swsp/c.sw (here Rd is the register being stored, Rs the base) */ \
+    if (do_compress()) { \
+      if (is_c_lwswsp(Rs, Rd, offset, false)) { /* ld == false: the x0 restriction is not applied to stores */ \
+        c_swsp(Rd, offset); \
+        return; \
+      } else if (is_c_lwsw(Rs, Rd, offset)) { \
+        c_sw(Rd, Rs, offset); \
+        return; \
+      } \
+    } \
+    _sw(Rd, Rs, offset); /* no compressed form applies */ \
+  }
+
+  INSN(sw);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME) \
+  void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \
+    /* fsd -> c.fsdsp/c.fsd (here Rd is the floating-point register being stored) */ \
+    if (do_compress()) { \
+      if (is_c_fldsdsp(Rs, offset)) { \
+        c_fsdsp(Rd, offset); \
+        return; \
+      } else if (is_c_fldsd(Rs, Rd, offset)) { \
+        c_fsd(Rd, Rs, offset); \
+        return; \
+      } \
+    } \
+    _fsd(Rd, Rs, offset); /* no compressed form applies */ \
+  }
+
+  INSN(fsd);
+
+#undef INSN
+
+// --------------------------
+// Conditional branch instructions
+// --------------------------
+#define INSN(NAME, C_NAME, NORMAL_NAME) \
+  void NAME(Register Rs1, Register Rs2, const int64_t offset) { \
+    /* beq/bne -> c.beqz/c.bnez: only for a compare against x0 with a compressible Rs1 and a non-zero offset */ \
+    if (do_compress() && \
+        (offset != 0 && Rs2 == x0 && Rs1->is_compressed_valid() && \
+         is_imm_in_range(offset, 8, 1))) { \
+      C_NAME(Rs1, offset); \
+      return; \
+    } \
+    NORMAL_NAME(Rs1, Rs2, offset); \
+  }
+
+  INSN(beq, c_beqz, _beq);
+  INSN(bne, c_bnez, _bne);
+
+#undef INSN
+
+// --------------------------
+// Unconditional branch instructions
+// --------------------------
+#define INSN(NAME) \
+  void NAME(Register Rd, const int32_t offset) { \
+    /* jal -> c.j: only when nothing is linked (Rd == x0) and the non-zero offset passes is_imm_in_range(offset, 11, 1) */ \
+    if (do_compress() && offset != 0 && Rd == x0 && is_imm_in_range(offset, 11, 1)) { \
+      c_j(offset); \
+      return; \
+    } \
+    _jal(Rd, offset); \
+  }
+
+  INSN(jal);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME) \
+  void NAME(Register Rd, Register Rs, const int32_t offset) { \
+    /* jalr -> c.jr/c.jalr: zero offset and Rs != x0 only; Rd == x1 selects c.jalr, Rd == x0 selects c.jr */ \
+    if (do_compress() && (offset == 0 && Rs != x0)) { \
+      if (Rd == x1) { \
+        c_jalr(Rs); \
+        return; \
+      } else if (Rd == x0) { \
+        c_jr(Rs); \
+        return; \
+      } \
+    } \
+    _jalr(Rd, Rs, offset); /* any other Rd, or a non-zero offset */ \
+  }
+
+  INSN(jalr);
+
+#undef INSN
+
+// --------------------------
+// Miscellaneous Instructions
+// --------------------------
+#define INSN(NAME) \
+  void NAME() { \
+    /* ebreak -> c.ebreak */ \
+    if (do_compress()) { \
+      c_ebreak(); \
+      return; \
+    } \
+    _ebreak(); \
+  }
+
+  INSN(ebreak);
+
+#undef INSN
+
+#define INSN(NAME) \
+  void NAME() { \
+    /* The illegal instruction in RVC is represented by a 16-bit 0. */ \
+    if (do_compress()) { \
+      emit_int16(0); \
+      return; \
+    } \
+    _halt(); \
+  }
+
+  INSN(halt);
+
+#undef INSN
+
+// --------------------------
+// Immediate Instructions
+// --------------------------
+#define INSN(NAME) \
+  void NAME(Register Rd, int64_t imm) { \
+    /* li -> c.li: only when imm passes is_imm_in_range(imm, 6, 0) and Rd != x0 */ \
+    if (do_compress() && (is_imm_in_range(imm, 6, 0) && Rd != x0)) { \
+      c_li(Rd, imm); \
+      return; \
+    } \
+    _li(Rd, imm); \
+  }
+
+  INSN(li);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME) \
+  void NAME(Register Rd, Register Rs1, int32_t imm) { \
+    /* addi -> c.addi/c.nop/c.mv/c.addi16sp/c.addi4spn; the candidates are tested in this order */ \
+    if (do_compress()) { \
+      if (Rd == Rs1 && is_imm_in_range(imm, 6, 0)) { \
+        c_addi(Rd, imm); \
+        return; \
+      } else if (imm == 0 && Rd != x0 && Rs1 != x0) { /* plain register copy */ \
+        c_mv(Rd, Rs1); \
+        return; \
+      } else if (Rs1 == sp && imm != 0) { /* stack-pointer adjust / sp-relative address */ \
+        if (Rd == Rs1 && (imm & 0b1111) == 0x0 && is_imm_in_range(imm, 10, 0)) { \
+          c_addi16sp(imm); \
+          return; \
+        } else if (Rd->is_compressed_valid() && (imm & 0b11) == 0x0 && is_unsigned_imm_in_range(imm, 10, 0)) { \
+          c_addi4spn(Rd, imm); \
+          return; \
+        } \
+      } \
+    } \
+    _addi(Rd, Rs1, imm); \
+  }
+
+  INSN(addi);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME) \
+  void NAME(Register Rd, Register Rs1, int32_t imm) { \
+    /* addiw -> c.addiw: in-place form only (Rd == Rs1), never on x0 */ \
+    if (do_compress() && (Rd == Rs1 && Rd != x0 && is_imm_in_range(imm, 6, 0))) { \
+      c_addiw(Rd, imm); \
+      return; \
+    } \
+    _addiw(Rd, Rs1, imm); \
+  }
+
+  INSN(addiw);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME) \
+  void NAME(Register Rd, Register Rs1, int32_t imm) { \
+    /* and_imm12 -> c.andi: in-place form only, on a compressible register */ \
+    if (do_compress() && \
+        (Rd == Rs1 && Rd->is_compressed_valid() && is_imm_in_range(imm, 6, 0))) { \
+      c_andi(Rd, imm); \
+      return; \
+    } \
+    _and_imm12(Rd, Rs1, imm); \
+  }
+
+  INSN(and_imm12);
+
+#undef INSN
+
+// --------------------------
+// Shift Immediate Instructions
+// --------------------------
+#define INSN(NAME) \
+  void NAME(Register Rd, Register Rs1, unsigned shamt) { \
+    /* slli -> c.slli: in-place form only, Rd != x0, non-zero shift amount */ \
+    if (do_compress() && (Rd == Rs1 && Rd != x0 && shamt != 0)) { \
+      c_slli(Rd, shamt); \
+      return; \
+    } \
+    _slli(Rd, Rs1, shamt); \
+  }
+
+  INSN(slli);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME, C_NAME, NORMAL_NAME) \
+  void NAME(Register Rd, Register Rs1, unsigned shamt) { \
+    /* srai/srli -> c.srai/c.srli: in-place form only, compressible register, non-zero shift amount */ \
+    if (do_compress() && (Rd == Rs1 && Rd->is_compressed_valid() && shamt != 0)) { \
+      C_NAME(Rd, shamt); \
+      return; \
+    } \
+    NORMAL_NAME(Rd, Rs1, shamt); \
+  }
+
+  INSN(srai, c_srai, _srai);
+  INSN(srli, c_srli, _srli);
+
+#undef INSN
+
+// --------------------------
+// Upper Immediate Instruction
+// --------------------------
+#define INSN(NAME) \
+  void NAME(Register Rd, int32_t imm) { \
+    /* lui -> c.lui: the guard mirrors c_lui's own asserts on Rd (not x0, not x2) and on a non-zero imm */ \
+    if (do_compress() && (Rd != x0 && Rd != x2 && imm != 0 && is_imm_in_range(imm, 18, 0))) { \
+      c_lui(Rd, imm); \
+      return; \
+    } \
+    _lui(Rd, imm); \
+  }
+
+  INSN(lui);
+
+#undef INSN
+
+// ---------------------------------------------------------------------------------------
+
+  void bgt(Register Rs, Register Rt, const address &dest);  // compare-and-branch declarations; bodies are not in this header
+  void ble(Register Rs, Register Rt, const address &dest);
+  void bgtu(Register Rs, Register Rt, const address &dest);
+  void bleu(Register Rs, Register Rt, const address &dest);
+  void bgt(Register Rs, Register Rt, Label &l, bool is_far = false);
+  void ble(Register Rs, Register Rt, Label &l, bool is_far = false);
+  void bgtu(Register Rs, Register Rt, Label &l, bool is_far = false);
+  void bleu(Register Rs, Register Rt, Label &l, bool is_far = false);
+
+  typedef void (Assembler::* jal_jalr_insn)(Register Rt, address dest);  // member-function pointer types taken by the wrap_label overloads
+  typedef void (Assembler::* load_insn_by_temp)(Register Rt, address dest, Register temp);
+  typedef void (Assembler::* compare_and_branch_insn)(Register Rs1, Register Rs2, const address dest);
+  typedef void (Assembler::* compare_and_branch_label_insn)(Register Rs1, Register Rs2, Label &L, bool is_far);
+
+
void wrap_label(Register r1, Register r2, Label &L, compare_and_branch_insn insn,
+                  compare_and_branch_label_insn neg_insn, bool is_far);
+  void wrap_label(Register r, Label &L, Register t, load_insn_by_temp insn);
+  void wrap_label(Register r, Label &L, jal_jalr_insn insn);
+
+  // Arithmetic pseudo-instructions taking a 64-bit immediate; temp defaults to t0 (presumably a scratch register - confirm in the .cpp)
+  void add(Register Rd, Register Rn, int64_t increment, Register temp = t0);
+  void addw(Register Rd, Register Rn, int64_t increment, Register temp = t0);
+  void sub(Register Rd, Register Rn, int64_t decrement, Register temp = t0);
+  void subw(Register Rd, Register Rn, int64_t decrement, Register temp = t0);
+
+  // RVB pseudo instructions
+  // zero extend word
+  void zext_w(Register Rd, Register Rs);
+
+  Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(false) {  // a new assembler starts outside any compressible region
+  }
+
+  // Stack overflow checking
+  virtual void bang_stack_with_offset(int offset) { Unimplemented(); }  // not provided at this level
+
+  static bool operand_valid_for_add_immediate(long imm) {  // true iff imm fits a signed 12-bit immediate
+    return is_imm_in_range(imm, 12, 0);
+  }
+
+  // The maximum range of a branch is fixed for the RISCV architecture.
+  static const unsigned long branch_range = 1 * M;
+
+  static bool reachable_from_branch_at(address branch, address target) {  // strict bound: the distance must be below branch_range
+    return uabs(target - branch) < branch_range;
+  }
+
+  virtual ~Assembler() {}
+};
+
+class BiasedLockingCounters;
+
+#endif // CPU_RISCV_ASSEMBLER_RISCV_HPP
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP
+#define CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP
+
+#include "asm/assembler.inline.hpp"
+#include "asm/codeBuffer.hpp"
+#include "code/codeCache.hpp"
+
+inline bool is_imm_in_range(long value, unsigned bits, unsigned align_bits) {  // signed field of (bits + align_bits) bits, low align_bits clear
+  const intx upper = value >> (align_bits + bits - 1);  // the sign bit and everything above it: all zeros or all ones when value fits
+  return (upper == 0 || upper == -1) && (value & right_n_bits(align_bits)) == 0;
+}
+
+inline bool is_unsigned_imm_in_range(intx value, unsigned bits, unsigned align_bits) {  // non-negative, aligned, nothing at or above bit (align_bits + bits)
+  return !(value < 0) && !(value & right_n_bits(align_bits)) && !(value >> (align_bits + bits));
+}
+
+inline bool is_offset_in_range(intx offset, unsigned bits) {  // signed range check with no alignment requirement
+  return is_imm_in_range(offset, bits, /* align_bits */ 0);
+}
+
+#endif // CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/bytes_riscv.hpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/bytes_riscv.hpp
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or
its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_BYTES_RISCV_HPP
+#define CPU_RISCV_BYTES_RISCV_HPP
+
+#include "memory/allocation.hpp"
+
+class Bytes: AllStatic {
+ public:
+  // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering
+  // RISCV needs to check for alignment: each accessor switches on the low address bits, touches memory only through naturally aligned pieces, and combines them lowest address = least significant.
+
+  // Forward declarations of the compiler-dependent implementation (presumably supplied by the OS_CPU_HEADER(bytes) include at the end of this file)
+  static inline u2 swap_u2(u2 x);
+  static inline u4 swap_u4(u4 x);
+  static inline u8 swap_u8(u8 x);
+
+  static inline u2 get_native_u2(address p) {  // read 2 bytes at p, any alignment
+    if ((intptr_t(p) & 1) == 0) {
+      return *(u2*)p;
+    } else {
+      return ((u2)(p[1]) << 8) |
+             ((u2)(p[0]));
+    }
+  }
+
+  static inline u4 get_native_u4(address p) {  // read 4 bytes at p, any alignment
+    switch (intptr_t(p) & 3) {
+      case 0:  // 4-byte aligned: single load
+        return *(u4*)p;
+
+      case 2:  // 2-byte aligned: two halfword loads
+        return ((u4)(((u2*)p)[1]) << 16) |
+               ((u4)(((u2*)p)[0]));
+
+      default:  // odd address: four byte loads
+        return ((u4)(p[3]) << 24) |
+               ((u4)(p[2]) << 16) |
+               ((u4)(p[1]) << 8) |
+               ((u4)(p[0]));
+    }
+  }
+
+  static inline u8 get_native_u8(address p) {  // read 8 bytes at p, any alignment
+    switch (intptr_t(p) & 7) {
+      case 0:  // 8-byte aligned: single load
+        return *(u8*)p;
+
+      case 4:  // 4-byte aligned: two word loads
+        return ((u8)(((u4*)p)[1]) << 32) |
+               ((u8)(((u4*)p)[0]));
+
+      case 2:  // (p & 7) == 2: four halfword loads
+        return ((u8)(((u2*)p)[3]) << 48) |
+               ((u8)(((u2*)p)[2]) << 32) |
+               ((u8)(((u2*)p)[1]) << 16) |
+               ((u8)(((u2*)p)[0]));
+
+      default:  // odd addresses, and also (p & 7) == 6: eight byte loads
+        return ((u8)(p[7]) << 56) |
+               ((u8)(p[6]) << 48) |
+               ((u8)(p[5]) << 40) |
+               ((u8)(p[4]) << 32) |
+               ((u8)(p[3]) << 24) |
+               ((u8)(p[2]) << 16) |
+               ((u8)(p[1]) << 8) |
+               ((u8)(p[0]));
+    }
+  }
+
+  static inline void put_native_u2(address p, u2 x) {  // write 2 bytes at p, any alignment
+    if ((intptr_t(p) & 1) == 0) {
+      *(u2*)p = x;
+    } else {
+      p[1] = x >> 8;
+      p[0] = x;
+    }
+  }
+
+  static inline void put_native_u4(address p, u4 x) {  // write 4 bytes at p, any alignment
+    switch (intptr_t(p) & 3) {
+      case 0:  // 4-byte aligned: single store
+        *(u4*)p = x;
+        break;
+
+      case 2:  // 2-byte aligned: two halfword stores
+        ((u2*)p)[1] = x >> 16;
+        ((u2*)p)[0] = x;
+        break;
+
+      default:  // odd address: four byte stores
+        ((u1*)p)[3] = x >> 24;
+        ((u1*)p)[2] = x >> 16;
+        ((u1*)p)[1] = x >> 8;
+        ((u1*)p)[0] = x;
+        break;
+    }
+  }
+
+  static inline void put_native_u8(address p, u8 x) {  // write 8 bytes at p, any alignment
+    switch (intptr_t(p) & 7) {
+      case 0:  // 8-byte aligned: single store
+        *(u8*)p = x;
+        break;
+
+      case 4:  // 4-byte aligned: two word stores
+        ((u4*)p)[1] = x >> 32;
+        ((u4*)p)[0] = x;
+        break;
+
+      case 2:  // (p & 7) == 2: four halfword stores
+        ((u2*)p)[3] = x >> 48;
+        ((u2*)p)[2] = x >> 32;
+        ((u2*)p)[1] = x >> 16;
+        ((u2*)p)[0] = x;
+        break;
+
+      default:  // odd addresses, and also (p & 7) == 6: eight byte stores
+        ((u1*)p)[7] = x >> 56;
+        ((u1*)p)[6] = x >> 48;
+        ((u1*)p)[5] = x >> 40;
+        ((u1*)p)[4] = x >> 32;
+        ((u1*)p)[3] = x >> 24;
+        ((u1*)p)[2] = x >> 16;
+        ((u1*)p)[1] = x >> 8;
+        ((u1*)p)[0] = x;
+        break;
+    }
+  }
+
+  // Efficient reading and writing of unaligned unsigned data in Java byte ordering (i.e. big-endian ordering); each accessor is the native accessor composed with swap_uN
+  static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); }
+  static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); }
+  static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); }
+
+  static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); }
+  static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); }
+  static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); }
+};
+
+#include OS_CPU_HEADER(bytes)
+
+#endif // CPU_RISCV_BYTES_RISCV_HPP
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "c1/c1_CodeStubs.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "classfile/javaClasses.hpp"
+#include "nativeInst_riscv.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_riscv.inline.hpp"
+
+
+#define __ ce->masm()->
+
+void C1SafepointPollStub::emit_code(LIR_Assembler* ce) {  // out-of-line path for a safepoint poll
+  __ bind(_entry);
+  InternalAddress safepoint_pc(__ pc() - __ offset() + safepoint_offset());  // safepoint_offset() rebased from the current pc/offset pair
+  __ code_section()->relocate(__ pc(), safepoint_pc.rspec());
+  __ la(t0, safepoint_pc.target());
+  __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset()));  // record that pc in the thread before entering the handler
+
+  assert(SharedRuntime::polling_page_return_handler_blob() != NULL,
+         "polling page return stub not created yet");
+  address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point();
+
+  __ far_jump(RuntimeAddress(stub));  // jump, not call: nothing is emitted after it
+}
+
+void CounterOverflowStub::emit_code(LIR_Assembler* ce) {  // calls the counter_overflow runtime entry, then resumes
+  __ bind(_entry);
+  Metadata *m = _method->as_constant_ptr()->as_metadata();
+  __ mov_metadata(t0, m);
+  ce->store_parameter(t0, 1);  // parameter slot 1: the method's Metadata*
+  ce->store_parameter(_bci, 0);  // parameter slot 0: the bci
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ j(_continuation);
+}
+
+RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array)
+  : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) {  // array given: emit_code uses throw_range_check_failed
+  assert(info != NULL, "must have info");
+  _info = new CodeEmitInfo(info);
+}
+
+RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index)
+  : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) {  // no array: emit_code uses throw_index_exception
+  assert(info != NULL, "must have info");
+  _info = new CodeEmitInfo(info);
+}
+
+void RangeCheckStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  if (_info->deoptimize_on_exception()) {  // deoptimizing compilations go to the predicate-failed trap instead of throwing
+    address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+    __ far_call(RuntimeAddress(a));
+    ce->add_call_info_here(_info);
+    ce->verify_oop_map(_info);
+    debug_only(__ should_not_reach_here());
+    return;
+  }
+
+  if (_index->is_cpu_register()) {  // t0 = the index, from a register or a constant
+    __ mv(t0, _index->as_register());
+  } else {
+    __ mv(t0, _index->as_jint());
+  }
+  Runtime1::StubID stub_id;
+  if (_throw_index_out_of_bounds_exception) {
+    stub_id = Runtime1::throw_index_exception_id;
+  } else {
+    assert(_array != NULL, "sanity");
+    __ mv(t1, _array->as_pointer_register());  // t1 = the array, only for the range-check-failed stub
+    stub_id = Runtime1::throw_range_check_failed_id;
+  }
+  int32_t off = 0;
+  __ la_patchable(ra, RuntimeAddress(Runtime1::entry_for(stub_id)), off);  // off is passed in here and used below as the jalr displacement (presumably the low part of the target - confirm)
+  __ jalr(ra, ra, off);
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {
+  _info = new CodeEmitInfo(info);
+}
+
+void PredicateFailedStub::emit_code(LIR_Assembler* ce) {  // calls the predicate_failed_trap runtime entry
+  __ bind(_entry);
+  address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+  __ far_call(RuntimeAddress(a));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+void DivByZeroStub::emit_code(LIR_Assembler* ce) {
+  if (_offset != -1) {  // -1 suppresses the implicit-exception table entry
+    ce->compilation()->implicit_exception_table()->append(_offset, __ offset());  // pairs _offset with this stub's code offset
+  }
+  __ bind(_entry);
+  __ far_call(Address(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+#ifdef ASSERT
+  __ should_not_reach_here();
+#endif
+}
+
+// Implementation of NewInstanceStub
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
+  _result = result;
+  _klass = klass;
+  _klass_reg = klass_reg;
+  _info = new CodeEmitInfo(info);
+  assert(stub_id == Runtime1::new_instance_id ||
+         stub_id == Runtime1::fast_new_instance_id ||
+         stub_id == Runtime1::fast_new_instance_init_check_id,
+         "need new_instance id");
+  _stub_id = stub_id;
+}
+
+void NewInstanceStub::emit_code(LIR_Assembler* ce) {
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  __ mv(x13, _klass_reg->as_register());  // the klass is passed in x13
+  __ far_call(RuntimeAddress(Runtime1::entry_for(_stub_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  assert(_result->as_register() == x10, "result must in x10");
+  __ j(_continuation);
+}
+
+// Implementation of NewTypeArrayStub
+NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+  _klass_reg = klass_reg;
+  _length = length;
+  _result = result;
+  _info = new CodeEmitInfo(info);
+}
+
+void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {  // no moves emitted: the asserts require length in x9, klass in x13, result in x10
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  assert(_length->as_register() == x9, "length must in x9");
+  assert(_klass_reg->as_register() == x13, "klass_reg must in x13");
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  assert(_result->as_register() == x10, "result must in x10");
+  __ j(_continuation);
+}
+
+// Implementation of NewObjectArrayStub
+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+  _klass_reg = klass_reg;
+  _result = result;
+  _length = length;
+  _info = new CodeEmitInfo(info);
+}
+
+void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {  // same register contract as NewTypeArrayStub::emit_code
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  assert(_length->as_register() == x9, "length must in x9");
+  assert(_klass_reg->as_register() == x13, "klass_reg must in x13");
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  assert(_result->as_register() == x10, "result must in x10");
+  __ j(_continuation);
+}
+
+// Implementation of MonitorAccessStubs
+MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)
+: MonitorAccessStub(obj_reg, lock_reg) {
+  _info = new CodeEmitInfo(info);
+}
+
+void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  ce->store_parameter(_obj_reg->as_register(), 1);  // parameter slot 1: the object
+  ce->store_parameter(_lock_reg->as_register(), 0);  // parameter slot 0: the lock
+  Runtime1::StubID enter_id;
+  if (ce->compilation()->has_fpu_code()) {  // the stub variant depends on whether the compilation has FPU code
+    enter_id = Runtime1::monitorenter_id;
+  } else {
+    enter_id = Runtime1::monitorenter_nofpu_id;
+  }
+  __ far_call(RuntimeAddress(Runtime1::entry_for(enter_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ j(_continuation);
+}
+
+void MonitorExitStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  if (_compute_lock) {
+    // lock_reg was destroyed by fast unlocking attempt => recompute it
+    ce->monitor_address(_monitor_ix, _lock_reg);
+  }
+  ce->store_parameter(_lock_reg->as_register(), 0);
+  // note: non-blocking leaf routine => no call info needed
+  Runtime1::StubID exit_id;
+  if (ce->compilation()->has_fpu_code()) {  // the stub variant depends on whether the compilation has FPU code
+    exit_id = Runtime1::monitorexit_id;
+  } else {
+    exit_id = Runtime1::monitorexit_nofpu_id;
+  }
+  __ la(ra, _continuation);  // ra = continuation, so the stub reached by the jump below returns there
+  __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id)));
+}
+
+// Implementation of patching:
+// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes)
+// - Replace original code with a call to the stub
+// At Runtime:
+// - call to stub, jump to
runtime +// - in runtime: preserve all registers (especially objects, i.e., source and destination object) +// - in runtime: after initializing class, restore original code, reexecute instruction + +int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; + +void PatchingStub::align_patch_site(MacroAssembler* masm) {} + +void PatchingStub::emit_code(LIR_Assembler* ce) { + assert(false, "RISCV should not use C1 runtime patching"); +} + +void DeoptimizeStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + ce->store_parameter(_trap_request, 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); + ce->add_call_info_here(_info); + DEBUG_ONLY(__ should_not_reach_here()); +} + +void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { + address a = NULL; + if (_info->deoptimize_on_exception()) { + // Deoptimize, do not throw the exception, because it is probably wrong to do it here. + a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + } else { + a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); + } + + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + __ bind(_entry); + __ far_call(RuntimeAddress(a)); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + + __ bind(_entry); + // pass the object in a tmp register because all other registers + // must be preserved + if (_obj->is_cpu_register()) { + __ mv(t0, _obj->as_register()); + } + __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), NULL, t1); + ce->add_call_info_here(_info); + debug_only(__ should_not_reach_here()); +} + +void ArrayCopyStub::emit_code(LIR_Assembler* ce) { + // ---------------slow case: call to native----------------- + __ bind(_entry); + // Figure out where the args should go + // This should really convert the
IntrinsicID to the Method* and signature + // but I don't know how to do that. + const int args_num = 5; + VMRegPair args[args_num]; + BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; + SharedRuntime::java_calling_convention(signature, args, args_num); + + // push parameters + Register r[args_num]; + r[0] = src()->as_register(); + r[1] = src_pos()->as_register(); + r[2] = dst()->as_register(); + r[3] = dst_pos()->as_register(); + r[4] = length()->as_register(); + + // next registers will get stored on the stack + for (int j = 0; j < args_num; j++) { + VMReg r_1 = args[j].first(); + if (r_1->is_stack()) { + int st_off = r_1->reg2stack() * wordSize; + __ sd(r[j], Address(sp, st_off)); + } else { + assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg"); + } + } + + ce->align_call(lir_static_call); + + ce->emit_static_call_stub(); + if (ce->compilation()->bailed_out()) { + return; // CodeCache is full + } + Address resolve(SharedRuntime::get_resolve_static_call_stub(), + relocInfo::static_call_type); + address call = __ trampoline_call(resolve); + if (call == NULL) { + ce->bailout("trampoline stub overflow"); + return; + } + ce->add_call_info_here(info()); + +#ifndef PRODUCT + __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); + __ add_memory_int32(Address(t1), 1); +#endif + + __ j(_continuation); +} + +#undef __ Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_C1_DEFS_RISCV_HPP +#define CPU_RISCV_C1_DEFS_RISCV_HPP + +// native word offsets from memory address (little endian) +enum { + pd_lo_word_offset_in_bytes = 0, + pd_hi_word_offset_in_bytes = BytesPerWord +}; + +// explicit rounding operations are required to implement the strictFP mode +enum { + pd_strict_fp_requires_explicit_rounding = false +}; + +// registers +enum { + pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, // number of registers used during code emission + pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, // number of float registers used during code emission + + // caller saved + pd_nof_caller_save_cpu_regs_frame_map = 13, // number of registers killed by calls + pd_nof_caller_save_fpu_regs_frame_map = 32, // number of float registers killed by calls + + pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map, + pd_last_callee_saved_reg = 21, + + pd_last_allocatable_cpu_reg = pd_nof_caller_save_cpu_regs_frame_map - 1, + + pd_nof_cpu_regs_reg_alloc + = 
pd_nof_caller_save_cpu_regs_frame_map, // number of registers that are visible to register allocator + pd_nof_fpu_regs_reg_alloc = 32, // number of float registers that are visible to register allocator + + pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan + pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of float registers visible to linear scan + pd_nof_xmm_regs_linearscan = 0, // don't have vector registers + + pd_first_cpu_reg = 0, + pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, + pd_first_byte_reg = 0, + pd_last_byte_reg = pd_nof_cpu_regs_reg_alloc - 1, + + pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, + pd_last_fpu_reg = pd_first_fpu_reg + 31, + + pd_first_callee_saved_fpu_reg_1 = 8 + pd_first_fpu_reg, + pd_last_callee_saved_fpu_reg_1 = 9 + pd_first_fpu_reg, + pd_first_callee_saved_fpu_reg_2 = 18 + pd_first_fpu_reg, + pd_last_callee_saved_fpu_reg_2 = 27 + pd_first_fpu_reg +}; + + +// Encoding of float value in debug info. This is true on x86 where +// floats are extended to doubles when stored in the stack, false for +// RISCV where floats and doubles are stored in their native form. +enum { + pd_float_saved_as_double = false +}; + +#endif // CPU_RISCV_C1_DEFS_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +//-------------------------------------------------------- +// FpuStackSim +//-------------------------------------------------------- + +// No FPU stack on RISCV Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP +#define CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP + +// No FPU stack on RISCV +class FpuStackSim; + +#endif // CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp @@ -0,0 +1,388 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIR.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_riscv.inline.hpp" + +LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { + LIR_Opr opr = LIR_OprFact::illegalOpr; + VMReg r_1 = reg->first(); + VMReg r_2 = reg->second(); + if (r_1->is_stack()) { + // Convert stack slot to an SP offset + // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value + // so we must add it in here. + int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; + opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type)); + } else if (r_1->is_Register()) { + Register reg1 = r_1->as_Register(); + if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { + Register reg2 = r_2->as_Register(); + assert(reg2 == reg1, "must be same register"); + opr = as_long_opr(reg1); + } else if (is_reference_type(type)) { + opr = as_oop_opr(reg1); + } else if (type == T_METADATA) { + opr = as_metadata_opr(reg1); + } else if (type == T_ADDRESS) { + opr = as_address_opr(reg1); + } else { + opr = as_opr(reg1); + } + } else if (r_1->is_FloatRegister()) { + assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); + int num = r_1->as_FloatRegister()->encoding(); + if (type == T_FLOAT) { + opr = LIR_OprFact::single_fpu(num); + } else { + opr = LIR_OprFact::double_fpu(num); + } + } else { + ShouldNotReachHere(); + } + return opr; +} + +LIR_Opr FrameMap::zr_opr; +LIR_Opr FrameMap::r1_opr; +LIR_Opr FrameMap::r2_opr; +LIR_Opr FrameMap::r3_opr; +LIR_Opr FrameMap::r4_opr; +LIR_Opr FrameMap::r5_opr; +LIR_Opr FrameMap::r6_opr; +LIR_Opr FrameMap::r7_opr; +LIR_Opr FrameMap::r8_opr; +LIR_Opr FrameMap::r9_opr; +LIR_Opr FrameMap::r10_opr; +LIR_Opr FrameMap::r11_opr; +LIR_Opr FrameMap::r12_opr; +LIR_Opr FrameMap::r13_opr; +LIR_Opr FrameMap::r14_opr; +LIR_Opr FrameMap::r15_opr; +LIR_Opr FrameMap::r16_opr; +LIR_Opr 
FrameMap::r17_opr; +LIR_Opr FrameMap::r18_opr; +LIR_Opr FrameMap::r19_opr; +LIR_Opr FrameMap::r20_opr; +LIR_Opr FrameMap::r21_opr; +LIR_Opr FrameMap::r22_opr; +LIR_Opr FrameMap::r23_opr; +LIR_Opr FrameMap::r24_opr; +LIR_Opr FrameMap::r25_opr; +LIR_Opr FrameMap::r26_opr; +LIR_Opr FrameMap::r27_opr; +LIR_Opr FrameMap::r28_opr; +LIR_Opr FrameMap::r29_opr; +LIR_Opr FrameMap::r30_opr; +LIR_Opr FrameMap::r31_opr; + +LIR_Opr FrameMap::fp_opr; +LIR_Opr FrameMap::sp_opr; + +LIR_Opr FrameMap::receiver_opr; + +LIR_Opr FrameMap::zr_oop_opr; +LIR_Opr FrameMap::r1_oop_opr; +LIR_Opr FrameMap::r2_oop_opr; +LIR_Opr FrameMap::r3_oop_opr; +LIR_Opr FrameMap::r4_oop_opr; +LIR_Opr FrameMap::r5_oop_opr; +LIR_Opr FrameMap::r6_oop_opr; +LIR_Opr FrameMap::r7_oop_opr; +LIR_Opr FrameMap::r8_oop_opr; +LIR_Opr FrameMap::r9_oop_opr; +LIR_Opr FrameMap::r10_oop_opr; +LIR_Opr FrameMap::r11_oop_opr; +LIR_Opr FrameMap::r12_oop_opr; +LIR_Opr FrameMap::r13_oop_opr; +LIR_Opr FrameMap::r14_oop_opr; +LIR_Opr FrameMap::r15_oop_opr; +LIR_Opr FrameMap::r16_oop_opr; +LIR_Opr FrameMap::r17_oop_opr; +LIR_Opr FrameMap::r18_oop_opr; +LIR_Opr FrameMap::r19_oop_opr; +LIR_Opr FrameMap::r20_oop_opr; +LIR_Opr FrameMap::r21_oop_opr; +LIR_Opr FrameMap::r22_oop_opr; +LIR_Opr FrameMap::r23_oop_opr; +LIR_Opr FrameMap::r24_oop_opr; +LIR_Opr FrameMap::r25_oop_opr; +LIR_Opr FrameMap::r26_oop_opr; +LIR_Opr FrameMap::r27_oop_opr; +LIR_Opr FrameMap::r28_oop_opr; +LIR_Opr FrameMap::r29_oop_opr; +LIR_Opr FrameMap::r30_oop_opr; +LIR_Opr FrameMap::r31_oop_opr; + +LIR_Opr FrameMap::t0_opr; +LIR_Opr FrameMap::t1_opr; +LIR_Opr FrameMap::t0_long_opr; +LIR_Opr FrameMap::t1_long_opr; + +LIR_Opr FrameMap::r10_metadata_opr; +LIR_Opr FrameMap::r11_metadata_opr; +LIR_Opr FrameMap::r12_metadata_opr; +LIR_Opr FrameMap::r13_metadata_opr; +LIR_Opr FrameMap::r14_metadata_opr; +LIR_Opr FrameMap::r15_metadata_opr; + +LIR_Opr FrameMap::long10_opr; +LIR_Opr FrameMap::long11_opr; +LIR_Opr FrameMap::fpu10_float_opr; +LIR_Opr FrameMap::fpu10_double_opr; 
+ +LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; +LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; + +//-------------------------------------------------------- +// FrameMap +//-------------------------------------------------------- +// |---f31--| +// |---..---| +// |---f28--| +// |---f27--|<---pd_last_callee_saved_fpu_reg_2 +// |---..---| +// |---f18--|<---pd_first_callee_saved_fpu_reg_2 +// |---f17--| +// |---..---| +// |---f10--| +// |---f9---|<---pd_last_callee_saved_fpu_reg_1 +// |---f8---|<---pd_first_callee_saved_fpu_reg_1 +// |---f7---| +// |---..---| +// |---f0---| +// |---x27--| +// |---x23--| +// |---x8---| +// |---x4---| +// |---x3---| +// |---x2---| +// |---x1---| +// |---x0---| +// |---x26--|<---pd_last_callee_saved_reg +// |---..---| +// |---x18--| +// |---x9---|<---pd_first_callee_saved_reg +// |---x31--| +// |---..---| +// |---x28--| +// |---x17--| +// |---..---| +// |---x10--| +// |---x7---| + +void FrameMap::initialize() { + assert(!_init_done, "once"); + + int i = 0; + + // caller save register + map_register(i, x7); r7_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x10); r10_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x11); r11_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x12); r12_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x13); r13_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x14); r14_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x15); r15_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x16); r16_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x17); r17_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x28); r28_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x29); r29_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x30); r30_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x31); r31_opr = LIR_OprFact::single_cpu(i); i++; + + // callee save register + map_register(i, x9); r9_opr = LIR_OprFact::single_cpu(i); 
i++; + map_register(i, x18); r18_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x19); r19_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x20); r20_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x21); r21_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x22); r22_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x24); r24_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x25); r25_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x26); r26_opr = LIR_OprFact::single_cpu(i); i++; + + // special register + map_register(i, x0); zr_opr = LIR_OprFact::single_cpu(i); i++; // zr + map_register(i, x1); r1_opr = LIR_OprFact::single_cpu(i); i++; // ra + map_register(i, x2); r2_opr = LIR_OprFact::single_cpu(i); i++; // sp + map_register(i, x3); r3_opr = LIR_OprFact::single_cpu(i); i++; // gp + map_register(i, x4); r4_opr = LIR_OprFact::single_cpu(i); i++; // thread + map_register(i, x8); r8_opr = LIR_OprFact::single_cpu(i); i++; // fp + map_register(i, x23); r23_opr = LIR_OprFact::single_cpu(i); i++; // java thread + map_register(i, x27); r27_opr = LIR_OprFact::single_cpu(i); i++; // heapbase + + // tmp register + map_register(i, x5); r5_opr = LIR_OprFact::single_cpu(i); i++; // t0 + map_register(i, x6); r6_opr = LIR_OprFact::single_cpu(i); i++; // t1 + + t0_opr = r5_opr; + t1_opr = r6_opr; + t0_long_opr = LIR_OprFact::double_cpu(r5_opr->cpu_regnr(), r5_opr->cpu_regnr()); + t1_long_opr = LIR_OprFact::double_cpu(r6_opr->cpu_regnr(), r6_opr->cpu_regnr()); + + long10_opr = LIR_OprFact::double_cpu(r10_opr->cpu_regnr(), r10_opr->cpu_regnr()); + long11_opr = LIR_OprFact::double_cpu(r11_opr->cpu_regnr(), r11_opr->cpu_regnr()); + + fpu10_float_opr = LIR_OprFact::single_fpu(10); + fpu10_double_opr = LIR_OprFact::double_fpu(10); + + i = 0; + _caller_save_cpu_regs[i++] = r7_opr; + _caller_save_cpu_regs[i++] = r10_opr; + _caller_save_cpu_regs[i++] = r11_opr; + _caller_save_cpu_regs[i++] = r12_opr; + _caller_save_cpu_regs[i++] = 
r13_opr; + _caller_save_cpu_regs[i++] = r14_opr; + _caller_save_cpu_regs[i++] = r15_opr; + _caller_save_cpu_regs[i++] = r16_opr; + _caller_save_cpu_regs[i++] = r17_opr; + _caller_save_cpu_regs[i++] = r28_opr; + _caller_save_cpu_regs[i++] = r29_opr; + _caller_save_cpu_regs[i++] = r30_opr; + _caller_save_cpu_regs[i++] = r31_opr; + + _init_done = true; + + zr_oop_opr = as_oop_opr(x0); + r1_oop_opr = as_oop_opr(x1); + r2_oop_opr = as_oop_opr(x2); + r3_oop_opr = as_oop_opr(x3); + r4_oop_opr = as_oop_opr(x4); + r5_oop_opr = as_oop_opr(x5); + r6_oop_opr = as_oop_opr(x6); + r7_oop_opr = as_oop_opr(x7); + r8_oop_opr = as_oop_opr(x8); + r9_oop_opr = as_oop_opr(x9); + r10_oop_opr = as_oop_opr(x10); + r11_oop_opr = as_oop_opr(x11); + r12_oop_opr = as_oop_opr(x12); + r13_oop_opr = as_oop_opr(x13); + r14_oop_opr = as_oop_opr(x14); + r15_oop_opr = as_oop_opr(x15); + r16_oop_opr = as_oop_opr(x16); + r17_oop_opr = as_oop_opr(x17); + r18_oop_opr = as_oop_opr(x18); + r19_oop_opr = as_oop_opr(x19); + r20_oop_opr = as_oop_opr(x20); + r21_oop_opr = as_oop_opr(x21); + r22_oop_opr = as_oop_opr(x22); + r23_oop_opr = as_oop_opr(x23); + r24_oop_opr = as_oop_opr(x24); + r25_oop_opr = as_oop_opr(x25); + r26_oop_opr = as_oop_opr(x26); + r27_oop_opr = as_oop_opr(x27); + r28_oop_opr = as_oop_opr(x28); + r29_oop_opr = as_oop_opr(x29); + r30_oop_opr = as_oop_opr(x30); + r31_oop_opr = as_oop_opr(x31); + + r10_metadata_opr = as_metadata_opr(x10); + r11_metadata_opr = as_metadata_opr(x11); + r12_metadata_opr = as_metadata_opr(x12); + r13_metadata_opr = as_metadata_opr(x13); + r14_metadata_opr = as_metadata_opr(x14); + r15_metadata_opr = as_metadata_opr(x15); + + sp_opr = as_pointer_opr(sp); + fp_opr = as_pointer_opr(fp); + + VMRegPair regs; + BasicType sig_bt = T_OBJECT; + SharedRuntime::java_calling_convention(&sig_bt, ®s, 1); + receiver_opr = as_oop_opr(regs.first()->as_Register()); + + for (i = 0; i < nof_caller_save_fpu_regs; i++) { + _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); + } +} + 
+ +Address FrameMap::make_new_address(ByteSize sp_offset) const { + return Address(sp, in_bytes(sp_offset)); +} + + +// ----------------mapping----------------------- +// all mapping is based on fp addressing, except for simple leaf methods where we access +// the locals sp based (and no frame is built) + + +// Frame for simple leaf methods (quick entries) +// +// +----------+ +// | ret addr | <- TOS +// +----------+ +// | args | +// | ...... | + +// Frame for standard methods +// +// | .........| <- TOS +// | locals | +// +----------+ +// | old fp, | +// +----------+ +// | ret addr | +// +----------+ +// | args | <- FP +// | .........| + + +// For OopMaps, map a local variable or spill index to a VMRegImpl name. +// This is the offset from sp() in the frame of the slot for the index, +// skewed by VMRegImpl::stack0 to indicate a stack location (vs. a register.) +// +// framesize + +// stack0 stack0 0 <- VMReg +// | | | +// ...........|..............|.............| +// 0 1 2 3 x x 4 5 6 ... | <- local indices +// ^ ^ sp() ( x x indicate link +// | | and return addr) +// arguments non-argument locals + + +VMReg FrameMap::fpu_regname (int n) { + // Return the OptoReg name for the fpu stack slot "n" + // A spilled fpu stack slot comprises two single-word OptoReg's. + return as_FloatRegister(n)->as_VMReg(); +} + +LIR_Opr FrameMap::stack_pointer() { + return FrameMap::sp_opr; +} + +// JSR 292 +LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { + return LIR_OprFact::illegalOpr; // Not needed on riscv +} + +bool FrameMap::validate_frame() { + return true; +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp @@ -0,0 +1,148 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_C1_FRAMEMAP_RISCV_HPP +#define CPU_RISCV_C1_FRAMEMAP_RISCV_HPP + +// On RISCV the frame looks as follows: +// +// +-----------------------------+---------+----------------------------------------+----------------+----------- +// | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling . 
+// +-----------------------------+---------+----------------------------------------+----------------+----------- + + public: + static const int pd_c_runtime_reserved_arg_size; + + enum { + first_available_sp_in_frame = 0, + frame_pad_in_bytes = 16, + nof_reg_args = 8 + }; + + public: + static LIR_Opr receiver_opr; + + static LIR_Opr zr_opr; + static LIR_Opr r1_opr; + static LIR_Opr r2_opr; + static LIR_Opr r3_opr; + static LIR_Opr r4_opr; + static LIR_Opr r5_opr; + static LIR_Opr r6_opr; + static LIR_Opr r7_opr; + static LIR_Opr r8_opr; + static LIR_Opr r9_opr; + static LIR_Opr r10_opr; + static LIR_Opr r11_opr; + static LIR_Opr r12_opr; + static LIR_Opr r13_opr; + static LIR_Opr r14_opr; + static LIR_Opr r15_opr; + static LIR_Opr r16_opr; + static LIR_Opr r17_opr; + static LIR_Opr r18_opr; + static LIR_Opr r19_opr; + static LIR_Opr r20_opr; + static LIR_Opr r21_opr; + static LIR_Opr r22_opr; + static LIR_Opr r23_opr; + static LIR_Opr r24_opr; + static LIR_Opr r25_opr; + static LIR_Opr r26_opr; + static LIR_Opr r27_opr; + static LIR_Opr r28_opr; + static LIR_Opr r29_opr; + static LIR_Opr r30_opr; + static LIR_Opr r31_opr; + static LIR_Opr fp_opr; + static LIR_Opr sp_opr; + + static LIR_Opr zr_oop_opr; + static LIR_Opr r1_oop_opr; + static LIR_Opr r2_oop_opr; + static LIR_Opr r3_oop_opr; + static LIR_Opr r4_oop_opr; + static LIR_Opr r5_oop_opr; + static LIR_Opr r6_oop_opr; + static LIR_Opr r7_oop_opr; + static LIR_Opr r8_oop_opr; + static LIR_Opr r9_oop_opr; + static LIR_Opr r10_oop_opr; + static LIR_Opr r11_oop_opr; + static LIR_Opr r12_oop_opr; + static LIR_Opr r13_oop_opr; + static LIR_Opr r14_oop_opr; + static LIR_Opr r15_oop_opr; + static LIR_Opr r16_oop_opr; + static LIR_Opr r17_oop_opr; + static LIR_Opr r18_oop_opr; + static LIR_Opr r19_oop_opr; + static LIR_Opr r20_oop_opr; + static LIR_Opr r21_oop_opr; + static LIR_Opr r22_oop_opr; + static LIR_Opr r23_oop_opr; + static LIR_Opr r24_oop_opr; + static LIR_Opr r25_oop_opr; + static LIR_Opr r26_oop_opr; + 
static LIR_Opr r27_oop_opr; + static LIR_Opr r28_oop_opr; + static LIR_Opr r29_oop_opr; + static LIR_Opr r30_oop_opr; + static LIR_Opr r31_oop_opr; + + static LIR_Opr t0_opr; + static LIR_Opr t1_opr; + static LIR_Opr t0_long_opr; + static LIR_Opr t1_long_opr; + + static LIR_Opr r10_metadata_opr; + static LIR_Opr r11_metadata_opr; + static LIR_Opr r12_metadata_opr; + static LIR_Opr r13_metadata_opr; + static LIR_Opr r14_metadata_opr; + static LIR_Opr r15_metadata_opr; + + static LIR_Opr long10_opr; + static LIR_Opr long11_opr; + static LIR_Opr fpu10_float_opr; + static LIR_Opr fpu10_double_opr; + + static LIR_Opr as_long_opr(Register r) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); + } + static LIR_Opr as_pointer_opr(Register r) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); + } + + // VMReg name for spilled physical FPU stack slot n + static VMReg fpu_regname(int n); + + static bool is_caller_save_register(LIR_Opr opr) { return true; } // unconditionally true: every operand is reported as caller-saved + static bool is_caller_save_register(Register r) { return true; } // unconditionally true, same answer as the LIR_Opr overload + + static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; } + static int last_cpu_reg() { return pd_last_cpu_reg; } + +#endif // CPU_RISCV_C1_FRAMEMAP_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" + +#ifndef PRODUCT +#define COMMENT(x) do { __ block_comment(x); } while (0) +#else +#define COMMENT(x) +#endif + +#define __ _masm-> + +void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr illegal, + LIR_Opr result, CodeEmitInfo* info) { + // opcode check + assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); + bool is_irem = (code == lir_irem); + // operand check + assert(left->is_single_cpu(), "left must be a register"); + assert(right->is_single_cpu() || right->is_constant(), "right must be a register or constant"); + assert(result->is_single_cpu(), "result must be a register"); + Register lreg = left->as_register(); + Register dreg = result->as_register(); + + // power-of-2 constant check and codegen + if (right->is_constant()) { + int c = right->as_constant_ptr()->as_jint(); + assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); + if (is_irem) { + if (c == 1) { + // move 0 to dreg if divisor is 1 + __ mv(dreg, zr); + } else { + unsigned int shift = exact_log2(c); + __ sraiw(t0, lreg, 0x1f); // t0 = 0 or -1, the sign of lreg + __ srliw(t0, t0, BitsPerInt - 
shift); + __ addw(t1, lreg, t0); + if (is_imm_in_range(c - 1, 12, 0)) { + __ andi(t1, t1, c - 1); + } else { + __ zero_extend(t1, t1, shift); + } + __ subw(dreg, t1, t0); + } + } else { + if (c == 1) { + // move lreg to dreg if divisor is 1 + __ mv(dreg, lreg); + } else { + unsigned int shift = exact_log2(c); + __ sraiw(t0, lreg, 0x1f); // t0 = 0 or -1, the sign of lreg + if (is_imm_in_range(c - 1, 12, 0)) { + __ andi(t0, t0, c - 1); + } else { + __ zero_extend(t0, t0, shift); + } + __ addw(dreg, t0, lreg); // add c - 1 to negative dividends so the shift rounds toward zero + __ sraiw(dreg, dreg, shift); + } + } + } else { + Register rreg = right->as_register(); + __ corrected_idivl(dreg, lreg, rreg, is_irem); + } +} + +void LIR_Assembler::arith_op_single_cpu_right_constant(LIR_Code code, LIR_Opr left, LIR_Opr right, + Register lreg, Register dreg) { + // cpu register - constant + jlong c; + + switch (right->type()) { + case T_LONG: + c = right->as_constant_ptr()->as_jlong(); break; + case T_INT: // fall through + case T_ADDRESS: + c = right->as_constant_ptr()->as_jint(); break; + default: + ShouldNotReachHere(); + c = 0; // unreachable + } + + assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); + if (c == 0 && dreg == lreg) { + COMMENT("effective nop elided"); + return; + } + switch (left->type()) { + case T_INT: + switch (code) { + case lir_add: __ addw(dreg, lreg, c); break; + case lir_sub: __ subw(dreg, lreg, c); break; + default: ShouldNotReachHere(); + } + break; + case T_OBJECT: // fall through + case T_ADDRESS: + switch (code) { + case lir_add: __ add(dreg, lreg, c); break; + case lir_sub: __ sub(dreg, lreg, c); break; + default: ShouldNotReachHere(); + } + break; + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::arith_op_single_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) { + Register lreg = left->as_register(); + Register dreg = as_reg(dest); + + if (right->is_single_cpu()) { + // cpu register - cpu register + assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT, "should be"); 
+ Register rreg = right->as_register(); + switch (code) { + case lir_add: __ addw(dest->as_register(), lreg, rreg); break; + case lir_sub: __ subw(dest->as_register(), lreg, rreg); break; + case lir_mul: __ mulw(dest->as_register(), lreg, rreg); break; + default: ShouldNotReachHere(); + } + } else if (right->is_double_cpu()) { + Register rreg = right->as_register_lo(); + // single_cpu + double_cpu; can happen with obj_long + assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); + switch (code) { + case lir_add: __ add(dreg, lreg, rreg); break; + case lir_sub: __ sub(dreg, lreg, rreg); break; + default: ShouldNotReachHere(); + } + } else if (right->is_constant()) { + arith_op_single_cpu_right_constant(code, left, right, lreg, dreg); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) { + Register lreg_lo = left->as_register_lo(); + + if (right->is_double_cpu()) { + // cpu register - cpu register + Register rreg_lo = right->as_register_lo(); + switch (code) { + case lir_add: __ add(dest->as_register_lo(), lreg_lo, rreg_lo); break; + case lir_sub: __ sub(dest->as_register_lo(), lreg_lo, rreg_lo); break; + case lir_mul: __ mul(dest->as_register_lo(), lreg_lo, rreg_lo); break; + case lir_div: __ corrected_idivq(dest->as_register_lo(), lreg_lo, rreg_lo, false); break; + case lir_rem: __ corrected_idivq(dest->as_register_lo(), lreg_lo, rreg_lo, true); break; + default: + ShouldNotReachHere(); + } + } else if (right->is_constant()) { + jlong c = right->as_constant_ptr()->as_jlong(); + Register dreg = as_reg(dest); + switch (code) { + case lir_add: // fall through + case lir_sub: + if (c == 0 && dreg == lreg_lo) { + COMMENT("effective nop elided"); + return; + } + code == lir_add ? 
__ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c); + break; + case lir_div: + assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move lreg_lo to dreg if divisor is 1 + __ mv(dreg, lreg_lo); + } else { + unsigned int shift = exact_log2_long(c); + // use t0 as intermediate result register + __ srai(t0, lreg_lo, 0x3f); + if (is_imm_in_range(c - 1, 12, 0)) { + __ andi(t0, t0, c - 1); + } else { + __ zero_extend(t0, t0, shift); + } + __ add(dreg, t0, lreg_lo); // add c - 1 to negative dividends so the shift rounds toward zero + __ srai(dreg, dreg, shift); + } + break; + case lir_rem: + assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move 0 to dreg if divisor is 1 + __ mv(dreg, zr); + } else { + unsigned int shift = exact_log2_long(c); + __ srai(t0, lreg_lo, 0x3f); + __ srli(t0, t0, BitsPerLong - shift); + __ add(t1, lreg_lo, t0); + if (is_imm_in_range(c - 1, 12, 0)) { + __ andi(t1, t1, c - 1); + } else { + __ zero_extend(t1, t1, shift); + } + __ sub(dreg, t1, t0); + } + break; + default: + ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) { + assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register"); + switch (code) { + case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) { + if (right->is_double_fpu()) { + // fpu register - fpu register + switch (code) { + case lir_add: __ fadd_d(dest->as_double_reg(), 
left->as_double_reg(), right->as_double_reg()); break; + case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_div: __ fdiv_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + default: + ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, + CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); + + if (left->is_single_cpu()) { + arith_op_single_cpu(code, left, right, dest); + } else if (left->is_double_cpu()) { + arith_op_double_cpu(code, left, right, dest); + } else if (left->is_single_fpu()) { + arith_op_single_fpu(code, left, right, dest); + } else if (left->is_double_fpu()) { + arith_op_double_fpu(code, left, right, dest); + } else { + ShouldNotReachHere(); + } +} + +#undef __ Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP +#define CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP + + // arith_op sub functions + void arith_op_single_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); + void arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); + void arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); + void arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); + void arith_op_single_cpu_right_constant(LIR_Code code, LIR_Opr left, LIR_Opr right, Register lreg, Register dreg); + void arithmetic_idiv(LIR_Op3* op, bool is_irem); // NOTE(review): the arithmetic_idiv defined in c1_LIRAssembler_arith_riscv.cpp takes (LIR_Code, LIR_Opr left, right, illegal, result, CodeEmitInfo*); confirm this (LIR_Op3*, bool) overload is still defined or used + +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp @@ -0,0 +1,388 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "ci/ciArrayKlass.hpp" +#include "oops/objArrayKlass.hpp" +#include "runtime/stubRoutines.hpp" + +#define __ _masm-> + + +void LIR_Assembler::generic_arraycopy(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, CodeStub *stub) { + assert(src == x11 && src_pos == x12, "mismatch in calling convention"); + // Save the arguments in case the generic arraycopy fails and we + // have to fall back to the JNI stub + arraycopy_store_args(src, src_pos, length, dst, dst_pos); + + address copyfunc_addr = StubRoutines::generic_arraycopy(); + assert(copyfunc_addr != NULL, "generic arraycopy stub required"); + + // The arguments are in java calling convention so we shift them + // to C convention + assert_different_registers(c_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4); + __ mv(c_rarg0, j_rarg0); + assert_different_registers(c_rarg1, j_rarg2, j_rarg3, j_rarg4); + __ mv(c_rarg1, j_rarg1); + assert_different_registers(c_rarg2, j_rarg3, j_rarg4); + __ mv(c_rarg2, j_rarg2); + assert_different_registers(c_rarg3, j_rarg4); + __ mv(c_rarg3, j_rarg3); + __ mv(c_rarg4, j_rarg4); +#ifndef PRODUCT + if 
(PrintC1Statistics) { + __ add_memory_int32(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt), 1); + } +#endif + __ far_call(RuntimeAddress(copyfunc_addr)); + __ beqz(x10, *stub->continuation()); + // Reload values from the stack so they are where the stub + // expects them. + arraycopy_load_args(src, src_pos, length, dst, dst_pos); + + // x10 is -1^K (i.e. ~K) where K == partial copied count + __ xori(t0, x10, -1); + // adjust length down and src/dst pos up by partial copied count + __ subw(length, length, t0); + __ addw(src_pos, src_pos, t0); + __ addw(dst_pos, dst_pos, t0); + __ j(*stub->entry()); + + __ bind(*stub->continuation()); +} + +void LIR_Assembler::arraycopy_simple_check(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, Register tmp, + CodeStub *stub, int flags) { + // test for NULL + if (flags & LIR_OpArrayCopy::src_null_check) { + __ beqz(src, *stub->entry(), /* is_far */ true); + } + if (flags & LIR_OpArrayCopy::dst_null_check) { + __ beqz(dst, *stub->entry(), /* is_far */ true); + } + + // If the compiler was not able to prove that exact type of the source or the destination + // of the arraycopy is an array type, check at runtime if the source or the destination is + // an instance type. 
+ if (flags & LIR_OpArrayCopy::type_check) { + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset()))); + __ li(t1, Klass::_lh_neutral_value); + __ bge(t0, t1, *stub->entry(), /* is_far */ true); + } + + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset()))); + __ li(t1, Klass::_lh_neutral_value); + __ bge(t0, t1, *stub->entry(), /* is_far */ true); + } + } + + // check if negative + if (flags & LIR_OpArrayCopy::src_pos_positive_check) { + __ bltz(src_pos, *stub->entry(), /* is_far */ true); + } + if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { + __ bltz(dst_pos, *stub->entry(), /* is_far */ true); + } + if (flags & LIR_OpArrayCopy::length_positive_check) { + __ bltz(length, *stub->entry(), /* is_far */ true); + } + + if (flags & LIR_OpArrayCopy::src_range_check) { + __ addw(tmp, src_pos, length); + __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes())); + __ bgtu(tmp, t0, *stub->entry(), /* is_far */ true); + } + if (flags & LIR_OpArrayCopy::dst_range_check) { + __ addw(tmp, dst_pos, length); + __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes())); + __ bgtu(tmp, t0, *stub->entry(), /* is_far */ true); + } +} + +void LIR_Assembler::arraycopy_checkcast(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, Register tmp, + CodeStub *stub, BasicType basic_type, + address copyfunc_addr, int flags) { + // src is not a sub class of dst so we have to do a + // per-element check. + int mask = LIR_OpArrayCopy::src_objarray | LIR_OpArrayCopy::dst_objarray; + if ((flags & mask) != mask) { + // Check that at least one of them is known to be an object array. 
+ assert(flags & mask, "one of the two should be known to be an object array"); + + if (!(flags & LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + } + int lh_offset = in_bytes(Klass::layout_helper_offset()); + Address klass_lh_addr(tmp, lh_offset); + jint objArray_lh = Klass::array_layout_helper(T_OBJECT); + __ lw(t0, klass_lh_addr); + __ mvw(t1, objArray_lh); + __ bne(t0, t1, *stub->entry(), /* is_far */ true); + } + + // Spill because stubs can use any register they like and it's + // easier to restore just those that we care about. + arraycopy_store_args(src, src_pos, length, dst, dst_pos); + arraycopy_checkcast_prepare_params(src, src_pos, length, dst, dst_pos, basic_type); + __ far_call(RuntimeAddress(copyfunc_addr)); + +#ifndef PRODUCT + if (PrintC1Statistics) { + Label failed; + __ bnez(x10, failed); + __ add_memory_int32(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt), 1); + __ bind(failed); + } +#endif + + __ beqz(x10, *stub->continuation()); + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ add_memory_int32(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt), 1); + } +#endif + assert_different_registers(dst, dst_pos, length, src_pos, src, x10, t0); + + // Restore previously spilled arguments + arraycopy_load_args(src, src_pos, length, dst, dst_pos); + + // return value is -1^K (i.e. ~K) where K is partial copied count + __ xori(t0, x10, -1); + // adjust length down and src/dst pos up by partial copied count + __ subw(length, length, t0); + __ addw(src_pos, src_pos, t0); + __ addw(dst_pos, dst_pos, t0); +} + +void LIR_Assembler::arraycopy_type_check(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, Register tmp, + CodeStub *stub, BasicType basic_type, int flags) { + // We don't know the array types are compatible + if (basic_type != T_OBJECT) { + // Simple test for basic type arrays + if 
(UseCompressedClassPointers) { + __ lwu(tmp, Address(src, oopDesc::klass_offset_in_bytes())); + __ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes())); + } else { + __ ld(tmp, Address(src, oopDesc::klass_offset_in_bytes())); + __ ld(t0, Address(dst, oopDesc::klass_offset_in_bytes())); + } + __ bne(tmp, t0, *stub->entry(), /* is_far */ true); + } else { + // For object arrays, if src is a sub class of dst then we can + // safely do the copy. + Label cont, slow; + +#define PUSH(r1, r2) \ + __ addi(sp, sp, -2 * wordSize); \ + __ sd(r1, Address(sp, 1 * wordSize)); \ + __ sd(r2, Address(sp, 0)); + +#define POP(r1, r2) \ + __ ld(r1, Address(sp, 1 * wordSize)); \ + __ ld(r2, Address(sp, 0)); \ + __ addi(sp, sp, 2 * wordSize); + + PUSH(src, dst); + __ load_klass(src, src); + __ load_klass(dst, dst); + __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); + + PUSH(src, dst); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + POP(src, dst); + __ bnez(dst, cont); + + __ bind(slow); + POP(src, dst); + + address copyfunc_addr = StubRoutines::checkcast_arraycopy(); + if (copyfunc_addr != NULL) { // use stub if available + arraycopy_checkcast(src, src_pos, length, dst, dst_pos, tmp, stub, basic_type, copyfunc_addr, flags); + } + + __ j(*stub->entry()); + __ bind(cont); + POP(src, dst); + } +} + +void LIR_Assembler::arraycopy_assert(Register src, Register dst, Register tmp, ciArrayKlass *default_type, int flags) { + assert(default_type != NULL, "NULL default_type!"); + BasicType basic_type = default_type->element_type()->basic_type(); + + if (basic_type == T_ARRAY) { basic_type = T_OBJECT; } + if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { + // Sanity check the known type with the incoming class. For the + // primitive case the types must match exactly with src.klass and + // dst.klass each exactly matching the default type. 
For the + // object array case, if no type check is needed then either the + // dst type is exactly the expected type and the src type is a + // subtype which we can't check or src is the same array as dst + // but not necessarily exactly of type default_type. + Label known_ok, halt; + __ mov_metadata(tmp, default_type->constant_encoding()); + if (UseCompressedClassPointers) { + __ encode_klass_not_null(tmp); + } + + if (basic_type != T_OBJECT) { + if (UseCompressedClassPointers) { + __ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes())); + } else { + __ ld(t0, Address(dst, oopDesc::klass_offset_in_bytes())); + } + __ bne(tmp, t0, halt); + if (UseCompressedClassPointers) { + __ lwu(t0, Address(src, oopDesc::klass_offset_in_bytes())); + } else { + __ ld(t0, Address(src, oopDesc::klass_offset_in_bytes())); + } + __ beq(tmp, t0, known_ok); + } else { + if (UseCompressedClassPointers) { + __ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes())); + } else { + __ ld(t0, Address(dst, oopDesc::klass_offset_in_bytes())); + } + __ beq(tmp, t0, known_ok); + __ beq(src, dst, known_ok); + } + __ bind(halt); + __ stop("incorrect type information in arraycopy"); + __ bind(known_ok); + } +} + +void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { + ciArrayKlass *default_type = op->expected_type(); + Register src = op->src()->as_register(); + Register dst = op->dst()->as_register(); + Register src_pos = op->src_pos()->as_register(); + Register dst_pos = op->dst_pos()->as_register(); + Register length = op->length()->as_register(); + Register tmp = op->tmp()->as_register(); + + CodeStub* stub = op->stub(); + int flags = op->flags(); + BasicType basic_type = default_type != NULL ? 
default_type->element_type()->basic_type() : T_ILLEGAL; + if (is_reference_type(basic_type)) { basic_type = T_OBJECT; } + + // if we don't know anything, just go through the generic arraycopy + if (default_type == NULL) { + generic_arraycopy(src, src_pos, length, dst, dst_pos, stub); + return; + } + + assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), + "must be true at this point"); + + arraycopy_simple_check(src, src_pos, length, dst, dst_pos, tmp, stub, flags); + + if (flags & LIR_OpArrayCopy::type_check) { + arraycopy_type_check(src, src_pos, length, dst, dst_pos, tmp, stub, basic_type, flags); + } + +#ifdef ASSERT + arraycopy_assert(src, dst, tmp, default_type, flags); +#endif + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ add_memory_int32(ExternalAddress(Runtime1::arraycopy_count_address(basic_type)), 1); + } +#endif + arraycopy_prepare_params(src, src_pos, length, dst, dst_pos, basic_type); + + bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; + bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; + const char *name = NULL; + address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); + + CodeBlob *cb = CodeCache::find_blob(entry); + if (cb != NULL) { + __ far_call(RuntimeAddress(entry)); + } else { + const int args_num = 3; + __ call_VM_leaf(entry, args_num); + } + + __ bind(*stub->continuation()); +} + + +void LIR_Assembler::arraycopy_prepare_params(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, BasicType basic_type) { + int scale = array_element_size(basic_type); + __ shadd(c_rarg0, src_pos, src, t0, scale); + __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(c_rarg0, dst, dst_pos, length); + __ shadd(c_rarg1, dst_pos, dst, t0, scale); + __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(c_rarg1, dst, length); + __ 
mv(c_rarg2, length); + assert_different_registers(c_rarg2, dst); +} + +void LIR_Assembler::arraycopy_checkcast_prepare_params(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, BasicType basic_type) { + arraycopy_prepare_params(src, src_pos, length, dst, dst_pos, basic_type); + __ load_klass(c_rarg4, dst); + __ ld(c_rarg4, Address(c_rarg4, ObjArrayKlass::element_klass_offset())); + __ lwu(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset())); +} + +void LIR_Assembler::arraycopy_store_args(Register src, Register src_pos, Register length, + Register dst, Register dst_pos) { + __ sd(dst_pos, Address(sp, 0)); // 0: dst_pos sp offset + __ sd(dst, Address(sp, 1 * BytesPerWord)); // 1: dst sp offset + __ sd(length, Address(sp, 2 * BytesPerWord)); // 2: length sp offset + __ sd(src_pos, Address(sp, 3 * BytesPerWord)); // 3: src_pos sp offset + __ sd(src, Address(sp, 4 * BytesPerWord)); // 4: src sp offset +} + +void LIR_Assembler::arraycopy_load_args(Register src, Register src_pos, Register length, + Register dst, Register dst_pos) { + __ ld(dst_pos, Address(sp, 0)); // 0: dst_pos sp offset + __ ld(dst, Address(sp, 1 * BytesPerWord)); // 1: dst sp offset + __ ld(length, Address(sp, 2 * BytesPerWord)); // 2: length sp offset + __ ld(src_pos, Address(sp, 3 * BytesPerWord)); // 3: src_pos sp offset + __ ld(src, Address(sp, 4 * BytesPerWord)); // 4: src sp offset +} + +#undef __ Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP +#define CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP + + // arraycopy sub functions + void generic_arraycopy(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, CodeStub *stub); + void arraycopy_simple_check(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, Register tmp, + CodeStub *stub, int flags); + void arraycopy_checkcast(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, Register tmp, + CodeStub *stub, BasicType basic_type, + address copyfunc_addr, int flags); + void arraycopy_type_check(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, Register tmp, + CodeStub *stub, BasicType basic_type, int flags); + void arraycopy_assert(Register src, Register dst, Register tmp, ciArrayKlass *default_type, int flags); // called under ASSERT by emit_arraycopy; emits stop("incorrect type information in arraycopy") on a klass mismatch + void arraycopy_prepare_params(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, BasicType basic_type); // sets c_rarg0/c_rarg1 to the src/dst element addresses and c_rarg2 to length + void 
arraycopy_checkcast_prepare_params(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, BasicType basic_type); + void arraycopy_store_args(Register src, Register src_pos, Register length, + Register dst, Register dst_pos); + void arraycopy_load_args(Register src, Register src_pos, Register length, + Register dst, Register dst_pos); + +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp @@ -0,0 +1,2264 @@ +/* + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArrayKlass.hpp" +#include "ci/ciInstance.hpp" +#include "code/compiledIC.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/objArrayKlass.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + +#ifndef PRODUCT +#define COMMENT(x) do { __ block_comment(x); } while (0) +#else +#define COMMENT(x) +#endif + +NEEDS_CLEANUP // remove this definitions ? +const Register IC_Klass = t1; // where the IC klass is cached +const Register SYNC_header = x10; // synchronization header +const Register SHIFT_count = x10; // where count for shift operations must be + +#define __ _masm-> + +static void select_different_registers(Register preserve, + Register extra, + Register &tmp1, + Register &tmp2) { + if (tmp1 == preserve) { + assert_different_registers(tmp1, tmp2, extra); + tmp1 = extra; + } else if (tmp2 == preserve) { + assert_different_registers(tmp1, tmp2, extra); + tmp2 = extra; + } + assert_different_registers(preserve, tmp1, tmp2); +} + +static void select_different_registers(Register preserve, + Register extra, + Register &tmp1, + Register &tmp2, + Register &tmp3) { + if (tmp1 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp1 = extra; + } else if (tmp2 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp2 = extra; + } else if (tmp3 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp3 = extra; + } + assert_different_registers(preserve, tmp1, tmp2, tmp3); +} + +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return 
false; } + +void LIR_Assembler::clinit_barrier(ciMethod* method) { + assert(VM_Version::supports_fast_class_init_checks(), "sanity"); + assert(!method->holder()->is_not_initialized(), "initialization should have been started"); + + Label L_skip_barrier; + + __ mov_metadata(t1, method->holder()->constant_encoding()); + __ clinit_barrier(t1, t0, &L_skip_barrier /* L_fast_path */); + __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + __ bind(L_skip_barrier); +} + +LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; +} + +LIR_Opr LIR_Assembler::osrBufferPointer() { + return FrameMap::as_pointer_opr(receiverOpr()->as_register()); +} + +void LIR_Assembler::breakpoint() { Unimplemented(); } + +void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } + +void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } + +static jlong as_long(LIR_Opr data) { + jlong result; + switch (data->type()) { + case T_INT: + result = (data->as_jint()); + break; + case T_LONG: + result = (data->as_jlong()); + break; + default: + ShouldNotReachHere(); + result = 0; // unreachable + } + return result; +} + +Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { + if (addr->base()->is_illegal()) { + assert(addr->index()->is_illegal(), "must be illegal too"); + __ movptr(tmp, addr->disp()); + return Address(tmp, 0); + } + + Register base = addr->base()->as_pointer_register(); + LIR_Opr index_opr = addr->index(); + + if (index_opr->is_illegal()) { + return Address(base, addr->disp()); + } + + int scale = addr->scale(); + if (index_opr->is_cpu_register()) { + Register index; + if (index_opr->is_single_cpu()) { + index = index_opr->as_register(); + } else { + index = index_opr->as_register_lo(); + } + if (scale != 0) { + __ shadd(tmp, index, base, tmp, scale); + } else { + __ add(tmp, base, index); + } + return Address(tmp, addr->disp()); + } else if (index_opr->is_constant()) { + intptr_t addr_offset = 
(((intptr_t)index_opr->as_constant_ptr()->as_jint()) << scale) + addr->disp(); + return Address(base, addr_offset); + } + + Unimplemented(); + return Address(); +} + +Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { + ShouldNotReachHere(); + return Address(); +} + +Address LIR_Assembler::as_Address(LIR_Address* addr) { + return as_Address(addr, t0); +} + +Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { + return as_Address(addr); +} + +// Ensure a valid Address (base + offset) to a stack-slot. If stack access is +// not encodable as a base + (immediate) offset, generate an explicit address +// calculation to hold the address in t0. +Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { + precond(size == 4 || size == 8); + Address addr = frame_map()->address_for_slot(index, adjust); + precond(addr.getMode() == Address::base_plus_offset); + precond(addr.base() == sp); + precond(addr.offset() > 0); + uint mask = size - 1; + assert((addr.offset() & mask) == 0, "scaled offsets only"); + + return addr; +} + +void LIR_Assembler::osr_entry() { + offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); + BlockBegin* osr_entry = compilation()->hir()->osr_entry(); + guarantee(osr_entry != NULL, "NULL osr_entry!"); + ValueStack* entry_state = osr_entry->state(); + int number_of_locks = entry_state->locks_size(); + + // we jump here if osr happens with the interpreter + // state set up to continue at the beginning of the + // loop that triggered osr - in particular, we have + // the following registers setup: + // + // x12: osr buffer + // + + //build frame + ciMethod* m = compilation()->method(); + __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + + // OSR buffer is + // + // locals[nlocals-1..0] + // monitors[0..number_of_locks] + // + // locals is a direct copy of the interpreter frame so in the osr buffer + // so first slot in the local array is the last local from the interpreter + // and last slot is 
local[0] (receiver) from the interpreter + // + // Similarly with locks. The first lock slot in the osr buffer is the nth lock + // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock + // in the interpreter frame (the method lock if a sync method) + + // Initialize monitors in the compiled activation. + // x12: pointer to osr buffer + // All other registers are dead at this point and the locals will be + // copied into place by code emitted in the IR. + + Register OSR_buf = osrBufferPointer()->as_pointer_register(); + { + assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); + int monitor_offset = BytesPerWord * method()->max_locals() + + (2 * BytesPerWord) * (number_of_locks - 1); + // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in + // the OSR buffer using 2 word entries: first the lock and then + // the oop. + for (int i = 0; i < number_of_locks; i++) { + int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); +#ifdef ASSERT + // verify the interpreter's monitor has a non-null object + { + Label L; + __ ld(t0, Address(OSR_buf, slot_offset + 1 * BytesPerWord)); + __ bnez(t0, L); + __ stop("locked object is NULL"); + __ bind(L); + } +#endif // ASSERT + __ ld(x9, Address(OSR_buf, slot_offset + 0)); + __ sd(x9, frame_map()->address_for_monitor_lock(i)); + __ ld(x9, Address(OSR_buf, slot_offset + 1 * BytesPerWord)); + __ sd(x9, frame_map()->address_for_monitor_object(i)); + } + } +} + +// inline cache check; done before the frame is built. +int LIR_Assembler::check_icache() { + Register receiver = FrameMap::receiver_opr->as_register(); + Register ic_klass = IC_Klass; + int start_offset = __ offset(); + Label dont; + __ inline_cache_check(receiver, ic_klass, dont); + + // if icache check fails, then jump to runtime routine + // Note: RECEIVER must still contain the receiver! 
+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + + // We align the verified entry point unless the method body + // (including its inline cache check) will fit in a single 64-byte + // icache line. + if (!method()->is_accessor() || __ offset() - start_offset > 4 * 4) { + // force alignment after the cache check. + __ align(CodeEntryAlignment); + } + + __ bind(dont); + return start_offset; +} + +void LIR_Assembler::jobject2reg(jobject o, Register reg) { + if (o == NULL) { + __ mv(reg, zr); + } else { + __ movoop(reg, o, /* immediate */ true); + } +} + +void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { + deoptimize_trap(info); +} + +// This specifies the rsp decrement needed to build the frame +int LIR_Assembler::initial_frame_size_in_bytes() const { + // if rounding, must let FrameMap know! + + return in_bytes(frame_map()->framesize_in_bytes()); +} + +int LIR_Assembler::emit_exception_handler() { + // if the last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when searching for the corresponding bci ==> add a nop + // (was bug 5/14/1999 -gri) + __ nop(); + + // generate code for exception handler + address handler_base = __ start_a_stub(exception_handler_size()); + if (handler_base == NULL) { + // not enough space left for the handler + bailout("exception handler overflow"); + return -1; + } + + int offset = code_offset(); + + // the exception oop and pc are in x10, and x13 + // no other registers need to be preserved, so invalidate them + __ invalidate_registers(false, true, true, false, true, true); + + // check that there is really an exception + __ verify_not_null_oop(x10); + + // search an exception handler (x10: exception oop, x13: throwing pc) + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id))); + __ 
should_not_reach_here(); + guarantee(code_offset() - offset <= exception_handler_size(), "overflow"); + __ end_a_stub(); + + return offset; +} + +// Emit the code to remove the frame from the stack in the exception +// unwind path. +int LIR_Assembler::emit_unwind_handler() { +#ifndef PRODUCT + if (CommentedAssembly) { + _masm->block_comment("Unwind handler"); + } +#endif // PRODUCT + + int offset = code_offset(); + + // Fetch the exception from TLS and clear out exception related thread state + __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); + __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); + __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); + + __ bind(_unwind_handler_entry); + __ verify_not_null_oop(x10); + if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { + __ mv(x9, x10); // Preserve the exception + } + + // Perform needed unlocking + MonitorExitStub* stub = NULL; + if (method()->is_synchronized()) { + monitor_address(0, FrameMap::r10_opr); + stub = new MonitorExitStub(FrameMap::r10_opr, true, 0); + __ unlock_object(x15, x14, x10, *stub->entry()); + __ bind(*stub->continuation()); + } + + if (compilation()->env()->dtrace_method_probes()) { + __ mv(c_rarg0, xthread); + __ mov_metadata(c_rarg1, method()->constant_encoding()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), c_rarg0, c_rarg1); + } + + if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { + __ mv(x10, x9); // Restore the exception + } + + // remove the activation and dispatch to the unwind handler + __ block_comment("remove_frame and dispatch to the unwind handler"); + __ remove_frame(initial_frame_size_in_bytes()); + __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id))); + + // Emit the slow path assembly + if (stub != NULL) { + stub->emit_code(this); + } + + return offset; +} + +int LIR_Assembler::emit_deopt_handler() { + // if the
last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when searching for the corresponding bci => add a nop + // (was bug 5/14/1999 - gri) + __ nop(); + + // generate code for deopt handler + address handler_base = __ start_a_stub(deopt_handler_size()); + if (handler_base == NULL) { + // not enough space left for the handler + bailout("deopt handler overflow"); + return -1; + } + + int offset = code_offset(); + + __ auipc(ra, 0); + __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); + guarantee(code_offset() - offset <= deopt_handler_size(), "overflow"); + __ end_a_stub(); + + return offset; +} + +void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { + assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == x10, "word returns are in x10"); + + // Pop the stack before the safepoint code + __ remove_frame(initial_frame_size_in_bytes()); + + if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) { + __ reserved_stack_check(); + } + + code_stub->set_safepoint_offset(__ offset()); + __ relocate(relocInfo::poll_return_type); + __ safepoint_poll(*code_stub->entry(), true /* at_return */, false /* acquire */, true /* in_nmethod */); + __ ret(); +} + +int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { + guarantee(info != NULL, "Shouldn't be NULL"); + __ get_polling_page(t0, relocInfo::poll_type); + add_debug_info_for_branch(info); // This isn't just debug info: + // it's the oop map + __ read_polling_page(t0, 0, relocInfo::poll_type); + return __ offset(); +} + +void LIR_Assembler::move_regs(Register from_reg, Register to_reg) { + __ mv(to_reg, from_reg); +} + +void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); } + +void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode
patch_code, CodeEmitInfo* info) { + assert(src->is_constant(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + address const_addr = NULL; + + switch (c->type()) { + case T_INT: + assert(patch_code == lir_patch_none, "no patching handled here"); + __ mvw(dest->as_register(), c->as_jint()); + break; + + case T_ADDRESS: + assert(patch_code == lir_patch_none, "no patching handled here"); + __ mv(dest->as_register(), c->as_jint()); + break; + + case T_LONG: + assert(patch_code == lir_patch_none, "no patching handled here"); + __ mv(dest->as_register_lo(), (intptr_t)c->as_jlong()); + break; + + case T_OBJECT: + case T_ARRAY: + if (patch_code == lir_patch_none) { + jobject2reg(c->as_jobject(), dest->as_register()); + } else { + jobject2reg_with_patching(dest->as_register(), info); + } + break; + + case T_METADATA: + if (patch_code != lir_patch_none) { + klass2reg_with_patching(dest->as_register(), info); + } else { + __ mov_metadata(dest->as_register(), c->as_metadata()); + } + break; + + case T_FLOAT: + const_addr = float_constant(c->as_jfloat()); + assert(const_addr != NULL, "must create float constant in the constant table"); + __ flw(dest->as_float_reg(), InternalAddress(const_addr)); + break; + + case T_DOUBLE: + const_addr = double_constant(c->as_jdouble()); + assert(const_addr != NULL, "must create double constant in the constant table"); + __ fld(dest->as_double_reg(), InternalAddress(const_addr)); + break; + + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { + assert(src->is_constant(), "should not call otherwise"); + assert(dest->is_stack(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + switch (c->type()) { + case T_OBJECT: + if (c->as_jobject() == NULL) { + __ sd(zr, frame_map()->address_for_slot(dest->single_stack_ix())); + } else { + const2reg(src, FrameMap::t1_opr, lir_patch_none, NULL); + 
reg2stack(FrameMap::t1_opr, dest, c->type(), false); + } + break; + case T_ADDRESS: // fall through + const2reg(src, FrameMap::t1_opr, lir_patch_none, NULL); + reg2stack(FrameMap::t1_opr, dest, c->type(), false); + case T_INT: // fall through + case T_FLOAT: + if (c->as_jint_bits() == 0) { + __ sw(zr, frame_map()->address_for_slot(dest->single_stack_ix())); + } else { + __ mvw(t1, c->as_jint_bits()); + __ sw(t1, frame_map()->address_for_slot(dest->single_stack_ix())); + } + break; + case T_LONG: // fall through + case T_DOUBLE: + if (c->as_jlong_bits() == 0) { + __ sd(zr, frame_map()->address_for_slot(dest->double_stack_ix(), + lo_word_offset_in_bytes)); + } else { + __ mv(t1, (intptr_t)c->as_jlong_bits()); + __ sd(t1, frame_map()->address_for_slot(dest->double_stack_ix(), + lo_word_offset_in_bytes)); + } + break; + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) { + assert(src->is_constant(), "should not call otherwise"); + assert(dest->is_address(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + LIR_Address* to_addr = dest->as_address_ptr(); + void (Assembler::* insn)(Register Rt, const Address &adr, Register temp); + switch (type) { + case T_ADDRESS: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::sd; break; + case T_LONG: + assert(c->as_jlong() == 0, "should be"); + insn = &Assembler::sd; break; + case T_DOUBLE: + assert(c->as_jdouble() == 0.0, "should be"); + insn = &Assembler::sd; break; + case T_INT: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::sw; break; + case T_FLOAT: + assert(c->as_jfloat() == 0.0f, "should be"); + insn = &Assembler::sw; break; + case T_OBJECT: // fall through + case T_ARRAY: + assert(c->as_jobject() == 0, "should be"); + if (UseCompressedOops && !wide) { + insn = &Assembler::sw; + } else { + insn = &Assembler::sd; + } + break; + case T_CHAR: // fall through + case T_SHORT: + 
assert(c->as_jint() == 0, "should be"); + insn = &Assembler::sh; + break; + case T_BOOLEAN: // fall through + case T_BYTE: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::sb; break; + default: + ShouldNotReachHere(); + insn = &Assembler::sd; // unreachable + } + if (info != NULL) { + add_debug_info_for_null_check_here(info); + } + (_masm->*insn)(zr, as_Address(to_addr), t0); +} + +void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { + assert(src->is_register(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + + // move between cpu-registers + if (dest->is_single_cpu()) { + if (src->type() == T_LONG) { + // Can do LONG -> OBJECT + move_regs(src->as_register_lo(), dest->as_register()); + return; + } + assert(src->is_single_cpu(), "must match"); + if (src->type() == T_OBJECT) { + __ verify_oop(src->as_register()); + } + move_regs(src->as_register(), dest->as_register()); + } else if (dest->is_double_cpu()) { + if (is_reference_type(src->type())) { + __ verify_oop(src->as_register()); + move_regs(src->as_register(), dest->as_register_lo()); + return; + } + assert(src->is_double_cpu(), "must match"); + Register f_lo = src->as_register_lo(); + Register f_hi = src->as_register_hi(); + Register t_lo = dest->as_register_lo(); + Register t_hi = dest->as_register_hi(); + assert(f_hi == f_lo, "must be same"); + assert(t_hi == t_lo, "must be same"); + move_regs(f_lo, t_lo); + } else if (dest->is_single_fpu()) { + assert(src->is_single_fpu(), "expect single fpu"); + __ fmv_s(dest->as_float_reg(), src->as_float_reg()); + } else if (dest->is_double_fpu()) { + assert(src->is_double_fpu(), "expect double fpu"); + __ fmv_d(dest->as_double_reg(), src->as_double_reg()); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { + precond(src->is_register() && dest->is_stack()); + + uint const c_sz32 = sizeof(uint32_t); + uint const c_sz64 = 
sizeof(uint64_t); + + assert(src->is_register(), "should not call otherwise"); + assert(dest->is_stack(), "should not call otherwise"); + if (src->is_single_cpu()) { + int index = dest->single_stack_ix(); + if (is_reference_type(type)) { + __ sd(src->as_register(), stack_slot_address(index, c_sz64)); + __ verify_oop(src->as_register()); + } else if (type == T_METADATA || type == T_DOUBLE || type == T_ADDRESS) { + __ sd(src->as_register(), stack_slot_address(index, c_sz64)); + } else { + __ sw(src->as_register(), stack_slot_address(index, c_sz32)); + } + } else if (src->is_double_cpu()) { + int index = dest->double_stack_ix(); + Address dest_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); + __ sd(src->as_register_lo(), dest_addr_LO); + } else if (src->is_single_fpu()) { + int index = dest->single_stack_ix(); + __ fsw(src->as_float_reg(), stack_slot_address(index, c_sz32)); + } else if (src->is_double_fpu()) { + int index = dest->double_stack_ix(); + __ fsd(src->as_double_reg(), stack_slot_address(index, c_sz64)); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { + LIR_Address* to_addr = dest->as_address_ptr(); + // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src + Register compressed_src = t1; + + if (patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } + + if (is_reference_type(type)) { + __ verify_oop(src->as_register()); + + if (UseCompressedOops && !wide) { + __ encode_heap_oop(compressed_src, src->as_register()); + } else { + compressed_src = src->as_register(); + } + } + + int null_check_here = code_offset(); + + switch (type) { + case T_FLOAT: + __ fsw(src->as_float_reg(), as_Address(to_addr)); + break; + + case T_DOUBLE: + __ fsd(src->as_double_reg(), as_Address(to_addr)); + break; + + case T_ARRAY: // fall through + case T_OBJECT: + 
if (UseCompressedOops && !wide) { + __ sw(compressed_src, as_Address(to_addr)); + } else { + __ sd(compressed_src, as_Address(to_addr)); + } + break; + case T_METADATA: + // We get here to store a method pointer to the stack to pass to + // a dtrace runtime call. This can't work on 64 bit with + // compressed klass ptrs: T_METADATA can be compressed klass + // ptr or a 64 bit method pointer. + ShouldNotReachHere(); + __ sd(src->as_register(), as_Address(to_addr)); + break; + case T_ADDRESS: + __ sd(src->as_register(), as_Address(to_addr)); + break; + case T_INT: + __ sw(src->as_register(), as_Address(to_addr)); + break; + case T_LONG: + __ sd(src->as_register_lo(), as_Address(to_addr)); + break; + case T_BYTE: // fall through + case T_BOOLEAN: + __ sb(src->as_register(), as_Address(to_addr)); + break; + case T_CHAR: // fall through + case T_SHORT: + __ sh(src->as_register(), as_Address(to_addr)); + break; + default: + ShouldNotReachHere(); + } + + if (info != NULL) { + add_debug_info_for_null_check(null_check_here, info); + } +} + +void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { + precond(src->is_stack() && dest->is_register()); + + uint const c_sz32 = sizeof(uint32_t); + uint const c_sz64 = sizeof(uint64_t); + + if (dest->is_single_cpu()) { + int index = src->single_stack_ix(); + if (type == T_INT) { + __ lw(dest->as_register(), stack_slot_address(index, c_sz32)); + } else if (is_reference_type(type)) { + __ ld(dest->as_register(), stack_slot_address(index, c_sz64)); + __ verify_oop(dest->as_register()); + } else if (type == T_METADATA || type == T_ADDRESS) { + __ ld(dest->as_register(), stack_slot_address(index, c_sz64)); + } else { + __ lwu(dest->as_register(), stack_slot_address(index, c_sz32)); + } + } else if (dest->is_double_cpu()) { + int index = src->double_stack_ix(); + Address src_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); + __ ld(dest->as_register_lo(), src_addr_LO); + } else if 
(dest->is_single_fpu()) { + int index = src->single_stack_ix(); + __ flw(dest->as_float_reg(), stack_slot_address(index, c_sz32)); + } else if (dest->is_double_fpu()) { + int index = src->double_stack_ix(); + __ fld(dest->as_double_reg(), stack_slot_address(index, c_sz64)); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { + deoptimize_trap(info); +} + +void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { + LIR_Opr temp; + if (type == T_LONG || type == T_DOUBLE) { + temp = FrameMap::t1_long_opr; + } else { + temp = FrameMap::t1_opr; + } + + stack2reg(src, temp, src->type()); + reg2stack(temp, dest, dest->type(), false); +} + +void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { + assert(src->is_address(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + + LIR_Address* addr = src->as_address_ptr(); + LIR_Address* from_addr = src->as_address_ptr(); + + if (addr->base()->type() == T_OBJECT) { + __ verify_oop(addr->base()->as_pointer_register()); + } + + if (patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } + + if (info != NULL) { + add_debug_info_for_null_check_here(info); + } + + int null_check_here = code_offset(); + switch (type) { + case T_FLOAT: + __ flw(dest->as_float_reg(), as_Address(from_addr)); + break; + case T_DOUBLE: + __ fld(dest->as_double_reg(), as_Address(from_addr)); + break; + case T_ARRAY: // fall through + case T_OBJECT: + if (UseCompressedOops && !wide) { + __ lwu(dest->as_register(), as_Address(from_addr)); + } else { + __ ld(dest->as_register(), as_Address(from_addr)); + } + break; + case T_METADATA: + // We get here to store a method pointer to the stack to pass to + // a dtrace runtime call. 
This can't work on 64 bit with + // compressed klass ptrs: T_METADATA can be a compressed klass + // ptr or a 64 bit method pointer. + ShouldNotReachHere(); + __ ld(dest->as_register(), as_Address(from_addr)); + break; + case T_ADDRESS: + // FIXME: OMG this is a horrible kludge. Any offset from an + // address that matches klass_offset_in_bytes() will be loaded + // as a word, not a long. + if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { + __ lwu(dest->as_register(), as_Address(from_addr)); + } else { + __ ld(dest->as_register(), as_Address(from_addr)); + } + break; + case T_INT: + __ lw(dest->as_register(), as_Address(from_addr)); + break; + case T_LONG: + __ ld(dest->as_register_lo(), as_Address_lo(from_addr)); + break; + case T_BYTE: + __ lb(dest->as_register(), as_Address(from_addr)); + break; + case T_BOOLEAN: + __ lbu(dest->as_register(), as_Address(from_addr)); + break; + case T_CHAR: + __ lhu(dest->as_register(), as_Address(from_addr)); + break; + case T_SHORT: + __ lh(dest->as_register(), as_Address(from_addr)); + break; + default: + ShouldNotReachHere(); + } + + if (is_reference_type(type)) { + if (UseCompressedOops && !wide) { + __ decode_heap_oop(dest->as_register()); + } + + if (!UseZGC) { + // Load barrier has not yet been applied, so ZGC can't verify the oop here + __ verify_oop(dest->as_register()); + } + } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { + if (UseCompressedClassPointers) { + __ decode_klass_not_null(dest->as_register()); + } + } +} + +void LIR_Assembler::emit_op3(LIR_Op3* op) { + switch (op->code()) { + case lir_idiv: // fall through + case lir_irem: + arithmetic_idiv(op->code(), + op->in_opr1(), + op->in_opr2(), + op->in_opr3(), + op->result_opr(), + op->info()); + break; + case lir_fmad: + __ fmadd_d(op->result_opr()->as_double_reg(), + op->in_opr1()->as_double_reg(), + op->in_opr2()->as_double_reg(), + op->in_opr3()->as_double_reg()); + break; + case 
lir_fmaf: + __ fmadd_s(op->result_opr()->as_float_reg(), + op->in_opr1()->as_float_reg(), + op->in_opr2()->as_float_reg(), + op->in_opr3()->as_float_reg()); + break; + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { + Label label; + + emit_branch(condition, cmp_opr1, cmp_opr2, label, /* is_far */ false, + /* is_unordered */ (condition == lir_cond_greaterEqual || condition == lir_cond_greater) ? false : true); + + Label done; + move_op(opr2, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack + false, // unaligned + false); // wide + __ j(done); + __ bind(label); + move_op(opr1, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack + false, // unaligned + false); // wide + __ bind(done); +} + +void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + LIR_Condition condition = op->cond(); + if (condition == lir_cond_always) { + if (op->info() != NULL) { + add_debug_info_for_branch(op->info()); + } + } else { + assert(op->in_opr1() != LIR_OprFact::illegalOpr && op->in_opr2() != LIR_OprFact::illegalOpr, "conditional branches must have legal operands"); + } + bool is_unordered = (op->ublock() == op->block()); + emit_branch(condition, op->in_opr1(), op->in_opr2(), *op->label(), /* is_far */ true, is_unordered); +} + +void LIR_Assembler::emit_branch(LIR_Condition cmp_flag, LIR_Opr cmp1, LIR_Opr cmp2, Label& label, + bool is_far, bool is_unordered) { + + if (cmp_flag == lir_cond_always) { + __ j(label); + return; + } + + if (cmp1->is_cpu_register()) { + Register reg1 = as_reg(cmp1); + if (cmp2->is_cpu_register()) { + Register reg2 = as_reg(cmp2); + __ c1_cmp_branch(cmp_flag, reg1, reg2, label, cmp1->type(), is_far); + } else if (cmp2->is_constant()) { + const2reg_helper(cmp2); + __ c1_cmp_branch(cmp_flag, reg1, t0, label, cmp2->type(), is_far); + } else { + ShouldNotReachHere(); + } + } else if 
(cmp1->is_single_fpu()) {
+    assert(cmp2->is_single_fpu(), "expect single float register");
+    __ c1_float_cmp_branch(cmp_flag, cmp1->as_float_reg(), cmp2->as_float_reg(), label, is_far, is_unordered);
+  } else if (cmp1->is_double_fpu()) {
+    assert(cmp2->is_double_fpu(), "expect double float register");
+    __ c1_float_cmp_branch(cmp_flag | C1_MacroAssembler::c1_double_branch_mask,
+                           cmp1->as_double_reg(), cmp2->as_double_reg(), label, is_far, is_unordered);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {  // one conversion per bytecode; any other bytecode hits ShouldNotReachHere
+  LIR_Opr src = op->in_opr();
+  LIR_Opr dest = op->result_opr();
+
+  switch (op->bytecode()) {
+    case Bytecodes::_i2f:
+      __ fcvt_s_w(dest->as_float_reg(), src->as_register()); break;
+    case Bytecodes::_i2d:
+      __ fcvt_d_w(dest->as_double_reg(), src->as_register()); break;
+    case Bytecodes::_l2d:
+      __ fcvt_d_l(dest->as_double_reg(), src->as_register_lo()); break;
+    case Bytecodes::_l2f:
+      __ fcvt_s_l(dest->as_float_reg(), src->as_register_lo()); break;
+    case Bytecodes::_f2d:
+      __ fcvt_d_s(dest->as_double_reg(), src->as_float_reg()); break;
+    case Bytecodes::_d2f:
+      __ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); break;
+    case Bytecodes::_i2c:
+      __ zero_extend(dest->as_register(), src->as_register(), 16); break;
+    case Bytecodes::_i2l:
+      __ addw(dest->as_register_lo(), src->as_register(), zr); break;
+    case Bytecodes::_i2s:
+      __ sign_extend(dest->as_register(), src->as_register(), 16); break;
+    case Bytecodes::_i2b:
+      __ sign_extend(dest->as_register(), src->as_register(), 8); break;
+    case Bytecodes::_l2i:
+      _masm->block_comment("FIXME: This coulde be no-op");
+      __ addw(dest->as_register(), src->as_register_lo(), zr); break;
+    case Bytecodes::_d2l:
+      __ fcvt_l_d_safe(dest->as_register_lo(), src->as_double_reg()); break;
+    case Bytecodes::_f2i:
+      __ fcvt_w_s_safe(dest->as_register(), src->as_float_reg()); break;
+    case Bytecodes::_f2l:
+      __ fcvt_l_s_safe(dest->as_register_lo(), src->as_float_reg()); break;
+    case Bytecodes::_d2i:
+      __ fcvt_w_d_safe(dest->as_register(), src->as_double_reg()); break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
+  if (op->init_check()) {  // klass init_state != fully_initialized -> far branch to the stub
+    __ lbu(t0, Address(op->klass()->as_register(),
+                       InstanceKlass::init_state_offset()));
+    __ mvw(t1, InstanceKlass::fully_initialized);
+    add_debug_info_for_null_check_here(op->stub()->info());
+    __ bne(t0, t1, *op->stub()->entry(), /* is_far */ true);
+  }
+
+  __ allocate_object(op->obj()->as_register(),
+                     op->tmp1()->as_register(),
+                     op->tmp2()->as_register(),
+                     op->header_size(),
+                     op->object_size(),
+                     op->klass()->as_register(),
+                     *op->stub()->entry());
+
+  __ bind(*op->stub()->continuation());
+}
+
+void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
+  Register len = op->len()->as_register();
+
+  if (UseSlowPath ||
+      (!UseFastNewObjectArray && is_reference_type(op->type())) ||
+      (!UseFastNewTypeArray && !is_reference_type(op->type()))) {
+    __ j(*op->stub()->entry());
+  } else {
+    Register tmp1 = op->tmp1()->as_register();
+    Register tmp2 = op->tmp2()->as_register();
+    Register tmp3 = op->tmp3()->as_register();
+    if (len == tmp1) {
+      tmp1 = tmp3;
+    } else if (len == tmp2) {
+      tmp2 = tmp3;
+    } else if (len == tmp3) {
+      // everything is ok
+    } else {
+      __ mv(tmp3, len);  // len aliases none of the temps: copy it into tmp3
+    }
+    __ allocate_array(op->obj()->as_register(),
+                      len,
+                      tmp1,
+                      tmp2,
+                      arrayOopDesc::header_size(op->type()),
+                      array_element_size(op->type()),  // exact_log2 of the element byte size, not the size itself
+                      op->klass()->as_register(),
+                      *op->stub()->entry());
+  }
+  __ bind(*op->stub()->continuation());
+}
+
+void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data,
+                                        Register recv, Label* update_done) {
+  for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
+    Label next_test;
+    // See if the receiver is receiver[n].
+    __ ld(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i))));
+    __ bne(recv, t1, next_test);
+    Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)));
+    __ add_memory_int64(data_addr, DataLayout::counter_increment);
+    __ j(*update_done);
+    __ bind(next_test);
+  }
+
+  // Didn't find receiver; find next empty slot and fill it in
+  for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
+    Label next_test;
+    Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)));
+    __ ld(t1, recv_addr);
+    __ bnez(t1, next_test);
+    __ sd(recv, recv_addr);
+    __ li(t1, DataLayout::counter_increment);
+    __ sd(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))));
+    __ j(*update_done);
+    __ bind(next_test);
+  }
+}
+
+void LIR_Assembler::data_check(LIR_OpTypeCheck *op, ciMethodData **md, ciProfileData **data) {  // out-params: *md and *data for op's profiled bci
+  ciMethod* method = op->profiled_method();
+  assert(method != NULL, "Should have method");
+  int bci = op->profiled_bci();
+  *md = method->method_data_or_null();
+  guarantee(*md != NULL, "Sanity");
+  *data = ((*md)->bci_to_data(bci));
+  assert(*data != NULL, "need data for type check");
+  assert((*data)->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
+}
+
+void LIR_Assembler::typecheck_helper_slowcheck(ciKlass *k, Register obj, Register Rtmp1,
+                                               Register k_RInfo, Register klass_RInfo,
+                                               Label *failure_target, Label *success_target) {
+  // get object class
+  // not a safepoint as obj null check happens earlier
+  __ load_klass(klass_RInfo, obj);
+  if (k->is_loaded()) {
+    // See if we get an immediate positive hit
+    __ ld(t0, Address(klass_RInfo, int64_t(k->super_check_offset())));
+    if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) {
+      __ bne(k_RInfo, t0, *failure_target, /* is_far */ true);
+      // successful cast, fall through to profile or jump
+    } else {
+      // See if we get an immediate positive hit
+      __ beq(k_RInfo, t0, *success_target);
+      // check for self
+      __ beq(klass_RInfo, k_RInfo, *success_target);
+
+      __ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo
+      __ sd(k_RInfo, Address(sp, 0)); // super klass (same slot as in the not-loaded path below)
+      __ sd(klass_RInfo, Address(sp, wordSize)); // sub klass (the object's klass loaded above)
+      __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+      // load result to k_RInfo
+      __ ld(k_RInfo, Address(sp, 0));
+      __ addi(sp, sp, 2 * wordSize); // 2: pop out k_RInfo and klass_RInfo
+      // result is a boolean
+      __ beqz(k_RInfo, *failure_target, /* is_far */ true);
+      // successful cast, fall through to profile or jump
+    }
+  } else {
+    // perform the fast part of the checking logic
+    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
+    // call out-of-line instance of __ check_klass_subtype_slow_path(...)
+    __ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo
+    __ sd(klass_RInfo, Address(sp, wordSize)); // sub klass
+    __ sd(k_RInfo, Address(sp, 0)); // super klass
+    __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+    // load result to k_RInfo
+    __ ld(k_RInfo, Address(sp, 0));
+    __ addi(sp, sp, 2 * wordSize); // 2: pop out k_RInfo and klass_RInfo
+    // result is a boolean
+    __ beqz(k_RInfo, *failure_target, /* is_far */ true);
+    // successful cast, fall through to profile or jump
+  }
+}
+
+void LIR_Assembler::profile_object(ciMethodData* md, ciProfileData* data, Register obj,
+                                   Register klass_RInfo, Label* obj_is_null) {
+  Label not_null;
+  __ bnez(obj, not_null);
+  // Object is null, update MDO and exit
+  Register mdo = klass_RInfo;  // klass_RInfo doubles as the MDO pointer on this path
+  __ mov_metadata(mdo, md->constant_encoding());
+  Address data_addr = __ form_address(t1, mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset()));
+  __ lbu(t0, data_addr);
+  __ ori(t0, t0, BitData::null_seen_byte_constant());
+  __ sb(t0, data_addr);
+  __ j(*obj_is_null);
+  __ bind(not_null);
+}
+
+void LIR_Assembler::typecheck_loaded(LIR_OpTypeCheck *op, ciKlass* k, Register k_RInfo) {  // put k into k_RInfo; a not-yet-loaded k goes through klass2reg_with_patching
+  if (!k->is_loaded()) {
+    klass2reg_with_patching(k_RInfo, op->info_for_patch());
+  } else {
+    __ mov_metadata(k_RInfo, k->constant_encoding());
+  }
+}
+
+void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) {
+  Register obj = op->object()->as_register();
+  Register k_RInfo = op->tmp1()->as_register();
+  Register klass_RInfo = op->tmp2()->as_register();
+  Register dst = op->result_opr()->as_register();
+  ciKlass* k = op->klass();
+  Register Rtmp1 = noreg;
+
+  // check if it needs to be profiled
+  ciMethodData* md = NULL;
+  ciProfileData* data = NULL;
+
+  const bool should_profile = op->should_profile();
+  if (should_profile) {
+    data_check(op, &md, &data);
+  }
+  Label profile_cast_success, profile_cast_failure;
+  Label *success_target = should_profile ? &profile_cast_success : success;
+  Label *failure_target = should_profile ? &profile_cast_failure : failure;
+
+  if (obj == k_RInfo) {  // a temp that aliases obj is swapped for dst
+    k_RInfo = dst;
+  } else if (obj == klass_RInfo) {
+    klass_RInfo = dst;
+  }
+  if (k->is_loaded() && !UseCompressedClassPointers) {
+    select_different_registers(obj, dst, k_RInfo, klass_RInfo);
+  } else {
+    Rtmp1 = op->tmp3()->as_register();
+    select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1);
+  }
+
+  assert_different_registers(obj, k_RInfo, klass_RInfo);
+
+  if (should_profile) {
+    profile_object(md, data, obj, klass_RInfo, obj_is_null);
+  } else {
+    __ beqz(obj, *obj_is_null);
+  }
+
+  typecheck_loaded(op, k, k_RInfo);
+  __ verify_oop(obj);
+
+  if (op->fast_check()) {
+    // get object class
+    // not a safepoint as obj null check happens earlier
+    __ load_klass(t0, obj);
+    __ bne(t0, k_RInfo, *failure_target, /* is_far */ true);
+    // successful cast, fall through to profile or jump
+  } else {
+    typecheck_helper_slowcheck(k, obj, Rtmp1, k_RInfo, klass_RInfo, failure_target, success_target);
+  }
+  if (should_profile) {
+    type_profile(obj, md, klass_RInfo, k_RInfo, data, success, failure, profile_cast_success, profile_cast_failure);
+  }
+  __ j(*success);
+}
+
+void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
+  const bool should_profile = op->should_profile();
+
+  LIR_Code code = op->code();
+  if (code == lir_store_check) {
+    typecheck_lir_store(op, should_profile);
+  } else if (code == lir_checkcast) {
+    Register obj = op->object()->as_register();
+    Register dst = op->result_opr()->as_register();
+    Label success;
+    emit_typecheck_helper(op, &success, op->stub()->entry(), &success);  // obj_is_null == success: a null obj passes the cast
+    __ bind(success);
+    if (dst != obj) {
+      __ mv(dst, obj);
+    }
+  } else if (code == lir_instanceof) {
+    Register obj = op->object()->as_register();
+    Register dst = op->result_opr()->as_register();
+    Label success, failure, done;
+    emit_typecheck_helper(op, &success, &failure, &failure);  // obj_is_null == failure: a null obj yields 0
+    __ bind(failure);
+    __ mv(dst, zr);
+    __ j(done);
+    __ bind(success);
+    __ mv(dst, 1);
+    __ bind(done);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
+  assert(VM_Version::supports_cx8(), "wrong machine");
+  Register addr;
+  if (op->addr()->is_register()) {
+    addr = as_reg(op->addr());
+  } else {
+    assert(op->addr()->is_address(), "what else?");
+    LIR_Address* addr_ptr = op->addr()->as_address_ptr();
+    assert(addr_ptr->disp() == 0, "need 0 disp");
+    assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index");
+    addr = as_reg(addr_ptr->base());
+  }
+  Register newval = as_reg(op->new_value());
+  Register cmpval = as_reg(op->cmp_value());
+
+  if (op->code() == lir_cas_obj) {
+    if (UseCompressedOops) {
+      Register tmp1 = op->tmp1()->as_register();
+      assert(op->tmp1()->is_valid(), "must be");
+      __ encode_heap_oop(tmp1, cmpval);
+      cmpval = tmp1;
+      __ encode_heap_oop(t1, newval);
+      newval = t1;
+      caswu(addr, newval, cmpval);  // CAS on the two encoded values (tmp1 and t1)
+    } else {
+      casl(addr, newval, cmpval);
+    }
+  } else if (op->code() == lir_cas_int) {
+    casw(addr, newval, cmpval);
+  } else {
+    casl(addr, newval, cmpval);
+  }
+}
+
+void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) {
+  switch (code) {
+    case lir_abs: __ fabs_d(dest->as_double_reg(), value->as_double_reg()); break;
+    case lir_sqrt: __ fsqrt_d(dest->as_double_reg(), value->as_double_reg()); break;
+    default: ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) {
+  assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register");
+  Register Rleft = left->is_single_cpu() ? left->as_register() : left->as_register_lo();
+  if (dst->is_single_cpu()) {
+    Register Rdst = dst->as_register();
+    if (right->is_constant()) {
+      int right_const = right->as_jint();
+      if (Assembler::operand_valid_for_add_immediate(right_const)) {
+        logic_op_imm(Rdst, Rleft, right_const, code);
+        __ addw(Rdst, Rdst, zr);
+      } else {
+        __ mv(t0, right_const);  // constant rejected by operand_valid_for_add_immediate: materialize it in t0
+        logic_op_reg32(Rdst, Rleft, t0, code);
+      }
+    } else {
+      Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo();
+      logic_op_reg32(Rdst, Rleft, Rright, code);
+    }
+  } else {
+    Register Rdst = dst->as_register_lo();
+    if (right->is_constant()) {
+      long right_const = right->as_jlong();
+      if (Assembler::operand_valid_for_add_immediate(right_const)) {
+        logic_op_imm(Rdst, Rleft, right_const, code);
+      } else {
+        __ mv(t0, right_const);
+        logic_op_reg(Rdst, Rleft, t0, code);
+      }
+    } else {
+      Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo();
+      logic_op_reg(Rdst, Rleft, Rright, code);
+    }
+  }
+}
+
+void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr src, LIR_Opr result, LIR_Op2* op) {
+  ShouldNotCallThis();
+}
+
+void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op) {
+  if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) {
+    bool is_unordered_less = (code == lir_ucmp_fd2i);
+    if (left->is_single_fpu()) {
+      __ float_cmp(true, is_unordered_less ? -1 : 1,
+                   left->as_float_reg(), right->as_float_reg(), dst->as_register());
+    } else if (left->is_double_fpu()) {
+      __ float_cmp(false, is_unordered_less ? -1 : 1,
+                   left->as_double_reg(), right->as_double_reg(), dst->as_register());
+    } else {
+      ShouldNotReachHere();
+    }
+  } else if (code == lir_cmp_l2i) {
+    __ cmp_l2i(dst->as_register(), left->as_register_lo(), right->as_register_lo());
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::align_call(LIR_Code code) {
+  // With RVC a call instruction may get 2-byte aligned.
+  // The address of the call instruction needs to be 4-byte aligned to
+  // ensure that it does not span a cache line so that it can be patched.
+  __ align(4);
+}
+
+void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
+  address call = __ trampoline_call(Address(op->addr(), rtype));
+  if (call == NULL) {
+    bailout("trampoline stub overflow");
+    return;
+  }
+  add_call_info(code_offset(), op->info());
+}
+
+void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
+  address call = __ ic_call(op->addr());
+  if (call == NULL) {
+    bailout("trampoline stub overflow");
+    return;
+  }
+  add_call_info(code_offset(), op->info());
+}
+
+void LIR_Assembler::emit_static_call_stub() {
+  address call_pc = __ pc();
+  assert((__ offset() % 4) == 0, "bad alignment");
+  address stub = __ start_a_stub(call_stub_size());
+  if (stub == NULL) {
+    bailout("static call stub overflow");
+    return;
+  }
+
+  int start = __ offset();
+
+  __ relocate(static_stub_Relocation::spec(call_pc));
+  __ emit_static_call_stub();
+
+  assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size()
+         <= call_stub_size(), "stub too big");
+  __ end_a_stub();
+}
+
+void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) {
+  assert(exceptionOop->as_register() == x10, "must match");
+  assert(exceptionPC->as_register() == x13, "must match");
+
+  // exception object is not added to oop map by LinearScan
+  // (LinearScan assumes that no oops are in fixed registers)
+  info->add_register_oop(exceptionOop);
+  Runtime1::StubID unwind_id;
+
+  // get current pc information
+  // pc is only needed if the method has an exception handler, the unwind code does not need it.
+  if (compilation()->debug_info_recorder()->last_pc_offset() == __ offset()) {
+    // As no instructions have been generated yet for this LIR node it's
+    // possible that an oop map already exists for the current offset.
+    // In that case insert a dummy NOP here to ensure all oop map PCs
+    // are unique. See JDK-8237483.
+    __ nop();
+  }
+  int pc_for_athrow_offset = __ offset();
+  InternalAddress pc_for_athrow(__ pc());
+  int32_t off = 0;
+  __ la_patchable(exceptionPC->as_register(), pc_for_athrow, off);
+  __ addi(exceptionPC->as_register(), exceptionPC->as_register(), off);
+  add_call_info(pc_for_athrow_offset, info); // for exception handler
+
+  __ verify_not_null_oop(x10);
+  // search an exception handler (x10: exception oop, x13: throwing pc)
+  if (compilation()->has_fpu_code()) {
+    unwind_id = Runtime1::handle_exception_id;
+  } else {
+    unwind_id = Runtime1::handle_exception_nofpu_id;
+  }
+  __ far_call(RuntimeAddress(Runtime1::entry_for(unwind_id)));
+  __ nop();
+}
+
+void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) {
+  assert(exceptionOop->as_register() == x10, "must match");
+  __ j(_unwind_handler_entry);
+}
+
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
+  Register left_reg = left->is_single_cpu() ? left->as_register() : left->as_register_lo();
+  Register dest_reg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo();
+  Register count_reg = count->as_register();
+  if (dest->is_single_cpu()) {
+    assert (dest->type() == T_INT, "unexpected result type");
+    assert (left->type() == T_INT, "unexpected left type");
+    __ andi(t0, count_reg, 31); // should not shift more than 31 bits
+    switch (code) {
+      case lir_shl: __ sllw(dest_reg, left_reg, t0); break;
+      case lir_shr: __ sraw(dest_reg, left_reg, t0); break;
+      case lir_ushr: __ srlw(dest_reg, left_reg, t0); break;
+      default: ShouldNotReachHere();
+    }
+  } else if (dest->is_double_cpu()) {
+    __ andi(t0, count_reg, 63); // should not shift more than 63 bits
+    switch (code) {
+      case lir_shl: __ sll(dest_reg, left_reg, t0); break;
+      case lir_shr: __ sra(dest_reg, left_reg, t0); break;
+      case lir_ushr: __ srl(dest_reg, left_reg, t0); break;
+      default: ShouldNotReachHere();
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) {
+  Register left_reg = left->is_single_cpu() ? left->as_register() : left->as_register_lo();
+  Register dest_reg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo();
+  if (dest->is_single_cpu()) {
+    assert (dest->type() == T_INT, "unexpected result type");
+    assert (left->type() == T_INT, "unexpected left type");
+    count &= 0x1f;
+    if (count != 0) {
+      switch (code) {
+        case lir_shl: __ slliw(dest_reg, left_reg, count); break;
+        case lir_shr: __ sraiw(dest_reg, left_reg, count); break;
+        case lir_ushr: __ srliw(dest_reg, left_reg, count); break;
+        default: ShouldNotReachHere();
+      }
+    } else {
+      move_regs(left_reg, dest_reg);  // masked count is 0: plain register move
+    }
+  } else if (dest->is_double_cpu()) {
+    count &= 0x3f;
+    if (count != 0) {
+      switch (code) {
+        case lir_shl: __ slli(dest_reg, left_reg, count); break;
+        case lir_shr: __ srai(dest_reg, left_reg, count); break;
+        case lir_ushr: __ srli(dest_reg, left_reg, count); break;
+        default: ShouldNotReachHere();
+      }
+    } else {
+      move_regs(left->as_register_lo(), dest->as_register_lo());
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::emit_lock(LIR_OpLock* op) {
+  Register obj = op->obj_opr()->as_register(); // may not be an oop
+  Register hdr = op->hdr_opr()->as_register();
+  Register lock = op->lock_opr()->as_register();
+  if (!UseFastLocking) {
+    __ j(*op->stub()->entry());
+  } else if (op->code() == lir_lock) {
+    Register scratch = noreg;
+    if (UseBiasedLocking) {
+      scratch = op->scratch_opr()->as_register();
+    }
+    assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+    // add debug info for NullPointerException only if one is possible
+    int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry());
+    if (op->info() != NULL) {
+      add_debug_info_for_null_check(null_check_offset, op->info());
+    }
+  } else if (op->code() == lir_unlock) {
+    assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+    __ unlock_object(hdr, obj, lock, *op->stub()->entry());
+  } else {
+    Unimplemented();
+  }
+  __ bind(*op->stub()->continuation());
+}
+
+
+void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
+  ciMethod* method = op->profiled_method();
+  int bci = op->profiled_bci();
+
+  // Update counter for all call types
+  ciMethodData* md = method->method_data_or_null();
+  guarantee(md != NULL, "Sanity");
+  ciProfileData* data = md->bci_to_data(bci);
+  assert(data != NULL && data->is_CounterData(), "need CounterData for calls");
+  assert(op->mdo()->is_single_cpu(), "mdo must be allocated");
+  Register mdo = op->mdo()->as_register();
+  __ mov_metadata(mdo, md->constant_encoding());
+  Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
+  // Perform additional virtual call profiling for invokevirtual and
+  // invokeinterface bytecodes
+  if (op->should_profile_receiver_type()) {
+    assert(op->recv()->is_single_cpu(), "recv must be allocated");
+    Register recv = op->recv()->as_register();
+    assert_different_registers(mdo, recv);
+    assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
+    ciKlass* known_klass = op->known_holder();
+    if (C1OptimizeVirtualCallProfiling && known_klass != NULL) {
+      // We know the type that will be seen at this call site; we can
+      // statically update the MethodData* rather than needing to do
+      // dynamic tests on the receiver type
+      // NOTE: we should probably put a lock around this search to
+      // avoid collisions by concurrent compilations
+      ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
+      uint i;
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (known_klass->equals(receiver)) {
+          Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+          __ add_memory_int64(data_addr, DataLayout::counter_increment);
+          return;
+        }
+      }
+
+      // Receiver type not found in profile data; select an empty slot
+      // Note that this is less efficient than it should be because it
+      // always does a write to the receiver part of the
+      // VirtualCallData rather than just the first time
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (receiver == NULL) {
+          Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
+          __ mov_metadata(t1, known_klass->constant_encoding());
+          __ sd(t1, recv_addr);
+          Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+          __ add_memory_int64(data_addr, DataLayout::counter_increment);
+          return;
+        }
+      }
+    } else {
+      __ load_klass(recv, recv);  // recv is overwritten with its klass
+      Label update_done;
+      type_profile_helper(mdo, md, data, recv, &update_done);
+      // Receiver did not match any saved receiver and there is no empty row for it.
+      // Increment total counter to indicate polymorphic case.
+      __ add_memory_int64(counter_addr, DataLayout::counter_increment);
+
+      __ bind(update_done);
+    }
+  } else {
+    // Static call
+    __ add_memory_int64(counter_addr, DataLayout::counter_increment);
+  }
+}
+
+void LIR_Assembler::emit_delay(LIR_OpDelay*) { Unimplemented(); }
+
+void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) {
+  __ la(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no));
+}
+
+void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { Unimplemented(); }
+
+void LIR_Assembler::check_conflict(ciKlass* exact_klass, intptr_t current_klass,
+                                   Register tmp, Label &next, Label &none,
+                                   Address mdo_addr) {
+  if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) {
+    if (exact_klass != NULL) {
+      __ mov_metadata(tmp, exact_klass->constant_encoding());
+    } else {
+      __ load_klass(tmp, tmp);
+    }
+
+    __ ld(t1, mdo_addr);
+    __ xorr(tmp, tmp, t1);
+    __ andi(t0, tmp, TypeEntries::type_klass_mask);
+    // klass seen before, nothing to do. The unknown bit may have been
+    // set already but no need to check.
+    __ beqz(t0, next);
+
+    // already unknown. Nothing to do anymore.
+    __ andi(t0, tmp, TypeEntries::type_unknown);
+    __ bnez(t0, next);
+
+    if (TypeEntries::is_type_none(current_klass)) {
+      __ beqz(t1, none);
+      __ li(t0, (u1)TypeEntries::null_seen);
+      __ beq(t0, t1, none);
+      // There is a chance that the checks above (re-reading profiling
+      // data from memory) fail if another thread has just set the
+      // profiling to this obj's klass
+      __ membar(MacroAssembler::LoadLoad);
+      __ ld(t1, mdo_addr);
+      __ xorr(tmp, tmp, t1);
+      __ andi(t0, tmp, TypeEntries::type_klass_mask);
+      __ beqz(t0, next);
+    }
+  } else {
+    assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+           ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only");
+
+    __ ld(tmp, mdo_addr);
+    // already unknown. Nothing to do anymore.
+    __ andi(t0, tmp, TypeEntries::type_unknown);
+    __ bnez(t0, next);
+  }
+
+  // different than before. Cannot keep accurate profile.
+  __ ld(t1, mdo_addr);
+  __ ori(t1, t1, TypeEntries::type_unknown);
+  __ sd(t1, mdo_addr);
+
+  if (TypeEntries::is_type_none(current_klass)) {
+    __ j(next);
+
+    __ bind(none);
+    // first time here. Set profile type.
+    __ sd(tmp, mdo_addr);
+  }
+}
+
+void LIR_Assembler::check_no_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp,
+                                      Address mdo_addr, Label &next) {
+  // There's a single possible klass at this profile point
+  assert(exact_klass != NULL, "should be");
+  if (TypeEntries::is_type_none(current_klass)) {
+    __ mov_metadata(tmp, exact_klass->constant_encoding());
+    __ ld(t1, mdo_addr);
+    __ xorr(tmp, tmp, t1);
+    __ andi(t0, tmp, TypeEntries::type_klass_mask);
+    __ beqz(t0, next);
+#ifdef ASSERT
+    {
+      Label ok;
+      __ ld(t0, mdo_addr);
+      __ beqz(t0, ok);
+      __ li(t1, (u1)TypeEntries::null_seen);
+      __ beq(t0, t1, ok);
+      // may have been set by another thread
+      __ membar(MacroAssembler::LoadLoad);
+      __ mov_metadata(t0, exact_klass->constant_encoding());
+      __ ld(t1, mdo_addr);
+      __ xorr(t1, t0, t1);
+      __ andi(t1, t1, TypeEntries::type_mask);
+      __ beqz(t1, ok);
+
+      __ stop("unexpected profiling mismatch");
+      __ bind(ok);
+    }
+#endif
+    // first time here. Set profile type.
+    __ sd(tmp, mdo_addr);
+  } else {
+    assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+           ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent");
+
+    __ ld(tmp, mdo_addr);
+    // already unknown. Nothing to do anymore.
+    __ andi(t0, tmp, TypeEntries::type_unknown);
+    __ bnez(t0, next);
+
+    __ ori(tmp, tmp, TypeEntries::type_unknown);
+    __ sd(tmp, mdo_addr);
+  }
+}
+
+void LIR_Assembler::check_null(Register tmp, Label &update, intptr_t current_klass,
+                               Address mdo_addr, bool do_update, Label &next) {
+  __ bnez(tmp, update);
+  if (!TypeEntries::was_null_seen(current_klass)) {
+    __ ld(t1, mdo_addr);
+    __ ori(t1, t1, TypeEntries::null_seen);
+    __ sd(t1, mdo_addr);
+  }
+  if (do_update) {
+    __ j(next);
+  }
+}
+
+void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) {
+  COMMENT("emit_profile_type {");
+  Register obj = op->obj()->as_register();
+  Register tmp = op->tmp()->as_pointer_register();
+  Address mdo_addr = as_Address(op->mdp()->as_address_ptr());
+  ciKlass* exact_klass = op->exact_klass();
+  intptr_t current_klass = op->current_klass();
+  bool not_null = op->not_null();
+  bool no_conflict = op->no_conflict();
+
+  Label update, next, none;
+
+  bool do_null = !not_null;
+  bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass;
+  bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set;
+
+  assert(do_null || do_update, "why are we here?");
+  assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?");
+  assert_different_registers(tmp, t0, t1, mdo_addr.base());
+
+  __ verify_oop(obj);
+
+  if (tmp != obj) {
+    __ mv(tmp, obj);
+  }
+  if (do_null) {
+    check_null(tmp, update, current_klass, mdo_addr, do_update, next);
+#ifdef ASSERT
+  } else {
+    __ bnez(tmp, update);
+    __ stop("unexpected null obj");
+#endif
+  }
+
+  __ bind(update);
+
+  if (do_update) {
+#ifdef ASSERT
+    if (exact_klass != NULL) {
+      check_exact_klass(tmp, exact_klass);
+    }
+#endif
+    if (!no_conflict) {
+      check_conflict(exact_klass, current_klass, tmp, next, none, mdo_addr);
+    } else {
+      check_no_conflict(exact_klass, current_klass, tmp, mdo_addr, next);
+    }
+
+    __ bind(next);
+  }
+  COMMENT("} emit_profile_type");
+}
+
+void LIR_Assembler::align_backward_branch_target() { }
+
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
+  // tmp must be unused
+  assert(tmp->is_illegal(), "wasting a register if tmp is allocated");
+
+  if (left->is_single_cpu()) {
+    assert(dest->is_single_cpu(), "expect single result reg");
+    __ negw(dest->as_register(), left->as_register());
+  } else if (left->is_double_cpu()) {
+    assert(dest->is_double_cpu(), "expect double result reg");
+    __ neg(dest->as_register_lo(), left->as_register_lo());
+  } else if (left->is_single_fpu()) {
+    assert(dest->is_single_fpu(), "expect single float result reg");
+    __ fneg_s(dest->as_float_reg(), left->as_float_reg());
+  } else {
+    assert(left->is_double_fpu(), "expect double float operand reg");
+    assert(dest->is_double_fpu(), "expect double float result reg");
+    __ fneg_d(dest->as_double_reg(), left->as_double_reg());
+  }
+}
+
+
+void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
+  if (patch_code != lir_patch_none) {
+    deoptimize_trap(info);
+    return;
+  }
+
+  LIR_Address* adr = addr->as_address_ptr();
+  Register dst = dest->as_register_lo();
+
+  assert_different_registers(dst, t0);
+  if (adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && (!adr->index()->is_cpu_register())) {
+    int scale = adr->scale();
+    intptr_t offset = adr->disp();
+    LIR_Opr index_op = adr->index();
+    if (index_op->is_constant()) {
+      offset += ((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale;
+    }
+
+    if (!is_imm_in_range(offset, 12, 0)) {  // dst is also the base and offset fails the range check: form the address in t0, then copy
+      __ la(t0, as_Address(adr));
+      __ mv(dst, t0);
+      return;
+    }
+  }
+
+  __ la(dst, as_Address(adr));
+}
+
+
+void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
+  assert(!tmp->is_valid(), "don't need temporary");
+
+  CodeBlob *cb = CodeCache::find_blob(dest);
+  if (cb != NULL) {  // dest resolves to a code-cache blob: far_call; otherwise la_patchable + jalr through t0
+    __ far_call(RuntimeAddress(dest));
+  } else {
+    int32_t offset = 0;
+    __ la_patchable(t0, RuntimeAddress(dest), offset);
+    __ jalr(x1, t0, offset);
+  }
+
+  if (info != NULL) {
+    add_call_info_here(info);
+  }
+}
+
+void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) {
+  if (dest->is_address() || src->is_address()) {
+    move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /*unaligned*/false, /* wide */ false);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+#ifdef ASSERT
+// emit run-time assertion
+void LIR_Assembler::emit_assert(LIR_OpAssert* op) {
+  assert(op->code() == lir_assert, "must be");
+
+  Label ok;
+  if (op->in_opr1()->is_valid()) {
+    assert(op->in_opr2()->is_valid(), "both operands must be valid");
+    bool is_unordered = false;  // NOTE(review): never read; emit_branch below gets its own expression
+    LIR_Condition cond = op->condition();
+    emit_branch(cond, op->in_opr1(), op->in_opr2(), ok, /* is_far */ false,
+                /* is_unordered */(cond == lir_cond_greaterEqual || cond == lir_cond_greater) ? false : true);
+  } else {
+    assert(op->in_opr2()->is_illegal(), "both operands must be illegal");
+    assert(op->condition() == lir_cond_always, "no other conditions allowed");
+  }
+
+  if (op->halt()) {
+    const char* str = __ code_string(op->msg());
+    __ stop(str);
+  } else {
+    breakpoint();
+  }
+  __ bind(ok);
+}
+#endif
+
+#ifndef PRODUCT
+#define COMMENT(x) do { __ block_comment(x); } while (0)
+#else
+#define COMMENT(x)
+#endif
+
+void LIR_Assembler::membar() {
+  COMMENT("membar");
+  __ membar(MacroAssembler::AnyAny);
+}
+
+void LIR_Assembler::membar_acquire() {
+  __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+}
+
+void LIR_Assembler::membar_release() {
+  __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+}
+
+void LIR_Assembler::membar_loadload() {
+  __ membar(MacroAssembler::LoadLoad);
+}
+
+void LIR_Assembler::membar_storestore() {
+  __ membar(MacroAssembler::StoreStore);
+}
+
+void LIR_Assembler::membar_loadstore() { __ membar(MacroAssembler::LoadStore); }
+
+void LIR_Assembler::membar_storeload() { __ membar(MacroAssembler::StoreLoad); }
+
+void LIR_Assembler::on_spin_wait() {
+  Unimplemented();
+}
+
+void LIR_Assembler::get_thread(LIR_Opr result_reg) {
+  __ mv(result_reg->as_register(), xthread);
+}
+
+void LIR_Assembler::peephole(LIR_List *lir) {}
+
+void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp_op) {
+  Address addr = as_Address(src->as_address_ptr());
+  BasicType type = src->type();
+  bool is_oop = is_reference_type(type);
+
+  get_op(type);  // sets the xchg/add member-function pointers invoked below
+
+  switch (code) {
+    case lir_xadd:
+      {
+        RegisterOrConstant inc;
+        Register tmp = as_reg(tmp_op);
+        Register dst = as_reg(dest);
+        if (data->is_constant()) {
+          inc = RegisterOrConstant(as_long(data));
+          assert_different_registers(dst, addr.base(), tmp);
+          assert_different_registers(tmp, t0);
+        } else {
+          inc = RegisterOrConstant(as_reg(data));
+          assert_different_registers(inc.as_register(), dst, addr.base(), tmp);
+        }
+        __ la(tmp, addr);
+        (_masm->*add)(dst, inc, tmp);
+        break;
+      }
+    case lir_xchg:
+      {
+        Register tmp = tmp_op->as_register();
+        Register obj = as_reg(data);
+        Register dst = as_reg(dest);
+        if (is_oop && UseCompressedOops) {
+          __ encode_heap_oop(t0, obj);  // exchange the encoded value; the result is decoded after the xchg
+          obj = t0;
+        }
+        assert_different_registers(obj, addr.base(), tmp, dst);
+        __ la(tmp, addr);
+        (_masm->*xchg)(dst, obj, tmp);
+        if (is_oop && UseCompressedOops) {
+          __ decode_heap_oop(dst);
+        }
+      }
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+  __ membar(MacroAssembler::AnyAny);
+}
+
+int LIR_Assembler::array_element_size(BasicType type) const {
+  int elem_size = type2aelembytes(type);
+  return exact_log2(elem_size);  // returns the log2 of the element byte size, not the size itself
+}
+
+// Helper functions which check for overflow and set bailout if it
+// occurs. They always return a valid embeddable pointer, but in the
+// bailout case the pointer won't be to unique storage.
+address LIR_Assembler::float_constant(float f) { + address const_addr = __ float_constant(f); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + +address LIR_Assembler::double_constant(double d) { + address const_addr = __ double_constant(d); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + +address LIR_Assembler::int_constant(jlong n) { + address const_addr = __ long_constant(n); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + +void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, + Assembler::rl /* release */, t0, true /* result as bool */); + __ seqz(t0, t0); // cmpxchg not equal, set t0 to 1 + __ membar(MacroAssembler::AnyAny); +} + +void LIR_Assembler::caswu(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::uint32, Assembler::aq /* acquire */, + Assembler::rl /* release */, t0, true /* result as bool */); + __ seqz(t0, t0); // cmpxchg not equal, set t0 to 1 + __ membar(MacroAssembler::AnyAny); +} + +void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::int64, Assembler::aq /* acquire */, + Assembler::rl /* release */, t0, true /* result as bool */); + __ seqz(t0, t0); // cmpxchg not equal, set t0 to 1 + __ membar(MacroAssembler::AnyAny); +} + +void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) { + address target = NULL; + + switch (patching_id(info)) { + case PatchingStub::access_field_id: + target = Runtime1::entry_for(Runtime1::access_field_patching_id); + break; + case PatchingStub::load_klass_id: + target = 
Runtime1::entry_for(Runtime1::load_klass_patching_id);
+      break;
+    case PatchingStub::load_mirror_id:
+      target = Runtime1::entry_for(Runtime1::load_mirror_patching_id);
+      break;
+    case PatchingStub::load_appendix_id:
+      target = Runtime1::entry_for(Runtime1::load_appendix_patching_id);
+      break;
+    default: ShouldNotReachHere();
+  }
+
+  __ far_call(RuntimeAddress(target));
+  add_call_info_here(info);
+}
+
+void LIR_Assembler::check_exact_klass(Register tmp, ciKlass* exact_klass) {
+  Label ok;
+  __ load_klass(tmp, tmp);  // tmp is overwritten with the klass loaded from it
+  __ mov_metadata(t0, exact_klass->constant_encoding());
+  __ beq(tmp, t0, ok);
+  __ stop("exact klass and actual klass differ");
+  __ bind(ok);
+}
+
+void LIR_Assembler::get_op(BasicType type) {
+  // Choose the MacroAssembler emitters that atomic_op() invokes through 'xchg' and 'add'.
+  const bool is_ref = (type == T_OBJECT || type == T_ARRAY);
+
+  if (type == T_INT) {
+    xchg = &MacroAssembler::atomic_xchgalw;
+    add = &MacroAssembler::atomic_addalw;
+    return;
+  }
+
+  if (type == T_LONG || (is_ref && !UseCompressedOops)) {
+    xchg = &MacroAssembler::atomic_xchgal;
+    add = &MacroAssembler::atomic_addal;
+    return;
+  }
+
+  if (is_ref) {  // references with UseCompressedOops
+    xchg = &MacroAssembler::atomic_xchgalwu;
+    add = &MacroAssembler::atomic_addalw;
+    return;
+  }
+
+  ShouldNotReachHere();
+}
+
+// emit_opTypeCheck sub functions
+void LIR_Assembler::typecheck_lir_store(LIR_OpTypeCheck* op, bool should_profile) {
+  Register value = op->object()->as_register();
+  Register array = op->array()->as_register();
+  Register k_RInfo = op->tmp1()->as_register();
+  Register klass_RInfo = op->tmp2()->as_register();
+  Register Rtmp1 = op->tmp3()->as_register();
+
+  CodeStub* stub = op->stub();
+
+  // check if it needs to be profiled
+  ciMethodData* md = NULL;
+  ciProfileData* data = NULL;
+
+  if (should_profile) {
+    data_check(op, &md, &data);
+  }
+  Label profile_cast_success, profile_cast_failure, done;
+  Label *success_target = should_profile ? &profile_cast_success : &done;
+  Label *failure_target = should_profile ? 
&profile_cast_failure : stub->entry(); + + if (should_profile) { + profile_object(md, data, value, klass_RInfo, &done); + } else { + __ beqz(value, done); + } + + add_debug_info_for_null_check_here(op->info_for_exception()); + __ load_klass(k_RInfo, array); + __ load_klass(klass_RInfo, value); + + lir_store_slowcheck(k_RInfo, klass_RInfo, Rtmp1, success_target, failure_target); + + // fall through to the success case + if (should_profile) { + Register mdo = klass_RInfo; + Register recv = k_RInfo; + __ bind(profile_cast_success); + __ mov_metadata(mdo, md->constant_encoding()); + __ load_klass(recv, value); + type_profile_helper(mdo, md, data, recv, &done); + __ j(done); + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ ld(t1, counter_addr); + __ addi(t1, t1, -DataLayout::counter_increment); + __ sd(t1, counter_addr); + __ j(*stub->entry()); + } + + __ bind(done); +} + +void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { + _masm->code_section()->relocate(adr, relocInfo::poll_type); + int pc_offset = code_offset(); + flush_debug_info(pc_offset); + info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); + if (info->exception_handlers() != NULL) { + compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); + } +} + +void LIR_Assembler::type_profile(Register obj, ciMethodData* md, Register klass_RInfo, Register k_RInfo, + ciProfileData* data, Label* success, Label* failure, + Label& profile_cast_success, Label& profile_cast_failure) { + Register mdo = klass_RInfo; + Register recv = k_RInfo; + __ bind(profile_cast_success); + __ mov_metadata(mdo, md->constant_encoding()); + __ load_klass(recv, obj); + Label update_done; + type_profile_helper(mdo, md, data, recv, success); + __ j(*success); + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); 
+  Address counter_addr = __ form_address(t1, mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
+  __ ld(t0, counter_addr);
+  __ addi(t0, t0, -DataLayout::counter_increment);
+  __ sd(t0, counter_addr);
+  __ j(*failure);
+}
+
+void LIR_Assembler::lir_store_slowcheck(Register k_RInfo, Register klass_RInfo, Register Rtmp1,
+                                        Label* success_target, Label* failure_target) {
+  // get instance klass (it's already uncompressed)
+  __ ld(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset()));
+  // perform the fast part of the checking logic
+  __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
+  // call out-of-line instance of __ check_klass_subtype_slow_path(...)
+  __ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo
+  __ sd(klass_RInfo, Address(sp, wordSize)); // sub klass
+  __ sd(k_RInfo, Address(sp, 0)); // super klass
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+  // load result to k_RInfo
+  __ ld(k_RInfo, Address(sp, 0));
+  __ addi(sp, sp, 2 * wordSize); // 2: pop out k_RInfo and klass_RInfo
+  // result is a boolean
+  __ beqz(k_RInfo, *failure_target, /* is_far */ true);
+}
+
+void LIR_Assembler::const2reg_helper(LIR_Opr src) {
+  // Load the constant src into t0, using the long operand for T_LONG.
+  const BasicType kind = src->as_constant_ptr()->type();
+  switch (kind) {
+    case T_LONG:
+      const2reg(src, FrameMap::t0_long_opr, lir_patch_none, NULL);
+      break;
+    case T_INT:
+    case T_ADDRESS:
+    case T_OBJECT:
+    case T_ARRAY:
+    case T_METADATA:
+      const2reg(src, FrameMap::t0_opr, lir_patch_none, NULL);
+      break;
+    default:
+      // T_FLOAT and T_DOUBLE end up here as well
+      ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::logic_op_reg32(Register dst, Register left, Register right, LIR_Code code) {
+  // Register-register and/or/xor; dispatches on code to the
+  // andrw / orrw / xorrw emitters.
+  if (code == lir_logic_and)      { __ andrw(dst, left, right); }
+  else if (code == lir_logic_or)  { __ orrw (dst, left, right); }
+  else if (code == lir_logic_xor) { __ xorrw(dst, left, right); }
+  else                            { ShouldNotReachHere(); }
+}
+
+void 
LIR_Assembler::logic_op_reg(Register dst, Register left, Register right, LIR_Code code) {
+  switch (code) {
+    case lir_logic_and: __ andr(dst, left, right); break;
+    case lir_logic_or:  __ orr (dst, left, right); break;
+    case lir_logic_xor: __ xorr(dst, left, right); break;
+    default: ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::logic_op_imm(Register dst, Register left, int right, LIR_Code code) {
+  // Register-immediate and/or/xor; dispatches on code to the
+  // andi / ori / xori emitters.
+  if (code == lir_logic_and)      { __ andi(dst, left, right); }
+  else if (code == lir_logic_or)  { __ ori (dst, left, right); }
+  else if (code == lir_logic_xor) { __ xori(dst, left, right); }
+  else                            { ShouldNotReachHere(); }
+}
+
+void LIR_Assembler::store_parameter(Register r, int offset_from_rsp_in_words) {
+  assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
+  const int byte_offset = offset_from_rsp_in_words * BytesPerWord;
+  assert(byte_offset < frame_map()->reserved_argument_area_size(), "invalid offset");
+  __ sd(r, Address(sp, byte_offset));  // store r at sp + byte_offset
+}
+
+void LIR_Assembler::store_parameter(jint c, int offset_from_rsp_in_words) {
+  assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
+  const int byte_offset = offset_from_rsp_in_words * BytesPerWord;
+  assert(byte_offset < frame_map()->reserved_argument_area_size(), "invalid offset");
+  __ li(t0, c);  // the constant is staged in t0 before the store
+  __ sd(t0, Address(sp, byte_offset));
+}
+
+#undef __
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP +#define CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP + +// ArrayCopyStub needs access to bailout +friend class ArrayCopyStub; + +private: + +#include "c1_LIRAssembler_arith_riscv.hpp" +#include "c1_LIRAssembler_arraycopy_riscv.hpp" + + int array_element_size(BasicType type) const; + + static Register as_reg(LIR_Opr op) { + return op->is_double_cpu() ? op->as_register_lo() : op->as_register(); + } + + Address as_Address(LIR_Address* addr, Register tmp); + + // helper functions which checks for overflow and sets bailout if it + // occurs. Always returns a valid embeddable pointer but in the + // bailout case the pointer won't be to unique storage. + address float_constant(float f); + address double_constant(double d); + address int_constant(jlong n); + + // Ensure we have a valid Address (base + offset) to a stack-slot. 
+ Address stack_slot_address(int index, uint shift, int adjust = 0); + + // Record the type of the receiver in ReceiverTypeData + void type_profile_helper(Register mdo, + ciMethodData *md, ciProfileData *data, + Register recv, Label* update_done); + + void add_debug_info_for_branch(address adr, CodeEmitInfo* info); + + void casw(Register addr, Register newval, Register cmpval); + void caswu(Register addr, Register newval, Register cmpval); + void casl(Register addr, Register newval, Register cmpval); + + void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL); + + void deoptimize_trap(CodeEmitInfo *info); + + enum { + // See emit_static_call_stub for detail + // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) + _call_stub_size = 14 * NativeInstruction::instruction_size + + (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), + // See emit_exception_handler for detail + // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) + _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller + // See emit_deopt_handler for detail + // auipc (1) + far_jump (6 or 2) + _deopt_handler_size = 1 * NativeInstruction::instruction_size + + 6 * NativeInstruction::instruction_size // or smaller + }; + + void check_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp, + Label &next, Label &none, Address mdo_addr); + void check_no_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp, Address mdo_addr, Label &next); + + void check_exact_klass(Register tmp, ciKlass* exact_klass); + + void check_null(Register tmp, Label &update, intptr_t current_klass, Address mdo_addr, bool do_update, Label &next); + + void (MacroAssembler::*add)(Register prev, RegisterOrConstant incr, Register addr); + void (MacroAssembler::*xchg)(Register prev, Register newv, Register addr); + + void get_op(BasicType type); 
+ + // emit_typecheck_helper sub functions + void data_check(LIR_OpTypeCheck *op, ciMethodData **md, ciProfileData **data); + void typecheck_helper_slowcheck(ciKlass* k, Register obj, Register Rtmp1, + Register k_RInfo, Register klass_RInfo, + Label* failure_target, Label* success_target); + void profile_object(ciMethodData* md, ciProfileData* data, Register obj, + Register klass_RInfo, Label* obj_is_null); + void typecheck_loaded(LIR_OpTypeCheck* op, ciKlass* k, Register k_RInfo); + + // emit_opTypeCheck sub functions + void typecheck_lir_store(LIR_OpTypeCheck* op, bool should_profile); + + void type_profile(Register obj, ciMethodData* md, Register klass_RInfo, Register k_RInfo, + ciProfileData* data, Label* success, Label* failure, + Label& profile_cast_success, Label& profile_cast_failure); + + void lir_store_slowcheck(Register k_RInfo, Register klass_RInfo, Register Rtmp1, + Label* success_target, Label* failure_target); + + void const2reg_helper(LIR_Opr src); + + void emit_branch(LIR_Condition cmp_flag, LIR_Opr cmp1, LIR_Opr cmp2, Label& label, bool is_far, bool is_unordered); + + void logic_op_reg32(Register dst, Register left, Register right, LIR_Code code); + void logic_op_reg(Register dst, Register left, Register right, LIR_Code code); + void logic_op_imm(Register dst, Register left, int right, LIR_Code code); + +public: + + void emit_cmove(LIR_Op4* op); + + void store_parameter(Register r, int offset_from_rsp_in_words); + void store_parameter(jint c, int offset_from_rsp_in_words); + +#endif // CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp @@ -0,0 +1,1079 @@ +/* + * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_Instruction.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_LIRGenerator.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArray.hpp" +#include "ci/ciObjArrayKlass.hpp" +#include "ci/ciTypeArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + +#ifdef ASSERT +#define __ gen()->lir(__FILE__, __LINE__)-> +#else +#define __ gen()->lir()-> +#endif + +// Item will be loaded into a byte register; Intel only +void LIRItem::load_byte_item() { + load_item(); +} + + +void LIRItem::load_nonconstant() { + LIR_Opr r = value()->operand(); + if (r->is_constant()) { + _result = r; + } else { + load_item(); + } +} + +//-------------------------------------------------------------- +// LIRGenerator +//-------------------------------------------------------------- + + +LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::r10_oop_opr; } +LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::r13_opr; } +LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); } +LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::r10_opr; } +LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } + + +LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { + LIR_Opr opr; + switch (type->tag()) { + case intTag: opr = FrameMap::r10_opr; break; + case objectTag: opr = 
FrameMap::r10_oop_opr; break;
+    case longTag:    opr = FrameMap::long10_opr;       break;
+    case floatTag:   opr = FrameMap::fpu10_float_opr;  break;
+    case doubleTag:  opr = FrameMap::fpu10_double_opr; break;
+
+    case addressTag: // fall through
+    default:
+      ShouldNotReachHere();
+      return LIR_OprFact::illegalOpr;
+  }
+
+  assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch");
+  return opr;
+}
+
+
+LIR_Opr LIRGenerator::rlock_byte(BasicType type) {
+  LIR_Opr byte_vreg = new_register(T_INT);  // always T_INT; 'type' is not consulted
+  set_vreg_flag(byte_vreg, LIRGenerator::byte_reg);
+  return byte_vreg;
+}
+
+//--------- loading items into registers --------------------------------
+
+
+bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const {
+  // Only zero / null constants qualify. Float and double constants are
+  // tested through jint_cast / jlong_cast (presumably the raw bit
+  // pattern, so -0.0 would not qualify -- TODO confirm).
+  ValueType* const vt = v->type();
+
+  if (vt->as_IntConstant() != NULL)    return vt->as_IntConstant()->value() == 0;
+  if (vt->as_LongConstant() != NULL)   return vt->as_LongConstant()->value() == 0;
+  if (vt->as_ObjectConstant() != NULL) return vt->as_ObjectConstant()->value()->is_null_object();
+  if (vt->as_FloatConstant() != NULL)  return jint_cast(vt->as_FloatConstant()->value()) == 0;
+  if (vt->as_DoubleConstant() != NULL) return jlong_cast(vt->as_DoubleConstant()->value()) == 0;
+
+  return false;
+}
+
+bool LIRGenerator::can_inline_as_constant(Value v) const {
+  if (v->type()->as_IntConstant() != NULL) {
+    int value = v->type()->as_IntConstant()->value();
+    // "-value" must be defined for value may be used for sub
+    return Assembler::operand_valid_for_add_immediate(value) &&
+           Assembler::operand_valid_for_add_immediate(- value);
+  } else if (v->type()->as_ObjectConstant() != NULL) {
+    return v->type()->as_ObjectConstant()->value()->is_null_object();
+  } else if (v->type()->as_LongConstant() != NULL) {
+    long value = v->type()->as_LongConstant()->value();
+    // "-value" must be defined for value may be used for sub
+    return Assembler::operand_valid_for_add_immediate(value) 
&&
+           Assembler::operand_valid_for_add_immediate(- value);
+  } else if (v->type()->as_FloatConstant() != NULL) {
+    return v->type()->as_FloatConstant()->value() == 0.0f;
+  } else if (v->type()->as_DoubleConstant() != NULL) {
+    return v->type()->as_DoubleConstant()->value() == 0.0;
+  }
+  return false;
+}
+
+bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const {
+  if (c->as_constant() == NULL) {
+    return false;
+  }
+  long constant = 0;
+  switch (c->type()) {
+    case T_INT:  constant = c->as_jint();   break;
+    case T_LONG: constant = c->as_jlong();  break;
+    default:     return false;
+  }
+  // "-constant" must be defined for c may be used for sub
+  return Assembler::operand_valid_for_add_immediate(constant) &&
+         Assembler::operand_valid_for_add_immediate(- constant);
+}
+
+LIR_Opr LIRGenerator::safepoint_poll_register() {
+  return LIR_OprFact::illegalOpr;
+}
+
+LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index,
+                                            int shift, int disp, BasicType type) {
+  assert(base->is_register(), "must be");
+  // Builds base + (index << shift) + disp. A constant index is folded into one
+  // displacement c; a register index is passed through with its scale and disp.
+  if (index->is_constant()) {
+    LIR_Const *constant = index->as_constant_ptr();
+    jlong c;
+    if (constant->type() == T_INT) {
+      c = (jlong(index->as_jint()) << shift) + disp;
+    } else {
+      assert(constant->type() == T_LONG, "should be");
+      c = (index->as_jlong() << shift) + disp;
+    }
+    if ((jlong)((jint)c) == c) {
+      return new LIR_Address(base, (jint)c, type);
+    }
+    // c does not fit in a jint: load the whole folded offset (not the raw index) into a register.
+    LIR_Opr tmp = new_register(T_LONG);
+    __ move(LIR_OprFact::longConst(c), tmp);
+    return new LIR_Address(base, tmp, type);
+  }
+
+  return new LIR_Address(base, index, (LIR_Address::Scale)shift, disp, type);
+}
+
+LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr,
+                                              BasicType type) {
+  // Address of element index_opr: array base offset plus index scaled by log2(element size).
+  const int header_bytes = arrayOopDesc::base_offset_in_bytes(type);
+  const int log2_elem = exact_log2(type2aelembytes(type));
+  return generate_address(array_opr, index_opr, log2_elem, header_bytes, type);
+}
+
+LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) {
+  
LIR_Opr r;
+  switch (type) {
+    case T_LONG:
+      r = LIR_OprFact::longConst(x);
+      break;
+    case T_INT:
+      r = LIR_OprFact::intConst(x);
+      break;
+    default:
+      ShouldNotReachHere();
+      r = NULL; // unreachable
+  }
+  return r;
+}
+
+void LIRGenerator::increment_counter(address counter, BasicType type, int step) {
+  // Put the counter's address in a register, then use the LIR_Address overload.
+  LIR_Opr counter_ptr = new_pointer_register();
+  __ move(LIR_OprFact::intptrConst(counter), counter_ptr);
+  increment_counter(new LIR_Address(counter_ptr, type), step);
+}
+
+void LIRGenerator::increment_counter(LIR_Address* addr, int step) {
+  LIR_Opr value = new_register(addr->type());
+  __ load(addr, value);
+  __ add(value, load_immediate(step, addr->type()), value);  // plain load/add/store, no atomic op
+  __ store(value, addr);
+}
+
+void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) {
+  LIR_Opr loaded = new_register(T_INT);
+  __ load(generate_address(base, disp, T_INT), loaded, info);
+  __ cmp(condition, loaded, LIR_OprFact::intConst(c));
+}
+
+void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) {
+  LIR_Opr loaded = new_register(T_INT);  // T_INT register regardless of 'type'
+  __ load(generate_address(base, disp, type), loaded, info);
+  __ cmp(condition, reg, loaded);
+}
+
+bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) {
+  const bool usable = tmp->is_valid() && c > 0 && c < max_jint;
+  if (usable && is_power_of_2(c - 1)) {  // c == 2^k + 1: (left << k) + left
+    __ shift_left(left, exact_log2(c - 1), tmp);
+    __ add(tmp, left, result);
+    return true;
+  }
+  if (usable && is_power_of_2(c + 1)) {  // c == 2^k - 1: (left << k) - left
+    __ shift_left(left, exact_log2(c + 1), tmp);
+    __ sub(tmp, left, result);
+    return true;
+  }
+  return false;
+}
+
+void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) {
+  // Store item at sp + offset_from_sp, using the item's own type.
+  __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), item->type()));
+}
+
+void LIRGenerator::array_store_check(LIR_Opr value, LIR_Opr array, CodeEmitInfo* store_check_info,
+                                     ciMethod* 
profiled_method, int profiled_bci) {
+    LIR_Opr tmp1 = new_register(objectType);
+    LIR_Opr tmp2 = new_register(objectType);
+    LIR_Opr tmp3 = new_register(objectType);
+    __ store_check(value, array, tmp1, tmp2, tmp3, store_check_info, profiled_method, profiled_bci);
+}
+
+//----------------------------------------------------------------------
+//             visitor functions
+//----------------------------------------------------------------------
+
+void LIRGenerator::do_MonitorEnter(MonitorEnter* x) {
+  // Produces no result; registers and debug info are created in a fixed
+  // order (lock, optional scratch, null-check info, lock info) before
+  // monitor_enter() is called.
+  assert(x->is_pinned(), "");
+  LIRItem obj(x->obj(), this);
+  obj.load_item();
+
+  set_no_result(x);
+
+  // "lock" stores the address of the monitor stack slot, so this is not an oop
+  LIR_Opr lock = new_register(T_INT);
+
+  // A scratch register is only needed for biased locking
+  LIR_Opr scratch = UseBiasedLocking ? new_register(T_INT) : LIR_OprFact::illegalOpr;
+
+  // Exception state is only created when a null check is required
+  CodeEmitInfo* info_for_exception = x->needs_null_check() ? state_for(x) : NULL;
+
+  // this CodeEmitInfo must not have the xhandlers because here the
+  // object is already locked (xhandlers expect object to be unlocked)
+  CodeEmitInfo* info = state_for(x, x->state(), true);
+  monitor_enter(obj.result(), lock, syncTempOpr(), scratch,
+                x->monitor_no(), info_for_exception, info);
+}
+
+void LIRGenerator::do_MonitorExit(MonitorExit* x) {
+  assert(x->is_pinned(), "");
+  // The object item is created but deliberately not loaded.
+  LIRItem obj(x->obj(), this);
+  obj.dont_load_item();
+  // Two fresh T_INT registers: the lock and the object temp.
+  LIR_Opr lock = new_register(T_INT);
+  LIR_Opr obj_temp = new_register(T_INT);
+  set_no_result(x);
+  monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no());
+}
+
+// neg
+void LIRGenerator::do_NegateOp(NegateOp* x) {
+  LIRItem operand(x->x(), this);
+  operand.load_item();
+  LIR_Opr negated = rlock_result(x);
+  __ negate(operand.result(), negated);
+}
+
+// for  _fadd, _fmul, _fsub, _fdiv, _frem
+//      _dadd, _dmul, _dsub, _ddiv, _drem
+void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) {
+  LIRItem left(x->x(), this);
+  LIRItem 
right(x->y(), this); + + if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { + + // float remainder is implemented as a direct call into the runtime + BasicTypeList signature(2); + if (x->op() == Bytecodes::_frem) { + signature.append(T_FLOAT); + signature.append(T_FLOAT); + } else { + signature.append(T_DOUBLE); + signature.append(T_DOUBLE); + } + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + + const LIR_Opr result_reg = result_register_for(x->type()); + + left.load_item(); + __ move(left.result(), cc->at(0)); + right.load_item_force(cc->at(1)); + + address entry; + if (x->op() == Bytecodes::_frem) { + entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); + } else { + entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); + } + + LIR_Opr result = rlock_result(x); + __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + + return; + } + + if (!left.is_register()) { + left.load_item(); + } + // Always load right hand side. 
+ right.load_item(); + + LIR_Opr reg = rlock(x); + arithmetic_op_fpu(x->op(), reg, left.result(), right.result()); + + set_result(x, round_item(reg)); +} + +// for _ladd, _lmul, _lsub, _ldiv, _lrem +void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + + // missing test if instr is commutative and if we should swap + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { + + left.load_item(); + + bool need_zero_check = true; + if (right.is_constant()) { + jlong c = right.get_jlong_constant(); + // no need to do div-by-zero check if the divisor is a non-zero constant + if (c != 0) { need_zero_check = false; } + // do not load right if the divisor is a power-of-2 constant + if (c > 0 && is_power_of_2(c)) { + right.dont_load_item(); + } else { + right.load_item(); + } + } else { + right.load_item(); + } + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); + __ branch(lir_cond_equal, new DivByZeroStub(info)); + } + + rlock_result(x); + switch (x->op()) { + case Bytecodes::_lrem: + __ rem(left.result(), right.result(), x->operand()); + break; + case Bytecodes::_ldiv: + __ div(left.result(), right.result(), x->operand()); + break; + default: + ShouldNotReachHere(); + } + } else { + assert(x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, + "expect lmul, ladd or lsub"); + // add, sub, mul + left.load_item(); + if (!right.is_register()) { + if (x->op() == Bytecodes::_lmul || + !right.is_constant() || + (x->op() == Bytecodes::_ladd && + !Assembler::operand_valid_for_add_immediate(right.get_jlong_constant())) || + (x->op() == Bytecodes::_lsub && + !Assembler::operand_valid_for_add_immediate(-right.get_jlong_constant()))) { + right.load_item(); + } else { // add, sub + assert(x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expected ladd or lsub"); + // don't load 
constants to save register + right.load_nonconstant(); + } + } + rlock_result(x); + arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); + } +} + +// for: _iadd, _imul, _isub, _idiv, _irem +void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { + + // Test if instr is commutative and if we should swap + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + LIRItem* left_arg = &left; + LIRItem* right_arg = &right; + if (x->is_commutative() && left.is_stack() && right.is_register()) { + // swap them if left is real stack (or cached) and right is real register(not cached) + left_arg = &right; + right_arg = &left; + } + left_arg->load_item(); + // do not need to load right, as we can handle stack and constants + if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { + + rlock_result(x); + + bool need_zero_check = true; + if (right.is_constant()) { + jint c = right.get_jint_constant(); + // no need to do div-by-zero check if the divisor is a non-zero constant + if (c != 0) { need_zero_check = false; } + // do not load right if the divisor is a power-of-2 constant + if (c > 0 && is_power_of_2(c)) { + right_arg->dont_load_item(); + } else { + right_arg->load_item(); + } + } else { + right_arg->load_item(); + } + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); + __ branch(lir_cond_equal, new DivByZeroStub(info)); + } + + LIR_Opr ill = LIR_OprFact::illegalOpr; + if (x->op() == Bytecodes::_irem) { + __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } else if (x->op() == Bytecodes::_idiv) { + __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } + + } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { + if (right.is_constant() && + ((x->op() == Bytecodes::_iadd && !Assembler::operand_valid_for_add_immediate(right.get_jint_constant())) || + (x->op() == Bytecodes::_isub && 
!Assembler::operand_valid_for_add_immediate(-right.get_jint_constant())))) {
+      right.load_nonconstant();
+    } else {
+      right.load_item();
+    }
+    rlock_result(x);
+    arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr);
+  } else {
+    assert (x->op() == Bytecodes::_imul, "expect imul");
+    if (right.is_constant()) {
+      jint c = right.get_jint_constant();
+      if (c > 0 && c < max_jint && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) {
+        right_arg->dont_load_item();
+      } else {
+        // Cannot use constant op.
+        right_arg->load_item();
+      }
+    } else {
+      right.load_item();
+    }
+    rlock_result(x);
+    arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT));
+  }
+}
+
+void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) {
+  // when an operand with use count 1 is the left operand, then it is
+  // likely that no move for 2-operand-LIR-form is necessary
+  if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) {
+    x->swap_operands();
+  }
+
+  const ValueTag tag = x->type()->tag();
+  assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters");
+  switch (tag) {  // hand over to the int, long or floating-point generator
+    case intTag:     do_ArithmeticOp_Int(x);  break;
+    case longTag:    do_ArithmeticOp_Long(x); break;
+    case floatTag:   // fall through
+    case doubleTag:  do_ArithmeticOp_FPU(x);  break;
+    default:         ShouldNotReachHere();    break;
+  }
+}
+
+// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr
+void LIRGenerator::do_ShiftOp(ShiftOp* x) {
+  LIRItem operand(x->x(), this);
+  LIRItem amount(x->y(), this);
+
+  operand.load_item();
+  if (!amount.is_constant()) {
+    amount.load_item();
+  } else {
+    assert(amount.type()->as_IntConstant() != NULL || amount.type()->as_LongConstant() != NULL , "should be");
+    amount.dont_load_item();  // a constant shift count is not loaded into a register
+  }
+
+  LIR_Opr shifted = rlock_result(x);
+  shift_op(x->op(), shifted, operand.result(), amount.result(), LIR_OprFact::illegalOpr);
+}
+
+
+// _iand, _land, _ior, _lor, _ixor, _lxor
+void LIRGenerator::do_LogicOp(LogicOp* x) {
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+  left.load_item();
+  rlock_result(x);
+
+  // A constant right operand is left unloaded only when it passes the add-immediate check.
+  const ValueTag tag = right.type()->tag();
+  const bool keep_immediate = right.is_constant() &&
+      ((tag == longTag && Assembler::operand_valid_for_add_immediate(right.get_jlong_constant())) ||
+       (tag == intTag && Assembler::operand_valid_for_add_immediate(right.get_jint_constant())));
+  if (keep_immediate) {
+    right.dont_load_item();
+  } else {
+    right.load_item();
+  }
+
+  switch (x->op()) {
+    case Bytecodes::_iand:  // fall through
+    case Bytecodes::_land:
+      __ logical_and(left.result(), right.result(), x->operand()); break;
+    case Bytecodes::_ior:   // fall through
+    case Bytecodes::_lor:
+      __ logical_or(left.result(), right.result(), x->operand()); break;
+    case Bytecodes::_ixor:  // fall through
+    case Bytecodes::_lxor:
+      __ logical_xor(left.result(), right.result(), x->operand()); break;
+    default: Unimplemented();
+  }
+}
+
+// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg
+void LIRGenerator::do_CompareOp(CompareOp* x) {
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+  const ValueTag tag = x->x()->type()->tag();
+  if (tag == longTag) {
+    left.set_destroys_register();
+  }
+  left.load_item();
+  right.load_item();
+  LIR_Opr result = rlock_result(x);
+
+  if (x->x()->type()->is_float_kind()) {
+    const bool is_l_variant = (x->op() == Bytecodes::_fcmpl || x->op() == Bytecodes::_dcmpl);
+    __ fcmp2int(left.result(), right.result(), result, is_l_variant);
+  } else if (tag == longTag) {
+    __ lcmp2int(left.result(), right.result(), result);
+  } else {
+    Unimplemented();
+  }
+}
+
+LIR_Opr LIRGenerator::atomic_cmpxchg(BasicType type, LIR_Opr addr, LIRItem& cmp_value, LIRItem& new_value) {
+  LIR_Opr ill = LIR_OprFact::illegalOpr;  // for convenience
+  new_value.load_item();
+  cmp_value.load_item();
+  LIR_Opr result = new_register(T_INT);
+  if (is_reference_type(type)) {
+    __ cas_obj(addr, cmp_value.result(), new_value.result(), new_register(T_INT), new_register(T_INT), result);
+  } else 
if (type == T_INT) { + __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), ill, ill); + } else if (type == T_LONG) { + __ cas_long(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), ill, ill); + } else { + ShouldNotReachHere(); + } + __ logical_xor(FrameMap::r5_opr, LIR_OprFact::intConst(1), result); + return result; +} + +LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) { + bool is_oop = is_reference_type(type); + LIR_Opr result = new_register(type); + value.load_item(); + assert(type == T_INT || is_oop LP64_ONLY( || type == T_LONG ), "unexpected type"); + LIR_Opr tmp = new_register(T_INT); + __ xchg(addr, value.result(), result, tmp); + return result; +} + +LIR_Opr LIRGenerator::atomic_add(BasicType type, LIR_Opr addr, LIRItem& value) { + LIR_Opr result = new_register(type); + value.load_item(); + assert(type == T_INT LP64_ONLY( || type == T_LONG ), "unexpected type"); + LIR_Opr tmp = new_register(T_INT); + __ xadd(addr, value.result(), result, tmp); + return result; +} + +void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), + "wrong type"); + + switch (x->id()) { + case vmIntrinsics::_dexp: // fall through + case vmIntrinsics::_dlog: // fall through + case vmIntrinsics::_dpow: // fall through + case vmIntrinsics::_dcos: // fall through + case vmIntrinsics::_dsin: // fall through + case vmIntrinsics::_dtan: // fall through + case vmIntrinsics::_dlog10: + do_LibmIntrinsic(x); + break; + case vmIntrinsics::_dabs: // fall through + case vmIntrinsics::_dsqrt: { + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + + switch (x->id()) { + case vmIntrinsics::_dsqrt: { + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + case vmIntrinsics::_dabs: { + __ 
abs(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + default: + ShouldNotReachHere(); + } + break; + } + default: + ShouldNotReachHere(); + } +} + +void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { + LIRItem value(x->argument_at(0), this); + value.set_destroys_register(); + LIR_Opr calc_result = rlock_result(x); + LIR_Opr result_reg = result_register_for(x->type()); + CallingConvention* cc = NULL; + BasicTypeList signature(1); + signature.append(T_DOUBLE); + if (x->id() == vmIntrinsics::_dpow) { signature.append(T_DOUBLE); } + cc = frame_map()->c_calling_convention(&signature); + value.load_item_force(cc->at(0)); + if (x->id() == vmIntrinsics::_dpow) { + LIRItem value1(x->argument_at(1), this); + value1.set_destroys_register(); + value1.load_item_force(cc->at(1)); + } + switch (x->id()) { + case vmIntrinsics::_dexp: + if (StubRoutines::dexp() != NULL) { __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); } + else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); } + break; + case vmIntrinsics::_dlog: + if (StubRoutines::dlog() != NULL) { __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args()); } + else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); } + break; + case vmIntrinsics::_dlog10: + if (StubRoutines::dlog10() != NULL) { __ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args()); } + else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); } + break; + case vmIntrinsics::_dsin: + if (StubRoutines::dsin() != NULL) { __ call_runtime_leaf(StubRoutines::dsin(), getThreadTemp(), result_reg, cc->args()); } + else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); } + break; + case vmIntrinsics::_dcos: + if 
(StubRoutines::dcos() != NULL) { __ call_runtime_leaf(StubRoutines::dcos(), getThreadTemp(), result_reg, cc->args()); } + else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); } + break; + case vmIntrinsics::_dtan: + if (StubRoutines::dtan() != NULL) { __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args()); } + else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); } + break; + case vmIntrinsics::_dpow: + if (StubRoutines::dpow() != NULL) { __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); } + else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); } + break; + default: ShouldNotReachHere(); + } + __ move(result_reg, calc_result); +} + + +void LIRGenerator::do_ArrayCopy(Intrinsic* x) { + assert(x->number_of_arguments() == 5, "wrong type"); + + // Make all state_for calls early since they can emit code + CodeEmitInfo* info = state_for(x, x->state()); + + LIRItem src(x->argument_at(0), this); + LIRItem src_pos(x->argument_at(1), this); + LIRItem dst(x->argument_at(2), this); + LIRItem dst_pos(x->argument_at(3), this); + LIRItem length(x->argument_at(4), this); + + // operands for arraycopy must use fixed registers, otherwise + // LinearScan will fail allocation (because arraycopy always needs a + // call) + + // The java calling convention will give us enough registers + // so that on the stub side the args will be perfect already. + // On the other slow/special case side we call C and the arg + // positions are not similar enough to pick one as the best. 
+ // Also because the java calling convention is a "shifted" version + // of the C convention we can process the java args trivially into C + // args without worry of overwriting during the xfer + + src.load_item_force (FrameMap::as_oop_opr(j_rarg0)); + src_pos.load_item_force (FrameMap::as_opr(j_rarg1)); + dst.load_item_force (FrameMap::as_oop_opr(j_rarg2)); + dst_pos.load_item_force (FrameMap::as_opr(j_rarg3)); + length.load_item_force (FrameMap::as_opr(j_rarg4)); + + LIR_Opr tmp = FrameMap::as_opr(j_rarg5); + + set_no_result(x); + + int flags; + ciArrayKlass* expected_type = NULL; + arraycopy_helper(x, &flags, &expected_type); + + __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), length.result(), tmp, + expected_type, flags, info); // does add_safepoint +} + +void LIRGenerator::do_update_CRC32(Intrinsic* x) { + ShouldNotReachHere(); +} + +void LIRGenerator::do_update_CRC32C(Intrinsic* x) { + ShouldNotReachHere(); +} + +void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) { + assert(x->number_of_arguments() == 3, "wrong type"); + assert(UseFMA, "Needs FMA instructions support."); + LIRItem value(x->argument_at(0), this); + LIRItem value1(x->argument_at(1), this); + LIRItem value2(x->argument_at(2), this); + + value.load_item(); + value1.load_item(); + value2.load_item(); + + LIR_Opr calc_input = value.result(); + LIR_Opr calc_input1 = value1.result(); + LIR_Opr calc_input2 = value2.result(); + LIR_Opr calc_result = rlock_result(x); + + switch (x->id()) { + case vmIntrinsics::_fmaD: __ fmad(calc_input, calc_input1, calc_input2, calc_result); break; + case vmIntrinsics::_fmaF: __ fmaf(calc_input, calc_input1, calc_input2, calc_result); break; + default: ShouldNotReachHere(); + } +} + +void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { + fatal("vectorizedMismatch intrinsic is not implemented on this platform"); +} + +// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f +// _i2b, _i2c, _i2s +void 
LIRGenerator::do_Convert(Convert* x) { + LIRItem value(x->value(), this); + value.load_item(); + LIR_Opr input = value.result(); + LIR_Opr result = rlock(x); + + // arguments of lir_convert + LIR_Opr conv_input = input; + LIR_Opr conv_result = result; + + __ convert(x->op(), conv_input, conv_result); + + assert(result->is_virtual(), "result must be virtual register"); + set_result(x, result); +} + +void LIRGenerator::do_NewInstance(NewInstance* x) { +#ifndef PRODUCT + if (PrintNotLoaded && !x->klass()->is_loaded()) { + tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); + } +#endif + CodeEmitInfo* info = state_for(x, x->state()); + LIR_Opr reg = result_register_for(x->type()); + new_instance(reg, x->klass(), x->is_unresolved(), + FrameMap::r12_oop_opr, + FrameMap::r15_oop_opr, + FrameMap::r14_oop_opr, + LIR_OprFact::illegalOpr, + FrameMap::r13_metadata_opr, + info); + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { + CodeEmitInfo* info = state_for(x, x->state()); + + LIRItem length(x->length(), this); + length.load_item_force(FrameMap::r9_opr); + + LIR_Opr reg = result_register_for(x->type()); + LIR_Opr tmp1 = FrameMap::r12_oop_opr; + LIR_Opr tmp2 = FrameMap::r14_oop_opr; + LIR_Opr tmp3 = FrameMap::r15_oop_opr; + LIR_Opr tmp4 = reg; + LIR_Opr klass_reg = FrameMap::r13_metadata_opr; + LIR_Opr len = length.result(); + BasicType elem_type = x->elt_type(); + + __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); + + CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { + LIRItem length(x->length(), this); + // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction + // and 
therefore provide the state before the parameters have been consumed + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + } + + CodeEmitInfo* info = state_for(x, x->state()); + + LIR_Opr reg = result_register_for(x->type()); + LIR_Opr tmp1 = FrameMap::r12_oop_opr; + LIR_Opr tmp2 = FrameMap::r14_oop_opr; + LIR_Opr tmp3 = FrameMap::r15_oop_opr; + LIR_Opr tmp4 = reg; + LIR_Opr klass_reg = FrameMap::r13_metadata_opr; + + length.load_item_force(FrameMap::r9_opr); + LIR_Opr len = length.result(); + + CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); + ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); + if (obj == ciEnv::unloaded_ciobjarrayklass()) { + BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); + } + klass2reg_with_patching(klass_reg, obj, patching_info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + + +void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { + Values* dims = x->dims(); + int i = dims->length(); + LIRItemList* items = new LIRItemList(i, i, NULL); + while (i-- > 0) { + LIRItem* size = new LIRItem(dims->at(i), this); + items->at_put(i, size); + } + + // Evaluate state_for early since it may emit code. + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + + // Cannot re-use same xhandlers for multiple CodeEmitInfos, so + // clone all handlers (NOTE: Usually this is handled transparently + // by the CodeEmitInfo cloning logic in CodeStub constructors but + // is done explicitly here because a stub isn't being used). 
+ x->set_exception_handlers(new XHandlers(x->exception_handlers())); + } + CodeEmitInfo* info = state_for(x, x->state()); + + i = dims->length(); + while (i-- > 0) { + LIRItem* size = items->at(i); + size->load_item(); + + store_stack_parameter(size->result(), in_ByteSize(i * BytesPerInt)); + } + + LIR_Opr klass_reg = FrameMap::r10_metadata_opr; + klass2reg_with_patching(klass_reg, x->klass(), patching_info); + + LIR_Opr rank = FrameMap::r9_opr; + __ move(LIR_OprFact::intConst(x->rank()), rank); + LIR_Opr varargs = FrameMap::r12_opr; + __ move(FrameMap::sp_opr, varargs); + LIR_OprList* args = new LIR_OprList(3); + args->append(klass_reg); + args->append(rank); + args->append(varargs); + LIR_Opr reg = result_register_for(x->type()); + __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), + LIR_OprFact::illegalOpr, + reg, args, info); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_BlockBegin(BlockBegin* x) { + // nothing to do for now +} + +void LIRGenerator::do_CheckCast(CheckCast* x) { + LIRItem obj(x->obj(), this); + + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || + (PatchALot && !x->is_incompatible_class_change_check() && !x->is_invokespecial_receiver_check())) { + // must do this before locking the destination register as an oop register, + // and before the obj is loaded (the latter is for deoptimization) + patching_info = state_for(x, x->state_before()); + } + obj.load_item(); + + // info for exceptions + CodeEmitInfo* info_for_exception = + (x->needs_exception_state() ? 
state_for(x) : + state_for(x, x->state_before(), true /*ignore_xhandler*/ )); + + CodeStub* stub = NULL; + if (x->is_incompatible_class_change_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, + info_for_exception); + } else if (x->is_invokespecial_receiver_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new DeoptimizeStub(info_for_exception, + Deoptimization::Reason_class_check, + Deoptimization::Action_none); + } else { + stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception); + } + LIR_Opr reg = rlock_result(x); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + if (!x->klass()->is_loaded() || UseCompressedClassPointers) { + tmp3 = new_register(objectType); + } + __ checkcast(reg, obj.result(), x->klass(), + new_register(objectType), new_register(objectType), tmp3, + x->direct_compare(), info_for_exception, patching_info, stub, + x->profiled_method(), x->profiled_bci()); +} + +void LIRGenerator::do_InstanceOf(InstanceOf* x) { + LIRItem obj(x->obj(), this); + + // result and test object may not be in same register + LIR_Opr reg = rlock_result(x); + CodeEmitInfo* patching_info = NULL; + if ((!x->klass()->is_loaded() || PatchALot)) { + // must do this before locking the destination register as an oop register + patching_info = state_for(x, x->state_before()); + } + obj.load_item(); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + if (!x->klass()->is_loaded() || UseCompressedClassPointers) { + tmp3 = new_register(objectType); + } + __ instanceof(reg, obj.result(), x->klass(), + new_register(objectType), new_register(objectType), tmp3, + x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); +} + +void LIRGenerator::do_If(If* x) { + // If should have two successors + assert(x->number_of_sux() == 2, "inconsistency"); + ValueTag tag = x->x()->type()->tag(); + 
bool is_safepoint = x->is_safepoint(); + + If::Condition cond = x->cond(); + + LIRItem xitem(x->x(), this); + LIRItem yitem(x->y(), this); + LIRItem* xin = &xitem; + LIRItem* yin = &yitem; + + if (tag == longTag) { + // for longs, only conditions "eql", "neq", "lss", "geq" are valid; + // mirror for other conditions + if (cond == If::gtr || cond == If::leq) { + cond = Instruction::mirror(cond); + xin = &yitem; + yin = &xitem; + } + xin->set_destroys_register(); + } + xin->load_item(); + yin->load_item(); + + set_no_result(x); + + LIR_Opr left = xin->result(); + LIR_Opr right = yin->result(); + + // add safepoint before generating condition code so it can be recomputed + if (x->is_safepoint()) { + // increment backedge counter if needed + increment_backedge_counter_conditionally(lir_cond(cond), left, right, state_for(x, x->state_before()), + x->tsux()->bci(), x->fsux()->bci(), x->profiled_bci()); + __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); + } + + // Generate branch profiling. Profiling code doesn't kill flags. 
+ __ cmp(lir_cond(cond), left, right); + profile_branch(x, cond); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { + __ branch(lir_cond(cond), x->tsux(), x->usux()); + } else { + __ branch(lir_cond(cond), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); +} + +LIR_Opr LIRGenerator::getThreadPointer() { + return FrameMap::as_pointer_opr(xthread); +} + +void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); } + +void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, + CodeEmitInfo* info) { + __ volatile_store_mem_reg(value, address, info); +} + +void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, + CodeEmitInfo* info) { + __ volatile_load_mem_reg(address, result, info); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/register.hpp"
+#include "c1/c1_LIR.hpp"
+
+FloatRegister LIR_OprDesc::as_float_reg() const {  // maps fpu_regnr() to a FloatRegister
+  return as_FloatRegister(fpu_regnr());
+}
+
+FloatRegister LIR_OprDesc::as_double_reg() const {  // maps fpu_regnrLo() to a FloatRegister
+  return as_FloatRegister(fpu_regnrLo());
+}
+
+// Reg2 unused: reg1 is encoded into both register fields of the operand.
+LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) {
+  assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform");
+  return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) |
+                             (reg1 << LIR_OprDesc::reg2_shift) |
+                             LIR_OprDesc::double_type |
+                             LIR_OprDesc::fpu_register |
+                             LIR_OprDesc::double_size);
+}
+
+#ifndef PRODUCT
+void LIR_Address::verify() const {  // assert-only check of base/index operand kinds and base type; compiled out of PRODUCT builds
+  assert(base()->is_cpu_register(), "wrong base operand");
+  assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand");
+  assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || base()->type() == T_LONG ||
+         base()->type() == T_METADATA, "wrong type for addresses");
+}
+#endif // PRODUCT
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Instruction.hpp"
+#include "c1/c1_LinearScan.hpp"
+#include "utilities/bitMap.inline.hpp"
+
+void LinearScan::allocate_fpu_stack() {  // intentionally empty
+  // No FPU stack on RISCV
+}
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_C1_LINEARSCAN_RISCV_HPP
+#define CPU_RISCV_C1_LINEARSCAN_RISCV_HPP
+
+inline bool LinearScan::is_processed_reg_num(int reg_num)  // true up to last_cpu_reg() and again from pd_nof_cpu_regs_frame_map upwards
+{
+  return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map;
+}
+
+inline int LinearScan::num_physical_regs(BasicType type) {  // always 1, whatever the type
+  return 1;
+}
+
+inline bool LinearScan::requires_adjacent_regs(BasicType type) {  // always false, whatever the type
+  return false;
+}
+
+inline bool LinearScan::is_caller_save(int assigned_reg) {  // true for register numbers lying outside the three callee-saved ranges tested below
+  assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers");
+  if (assigned_reg < pd_first_callee_saved_reg) {
+    return true;
+  }
+  if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg_1) {
+    return true;
+  }
+  if (assigned_reg > pd_last_callee_saved_fpu_reg_1 && assigned_reg < pd_first_callee_saved_fpu_reg_2) {
+    return true;
+  }
+  if (assigned_reg > pd_last_callee_saved_fpu_reg_2 && assigned_reg < pd_last_fpu_reg) {  // NOTE(review): strict '<' excludes pd_last_fpu_reg itself - confirm that register is not meant to count as caller-saved
+    return true;
+  }
+  return false;
+}
+
+inline void LinearScan::pd_add_temps(LIR_Op* op) {
+  // No special case behaviours yet
+}
+
+
+// Implementation of LinearScanWalker
+
+inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur)  // sets _first_reg/_last_reg and returns true for the two cases below, otherwise returns false
+{
+  if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) {
+    assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only");
+    _first_reg = pd_first_callee_saved_reg;
+    _last_reg = pd_last_callee_saved_reg;
+    return true;
+  } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT ||
+             cur->type() == T_ADDRESS || cur->type() == T_METADATA) {
+    _first_reg = pd_first_cpu_reg;
+    _last_reg = pd_last_allocatable_cpu_reg;
+    return true;
+  }
+  return false;
+}
+
+#endif // CPU_RISCV_C1_LINEARSCAN_RISCV_HPP
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
@@ -0,0 +1,455 @@
+/*
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_LIR.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "classfile/systemDictionary.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markWord.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" + +void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, + FloatRegister freg0, FloatRegister freg1, + Register result) +{ + if (is_float) { + float_compare(result, freg0, freg1, unordered_result); + } else { + double_compare(result, freg0, freg1, unordered_result); + } +} + +int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { + const int aligned_mask = BytesPerWord - 1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + int null_check_offset = -1; + + verify_oop(obj); + + // save object being locked into the BasicObjectLock + sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + + null_check_offset = offset(); + + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(hdr, obj); + lwu(hdr, Address(hdr, Klass::access_flags_offset())); + andi(t0, hdr, JVM_ACC_IS_VALUE_BASED_CLASS); + bnez(t0, slow_case, true /* is_far */); + } + + if (UseBiasedLocking) { + assert(scratch != noreg, "should have scratch register at this point"); + biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); + } + + // Load object header + ld(hdr, Address(obj, 
hdr_offset)); + // and mark it as unlocked + ori(hdr, hdr, markWord::unlocked_value); + // save unlocked object header into the displaced header location on the stack + sd(hdr, Address(disp_hdr, 0)); + // test if object header is still the same (i.e. unlocked), and if so, store the + // displaced header address in the object header - if it is not the same, get the + // object header instead + la(t1, Address(obj, hdr_offset)); + cmpxchgptr(hdr, disp_hdr, t1, t0, done, /*fallthough*/NULL); + // if the object header was the same, we're done + // if the object header was not the same, it is now in the hdr register + // => test if it is a stack pointer into the same stack (recursive locking), i.e.: + // + // 1) (hdr & aligned_mask) == 0 + // 2) sp <= hdr + // 3) hdr <= sp + page_size + // + // these 3 tests can be done by evaluating the following expression: + // + // (hdr -sp) & (aligned_mask - page_size) + // + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + sub(hdr, hdr, sp); + li(t0, aligned_mask - os::vm_page_size()); + andr(hdr, hdr, t0); + // for recursive locking, the result is zero => save it in the displaced header + // location (NULL in the displaced hdr location indicates recursive locking) + sd(hdr, Address(disp_hdr, 0)); + // otherwise we don't care about the result and handle locking via runtime call + bnez(hdr, slow_case, /* is_far */ true); + bind(done); + if (PrintBiasedLockingStatistics) { + la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); + add_memory_int32(Address(t1, 0), 1); + } + return null_check_offset; +} + +void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { + const int aligned_mask = BytesPerWord - 1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + + if 
(UseBiasedLocking) { + // load object + ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + biased_locking_exit(obj, hdr, done); + } + + // load displaced header + ld(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + // if we had recursive locking, we are done + beqz(hdr, done); + if (!UseBiasedLocking) { + // load object + ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + } + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to + // the displaced header, get the object header instead + // if the object header was not pointing to the displaced header, + // we do unlocking via runtime call + if (hdr_offset) { + la(t0, Address(obj, hdr_offset)); + cmpxchgptr(disp_hdr, hdr, t0, t1, done, &slow_case); + } else { + cmpxchgptr(disp_hdr, hdr, obj, t1, done, &slow_case); + } + bind(done); +} + +// Defines obj, preserves var_size_in_bytes +void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, Register tmp1, Register tmp2, Label& slow_case) { + if (UseTLAB) { + tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, /* is_far */ true); + } else { + eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, tmp1, slow_case, /* is_far */ true); + } +} + +void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { + assert_different_registers(obj, klass, len); + if (UseBiasedLocking && !len->is_valid()) { + assert_different_registers(obj, klass, len, tmp1, tmp2); + ld(tmp1, Address(klass, Klass::prototype_header_offset())); + } else { + // This assumes that all prototype bits fitr in an int32_t + mv(tmp1, (int32_t)(intptr_t)markWord::prototype().value()); + } + sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) 
{ // Take care not to kill klass + encode_klass_not_null(tmp1, klass); + sw(tmp1, Address(obj, oopDesc::klass_offset_in_bytes())); + } else { + sd(klass, Address(obj, oopDesc::klass_offset_in_bytes())); + } + + if (len->is_valid()) { + sw(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); + } else if (UseCompressedClassPointers) { + store_klass_gap(obj, zr); + } +} + +// preserves obj, destroys len_in_bytes +void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp) { + assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); + Label done; + + // len_in_bytes is positive and ptr sized + sub(len_in_bytes, len_in_bytes, hdr_size_in_bytes); + beqz(len_in_bytes, done); + + // Preserve obj + if (hdr_size_in_bytes) { + add(obj, obj, hdr_size_in_bytes); + } + zero_memory(obj, len_in_bytes, tmp); + if (hdr_size_in_bytes) { + sub(obj, obj, hdr_size_in_bytes); + } + + bind(done); +} + +void C1_MacroAssembler::allocate_object(Register obj, Register tmp1, Register tmp2, int header_size, int object_size, Register klass, Label& slow_case) { + assert_different_registers(obj, tmp1, tmp2); + assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); + + try_allocate(obj, noreg, object_size * BytesPerWord, tmp1, tmp2, slow_case); + + initialize_object(obj, klass, noreg, object_size * HeapWordSize, tmp1, tmp2, UseTLAB); +} + +void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register tmp1, Register tmp2, bool is_tlab_allocated) { + assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, + "con_size_in_bytes is not multiple of alignment"); + const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; + + initialize_header(obj, klass, noreg, tmp1, tmp2); + + if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { + // clear rest of allocated space + const Register index = tmp2; + // 16: multipler for 
threshold + const int threshold = 16 * BytesPerWord; // approximate break even point for code size (see comments below) + if (var_size_in_bytes != noreg) { + mv(index, var_size_in_bytes); + initialize_body(obj, index, hdr_size_in_bytes, tmp1); + } else if (con_size_in_bytes <= threshold) { + // use explicit null stores + int i = hdr_size_in_bytes; + if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) { // 2: multiplier for BytesPerWord + sd(zr, Address(obj, i)); + i += BytesPerWord; + } + for (; i < con_size_in_bytes; i += BytesPerWord) { + sd(zr, Address(obj, i)); + } + } else if (con_size_in_bytes > hdr_size_in_bytes) { + block_comment("zero memory"); + // use loop to null out the fields + int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord; + mv(index, words / 8); // 8: must equal 'unroll' below; index = number of full unrolled iterations + + const int unroll = 8; // Number of sd(zr) instructions we'll unroll + int remainder = words % unroll; + la(t0, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord)); + + Label entry_point, loop; + j(entry_point); + + bind(loop); + sub(index, index, 1); + for (int i = -unroll; i < 0; i++) { + if (-i == remainder) { + bind(entry_point); + } + sd(zr, Address(t0, i * wordSize)); + } + if (remainder == 0) { + bind(entry_point); + } + add(t0, t0, unroll * wordSize); + bnez(index, loop); + } + } + + membar(MacroAssembler::StoreStore); + + if (CURRENT_ENV->dtrace_alloc_probes()) { + assert(obj == x10, "must be"); + far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id))); + } + + verify_oop(obj); +} + +void C1_MacroAssembler::allocate_array(Register obj, Register len, Register tmp1, Register tmp2, int header_size, int f, Register klass, Label& slow_case) { + assert_different_registers(obj, len, tmp1, tmp2, klass); + + // determine alignment mask + assert(!(BytesPerWord & 1), "must be multiple of 2 for masking code to work"); + + // check for negative or excessive length + mv(t0, (int32_t)max_array_allocation_length); + bgeu(len, 
t0, slow_case, /* is_far */ true); + + const Register arr_size = tmp2; // okay to be the same + // align object end + mv(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); + shadd(arr_size, len, arr_size, t0, f); + andi(arr_size, arr_size, ~(uint)MinObjAlignmentInBytesMask); + + try_allocate(obj, arr_size, 0, tmp1, tmp2, slow_case); + + initialize_header(obj, klass, len, tmp1, tmp2); + + // clear rest of allocated space; len was already stored by initialize_header, so its register is reused as the temp + const Register len_zero = len; + initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero); + + membar(MacroAssembler::StoreStore); + + if (CURRENT_ENV->dtrace_alloc_probes()) { + assert(obj == x10, "must be"); + far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id))); + } + + verify_oop(obj); +} + +void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache, Label &L) { + verify_oop(receiver); + // explicit NULL check not needed since load from [klass_offset] causes a trap + // check against inline cache + assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check"); + cmp_klass(receiver, iCache, t0, L); +} + +void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); + // Make sure there is enough stack space for this method's activation. + // Note that we do this before creating a frame. + generate_stack_overflow_check(bang_size_in_bytes); + MacroAssembler::build_frame(framesize); + + // Insert nmethod entry barrier into frame. + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->nmethod_entry_barrier(this); +} + +void C1_MacroAssembler::remove_frame(int framesize) { + MacroAssembler::remove_frame(framesize); +} + + +void C1_MacroAssembler::verified_entry(bool breakAtEntry) { + // If we have to make this method not-entrant we'll overwrite its + // first instruction with a jump. 
For this action to be legal we + // must ensure that this first instruction is a J, JAL or NOP. + // Make it a NOP. + + nop(); +} + +void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { + // fp + -2: link + // + -1: return address + // + 0: argument with offset 0 + // + 1: argument with offset 1 + // + 2: ... + ld(reg, Address(fp, offset_in_words * BytesPerWord)); +} + +#ifndef PRODUCT + +void C1_MacroAssembler::verify_stack_oop(int stack_offset) { + if (!VerifyOops) { + return; + } + verify_oop_addr(Address(sp, stack_offset), "oop"); +} + +void C1_MacroAssembler::verify_not_null_oop(Register r) { + if (!VerifyOops) return; + Label not_null; + bnez(r, not_null); + stop("non-null oop required"); + bind(not_null); + verify_oop(r); +} + +void C1_MacroAssembler::invalidate_registers(bool inv_x10, bool inv_x9, bool inv_x12, bool inv_x13, bool inv_x14, bool inv_x15) { +#ifdef ASSERT + static int nn; + if (inv_x10) { mv(x10, 0xDEAD); } + if (inv_x9) { mv(x9, 0xDEAD); } + if (inv_x12) { mv(x12, nn++); } // x12 receives a running counter instead of the 0xDEAD marker + if (inv_x13) { mv(x13, 0xDEAD); } + if (inv_x14) { mv(x14, 0xDEAD); } + if (inv_x15) { mv(x15, 0xDEAD); } +#endif // ASSERT +} +#endif // ifndef PRODUCT + +typedef void (C1_MacroAssembler::*c1_cond_branch_insn)(Register op1, Register op2, Label& label, bool is_far); +typedef void (C1_MacroAssembler::*c1_float_cond_branch_insn)(FloatRegister op1, FloatRegister op2, + Label& label, bool is_far, bool is_unordered); + +static c1_cond_branch_insn c1_cond_branch[] = +{ + /* SHORT branches */ + (c1_cond_branch_insn)&Assembler::beq, + (c1_cond_branch_insn)&Assembler::bne, + (c1_cond_branch_insn)&Assembler::blt, + (c1_cond_branch_insn)&Assembler::ble, + (c1_cond_branch_insn)&Assembler::bge, + (c1_cond_branch_insn)&Assembler::bgt, + (c1_cond_branch_insn)&Assembler::bleu, // lir_cond_belowEqual + (c1_cond_branch_insn)&Assembler::bgeu // lir_cond_aboveEqual +}; + +static c1_float_cond_branch_insn c1_float_cond_branch[] = +{ + /* FLOAT branches */ + 
(c1_float_cond_branch_insn)&MacroAssembler::float_beq, + (c1_float_cond_branch_insn)&MacroAssembler::float_bne, + (c1_float_cond_branch_insn)&MacroAssembler::float_blt, + (c1_float_cond_branch_insn)&MacroAssembler::float_ble, + (c1_float_cond_branch_insn)&MacroAssembler::float_bge, + (c1_float_cond_branch_insn)&MacroAssembler::float_bgt, + NULL, // lir_cond_belowEqual + NULL, // lir_cond_aboveEqual + + /* DOUBLE branches (table indices 8..15, after the 8 float entries above) */ + (c1_float_cond_branch_insn)&MacroAssembler::double_beq, + (c1_float_cond_branch_insn)&MacroAssembler::double_bne, + (c1_float_cond_branch_insn)&MacroAssembler::double_blt, + (c1_float_cond_branch_insn)&MacroAssembler::double_ble, + (c1_float_cond_branch_insn)&MacroAssembler::double_bge, + (c1_float_cond_branch_insn)&MacroAssembler::double_bgt, + NULL, // lir_cond_belowEqual + NULL // lir_cond_aboveEqual +}; + +void C1_MacroAssembler::c1_cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, + BasicType type, bool is_far) { + if (type == T_OBJECT || type == T_ARRAY) { + assert(cmpFlag == lir_cond_equal || cmpFlag == lir_cond_notEqual, "Should be equal or notEqual"); + if (cmpFlag == lir_cond_equal) { + beq(op1, op2, label, is_far); + } else { + bne(op1, op2, label, is_far); + } + } else { + assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(c1_cond_branch) / sizeof(c1_cond_branch[0])), + "invalid c1 conditional branch index"); + (this->*c1_cond_branch[cmpFlag])(op1, op2, label, is_far); + } +} + +void C1_MacroAssembler::c1_float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, + bool is_far, bool is_unordered) { + assert(cmpFlag >= 0 && + cmpFlag < (int)(sizeof(c1_float_cond_branch) / sizeof(c1_float_cond_branch[0])), + "invalid c1 float conditional branch index"); + (this->*c1_float_cond_branch[cmpFlag])(op1, op2, label, is_far, is_unordered); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp =================================================================== --- /dev/null +++ 
jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP +#define CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP + +using MacroAssembler::build_frame; +using MacroAssembler::null_check; + +// C1_MacroAssembler contains high-level macros for C1 + + private: + int _rsp_offset; // track rsp changes + // initialization + void pd_init() { _rsp_offset = 0; } + + + public: + void try_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register tmp1, // temp register + Register tmp2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + + void initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2); + void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp); + + void float_cmp(bool is_float, int unordered_result, + FloatRegister f0, FloatRegister f1, + Register result); + + // locking + // hdr : (presumably the 'swap' parameter - confirm) must be x10, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved + // scratch : scratch register, contents destroyed + // returns code offset at which to add null check debug information + int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + + // unlocking + // hdr : (presumably the 'swap' parameter - confirm) contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: (presumably the 'lock' parameter - confirm) must be x10 & must point to the displaced header location, contents destroyed + void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); + + void initialize_object( + Register obj, // result: pointer to object after successful allocation + Register klass, // object klass + Register var_size_in_bytes, // object size in bytes if unknown at 
compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register tmp1, // temp register + Register tmp2, // temp register + bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB + ); + + // allocation of fixed-size objects + // (can also be used to allocate fixed-size arrays, by setting + // header_size correctly and storing the array length afterwards) + // obj : will contain pointer to allocated object + // tmp1, tmp2 : temp registers - contents destroyed + // header_size: size of object header in words + // object_size: total size of object in words + // slow_case : exit to slow case implementation if fast allocation fails + void allocate_object(Register obj, Register tmp1, Register tmp2, int header_size, int object_size, Register klass, Label& slow_case); + + enum { + max_array_allocation_length = 0x00FFFFFF + }; + + // allocation of arrays + // obj : will contain pointer to allocated object + // len : array length in number of elements + // tmp1, tmp2 : temp registers - contents destroyed + // header_size: size of object header in words + // f : element scale factor + // slow_case : exit to slow case implementation if fast allocation fails + void allocate_array(Register obj, Register len, Register tmp1, Register tmp2, int header_size, int f, Register klass, Label& slow_case); + + int rsp_offset() const { return _rsp_offset; } + + void invalidate_registers(bool inv_r0, bool inv_r19, bool inv_r2, bool inv_r3, bool inv_r4, bool inv_r5) PRODUCT_RETURN; // parameter names are stale: the definition names them inv_x10, inv_x9, inv_x12, inv_x13, inv_x14, inv_x15 + + // This platform only uses signal-based null checks. The Label is not needed. 
+ void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } + + void load_parameter(int offset_in_words, Register reg); + + void inline_cache_check(Register receiver, Register iCache, Label &L); + + static const int c1_double_branch_mask = 1 << 3; // depends on the layout of c1_float_cond_branch: the double entries start at index 8 + void c1_cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, BasicType type, bool is_far); + void c1_float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, + bool is_far, bool is_unordered = false); + +#endif // CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp @@ -0,0 +1,1172 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_Defs.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "compiler/disassembler.hpp" +#include "compiler/oopMap.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/universe.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "register_riscv.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframe.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + + +// Implementation of StubAssembler + +int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, int args_size) { + // setup registers + assert(!(oop_result->is_valid() || metadata_result->is_valid()) || oop_result != metadata_result, + "registers must be different"); + assert(oop_result != xthread && metadata_result != xthread, "registers must be different"); + assert(args_size >= 0, "illegal args_size"); + bool align_stack = false; + + mv(c_rarg0, xthread); + set_num_rt_args(0); // Nothing on stack + + Label retaddr; + set_last_Java_frame(sp, fp, retaddr, t0); + + // do the call + int32_t off = 0; + la_patchable(t0, RuntimeAddress(entry), off); + jalr(x1, t0, off); + bind(retaddr); + int call_offset = offset(); // code offset right after the call (retaddr is bound here); this is the value returned + // verify callee-saved register +#ifdef ASSERT + push_reg(x10, sp); + { Label L; + get_thread(x10); + beq(xthread, x10, L); + stop("StubAssembler::call_RT: xthread not callee saved?"); + bind(L); + } + pop_reg(x10, sp); +#endif + 
reset_last_Java_frame(true); + + // check for pending exceptions + { Label L; + // check for pending exceptions (java_thread is set upon return) + ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + beqz(t0, L); + // exception pending => remove activation and forward to exception handler + // make sure that the vm_results are cleared + if (oop_result->is_valid()) { + sd(zr, Address(xthread, JavaThread::vm_result_offset())); + } + if (metadata_result->is_valid()) { + sd(zr, Address(xthread, JavaThread::vm_result_2_offset())); + } + if (frame_size() == no_frame_size) { + leave(); + far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + } else if (_stub_id == Runtime1::forward_exception_id) { + should_not_reach_here(); + } else { + far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); + } + bind(L); + } + // get oop results if there are any and reset the values in the thread + if (oop_result->is_valid()) { + get_vm_result(oop_result, xthread); + } + if (metadata_result->is_valid()) { + get_vm_result_2(metadata_result, xthread); + } + return call_offset; +} + +int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1) { + mv(c_rarg1, arg1); + return call_RT(oop_result, metadata_result, entry, 1); +} + +int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1, Register arg2) { + const int arg_num = 2; + if (c_rarg1 == arg2) { + if (c_rarg2 == arg1) { // the two args sit in each other's target register: swap them in place with three XORs + xorr(arg1, arg1, arg2); + xorr(arg2, arg1, arg2); + xorr(arg1, arg1, arg2); + } else { // arg2 lives in c_rarg1: move it out first, before c_rarg1 is overwritten with arg1 + mv(c_rarg2, arg2); + mv(c_rarg1, arg1); + } + } else { + mv(c_rarg1, arg1); + mv(c_rarg2, arg2); + } + return call_RT(oop_result, metadata_result, entry, arg_num); +} + +int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { + const int arg_num = 3; + // if there is any conflict use the stack + if (arg1 == 
c_rarg2 || arg1 == c_rarg3 || + arg2 == c_rarg1 || arg2 == c_rarg3 || + arg3 == c_rarg1 || arg3 == c_rarg2) { + const int arg1_sp_offset = 0; + const int arg2_sp_offset = 1; + const int arg3_sp_offset = 2; + addi(sp, sp, -(arg_num + 1) * wordSize); /* NOTE(review): arg_num + 1 slots for 3 args; presumably the extra slot keeps sp 16-byte aligned - confirm */ + sd(arg1, Address(sp, arg1_sp_offset * wordSize)); + sd(arg2, Address(sp, arg2_sp_offset * wordSize)); + sd(arg3, Address(sp, arg3_sp_offset * wordSize)); + + ld(c_rarg1, Address(sp, arg1_sp_offset * wordSize)); + ld(c_rarg2, Address(sp, arg2_sp_offset * wordSize)); + ld(c_rarg3, Address(sp, arg3_sp_offset * wordSize)); + addi(sp, sp, (arg_num + 1) * wordSize); + } else { + mv(c_rarg1, arg1); + mv(c_rarg2, arg2); + mv(c_rarg3, arg3); + } + return call_RT(oop_result, metadata_result, entry, arg_num); +} + +enum return_state_t { + does_not_return, requires_return +}; + +// Implementation of StubFrame + +class StubFrame: public StackObj { + private: + StubAssembler* _sasm; + bool _return_state; + + public: + StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state=requires_return); + void load_argument(int offset_in_words, Register reg); + + ~StubFrame(); +};; + +void StubAssembler::prologue(const char* name, bool must_gc_arguments) { + set_info(name, must_gc_arguments); + enter(); +} + +void StubAssembler::epilogue() { + leave(); + ret(); +} + +#define __ _sasm-> + +StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state) { + _sasm = sasm; + _return_state = return_state; + __ prologue(name, must_gc_arguments); +} + +// load parameters that were stored with LIR_Assembler::store_parameter +// Note: offsets for store_parameter and load_argument must match +void StubFrame::load_argument(int offset_in_words, Register reg) { + __ load_parameter(offset_in_words, reg); +} + + +StubFrame::~StubFrame() { + if (_return_state == requires_return) { + __ epilogue(); + } else { + __ should_not_reach_here(); + } + _sasm = NULL; +} + 
+#undef __ + + +// Implementation of Runtime1 + +#define __ sasm-> + +const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2; + +// Stack layout for saving/restoring all the registers needed during a runtime +// call (this includes deoptimization) +// Note that users of this frame may well have arguments to some runtime +// while these values are on the stack. These positions neglect those arguments +// but the code in save_live_registers will take the argument count into +// account. +// + +enum reg_save_layout { + reg_save_frame_size = 32 /* float */ + 30 /* integer excluding x3, x4 */ +}; + +// Save off registers which might be killed by calls into the runtime. +// Tries to be smart about FPU registers. In particular we separate +// saving and describing the FPU registers for deoptimization since we +// have to save the FPU registers twice if we describe them. The +// deopt blob is the only thing which needs to describe FPU registers. +// In all other cases it should be sufficient to simply save their +// current value. + +static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs]; +static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs]; + +static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { + int frame_size_in_bytes = reg_save_frame_size * BytesPerWord; + sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); + int frame_size_in_slots = frame_size_in_bytes / sizeof(jint); + OopMap* oop_map = new OopMap(frame_size_in_slots, 0); + assert_cond(oop_map != NULL); + + // caller save registers only, see FrameMap::initialize + // in c1_FrameMap_riscv.cpp for detail. 
+ const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = { + x7, x10, x11, x12, x13, x14, x15, x16, x17, x28, x29, x30, x31 + }; + + for (int i = 0; i < FrameMap::max_nof_caller_save_cpu_regs; i++) { + Register r = caller_save_cpu_regs[i]; + int sp_offset = cpu_reg_save_offsets[r->encoding()]; + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), + r->as_VMReg()); + } + + // fpu_regs + if (save_fpu_registers) { + for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { + FloatRegister r = as_FloatRegister(i); + int sp_offset = fpu_reg_save_offsets[i]; + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), + r->as_VMReg()); + } + } + return oop_map; +} + +static OopMap* save_live_registers(StubAssembler* sasm, + bool save_fpu_registers = true) { + __ block_comment("save_live_registers"); + + // if the number of pushed regs is odd, one slot will be reserved for alignment + __ push_reg(RegSet::range(x5, x31), sp); // integer registers except ra(x1) & sp(x2) & gp(x3) & tp(x4) + + if (save_fpu_registers) { + // float registers + __ addi(sp, sp, -(FrameMap::nof_fpu_regs * wordSize)); + for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { + __ fsd(as_FloatRegister(i), Address(sp, i * wordSize)); + } + } else { + // the fixed frame size is reg_save_frame_size = 62 words (see enum reg_save_layout), + // so we also subtract 32 * wordSize from sp when save_fpu_registers == false + __ addi(sp, sp, -32 * wordSize); + } + + return generate_oop_map(sasm, save_fpu_registers); +} + +static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { + if (restore_fpu_registers) { + for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { + __ fld(as_FloatRegister(i), Address(sp, i * wordSize)); + } + __ addi(sp, sp, FrameMap::nof_fpu_regs * wordSize); + } else { + // the fixed frame size is reg_save_frame_size = 62 words (see enum reg_save_layout), + // so we also add 32 * wordSize to sp when restore_fpu_registers == false + __ addi(sp, sp, 32 * wordSize); + } + + // if the number of 
popped regs is odd, the reserved slot for alignment will be removed + __ pop_reg(RegSet::range(x5, x31), sp); // integer registers except ra(x1) & sp(x2) & gp(x3) & tp(x4) +} + +static void restore_live_registers_except_r10(StubAssembler* sasm, bool restore_fpu_registers = true) { + if (restore_fpu_registers) { + for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { + __ fld(as_FloatRegister(i), Address(sp, i * wordSize)); + } + __ addi(sp, sp, FrameMap::nof_fpu_regs * wordSize); + } else { + // the fixed frame size is reg_save_frame_size = 62 words (see enum reg_save_layout), + // so we also add 32 * wordSize to sp when restore_fpu_registers == false + __ addi(sp, sp, 32 * wordSize); + } + + // pop integer registers except ra(x1) & sp(x2) & gp(x3) & tp(x4) & x10 + // there is one reserved slot for alignment on the stack in save_live_registers(). + __ pop_reg(RegSet::range(x5, x9), sp); // pop x5 ~ x9 with the reserved slot for alignment + __ pop_reg(RegSet::range(x11, x31), sp); // pop x11 ~ x31; x10 will be automatically skipped here +} + +void Runtime1::initialize_pd() { + int i = 0; + int sp_offset = 0; + const int step = 2; // SP offsets are in halfwords + + // all float registers are saved explicitly + for (i = 0; i < FrameMap::nof_fpu_regs; i++) { + fpu_reg_save_offsets[i] = sp_offset; + sp_offset += step; + } + + // a slot reserved for stack 16-byte alignment, see MacroAssembler::push_reg + sp_offset += step; + // we save x5 ~ x31, except x0 ~ x4: loop starts from x5 + for (i = 5; i < FrameMap::nof_cpu_regs; i++) { + cpu_reg_save_offsets[i] = sp_offset; + sp_offset += step; + } +} + +// target: the entry point of the method that creates and posts the exception oop +// has_argument: true if the exception needs arguments (passed in t0 and t1) + +OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) { + // make a frame and preserve the caller's caller-save registers + OopMap* oop_map = save_live_registers(sasm); + assert_cond(oop_map != 
NULL); + int call_offset = 0; + if (!has_argument) { + call_offset = __ call_RT(noreg, noreg, target); + } else { + __ mv(c_rarg1, t0); + __ mv(c_rarg2, t1); + call_offset = __ call_RT(noreg, noreg, target); + } + OopMapSet* oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, oop_map); + + return oop_maps; +} + +OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { + __ block_comment("generate_handle_exception"); + + // incoming parameters + const Register exception_oop = x10; + const Register exception_pc = x13; + + OopMapSet* oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + OopMap* oop_map = NULL; + + switch (id) { + case forward_exception_id: + // We're handling an exception in the context of a compiled frame. + // The registers have been saved in the standard places. Perform + // an exception lookup in the caller and dispatch to the handler + // if found. Otherwise unwind and dispatch to the caller's + // exception handler. + oop_map = generate_oop_map(sasm, 1 /* save_fpu_registers */); + + // load and clear pending exception oop into x10 + __ ld(exception_oop, Address(xthread, Thread::pending_exception_offset())); + __ sd(zr, Address(xthread, Thread::pending_exception_offset())); + + // load issuing PC (the return address for this stub) into x13 + __ ld(exception_pc, Address(fp, frame::return_addr_offset * BytesPerWord)); + + // make sure that the vm_results are cleared (may be unnecessary) + __ sd(zr, Address(xthread, JavaThread::vm_result_offset())); + __ sd(zr, Address(xthread, JavaThread::vm_result_2_offset())); + break; + case handle_exception_nofpu_id: + case handle_exception_id: + // At this point all registers MAY be live. + oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: { + // At this point all registers except exception oop (x10) and + // exception pc (ra) are dead. 
+ const int frame_size = 2 /* fp, return address */; + oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); + sasm->set_frame_size(frame_size); + break; + } + default: ShouldNotReachHere(); + } + + // only x10 and x13 are valid at this time: poison x9, x12, x14 and x15 (ASSERT builds only) + __ invalidate_registers(false, true, true, false, true, true); + // verify that x10 contains a valid exception + __ verify_not_null_oop(exception_oop); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are + // empty before writing to them + Label oop_empty; + __ ld(t0, Address(xthread, JavaThread::exception_oop_offset())); + __ beqz(t0, oop_empty); + __ stop("exception oop already set"); + __ bind(oop_empty); + + Label pc_empty; + __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); + __ beqz(t0, pc_empty); + __ stop("exception pc already set"); + __ bind(pc_empty); +#endif + + // save exception oop and issuing pc into JavaThread + // (exception handler will load it from here) + __ sd(exception_oop, Address(xthread, JavaThread::exception_oop_offset())); + __ sd(exception_pc, Address(xthread, JavaThread::exception_pc_offset())); + + // patch throwing pc into return address (has bci & oop map) + __ sd(exception_pc, Address(fp, frame::return_addr_offset * BytesPerWord)); + + // compute the exception handler. + // the exception oop and the throwing pc are read from the fields in JavaThread + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); + guarantee(oop_map != NULL, "NULL oop_map!"); + oop_maps->add_gc_map(call_offset, oop_map); + + // x10: handler address + // will be the deopt blob if nmethod was deoptimized while we looked up + // handler regardless of whether handler existed in the nmethod. 
+ + // only x10 is valid at this time, all other registers have been destroyed by the runtime call + __ invalidate_registers(false, true, true, true, true, true); + + // patch the return address, this stub will directly return to the exception handler + __ sd(x10, Address(fp, frame::return_addr_offset * BytesPerWord)); + + switch (id) { + case forward_exception_id: + case handle_exception_nofpu_id: + case handle_exception_id: + // Restore the registers that were saved at the beginning. + restore_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: + break; + default: ShouldNotReachHere(); + } + + return oop_maps; +} + + +void Runtime1::generate_unwind_exception(StubAssembler *sasm) { + // incoming parameters + const Register exception_oop = x10; + // other registers used in this stub + const Register handler_addr = x11; + + // verify that only x10 is valid at this time + __ invalidate_registers(false, true, true, true, true, true); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are empty + Label oop_empty; + __ ld(t0, Address(xthread, JavaThread::exception_oop_offset())); + __ beqz(t0, oop_empty); + __ stop("exception oop must be empty"); + __ bind(oop_empty); + + Label pc_empty; + __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); + __ beqz(t0, pc_empty); + __ stop("exception pc must be empty"); + __ bind(pc_empty); +#endif + + // Save our return address because + // exception_handler_for_return_address will destroy it. 
We also + // save exception_oop + __ addi(sp, sp, -2 * wordSize); + __ sd(exception_oop, Address(sp, wordSize)); + __ sd(ra, Address(sp)); + + // search the exception handler address of the caller (using the return address) + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), xthread, ra); + // x10: exception handler address of the caller + + // Only x10 is valid at this time; all other registers have been + // destroyed by the call. NOTE(review): inv_x14 is false below, so x14 is not poisoned - confirm this is intended. + __ invalidate_registers(false, true, true, true, false, true); + + // move result of call into correct register + __ mv(handler_addr, x10); + + // get throwing pc (= return address). + // ra has been destroyed by the call + __ ld(ra, Address(sp)); + __ ld(exception_oop, Address(sp, wordSize)); + __ addi(sp, sp, 2 * wordSize); + __ mv(x13, ra); + + __ verify_not_null_oop(exception_oop); + + // continue at exception handler (return address removed) + // note: do *not* remove arguments when unwinding the + // activation since the caller assumes having + // all arguments on the stack when entering the + // runtime to determine the exception handler + // (GC happens at call site with arguments!) + // x10: exception oop + // x13: throwing pc + // x11: exception handler + __ jr(handler_addr); +} + +OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { + // use the maximum number of runtime-arguments here because it is difficult to + // distinguish each RT-Call. + // Note: This number affects also the RT-Call in generate_handle_exception because + // the oop-map is shared for all calls. 
+ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + OopMap* oop_map = save_live_registers(sasm); + assert_cond(oop_map != NULL); + + __ mv(c_rarg0, xthread); + Label retaddr; + __ set_last_Java_frame(sp, fp, retaddr, t0); + // do the call + int32_t off = 0; + __ la_patchable(t0, RuntimeAddress(target), off); + __ jalr(x1, t0, off); + __ bind(retaddr); + OopMapSet* oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(__ offset(), oop_map); + // verify callee-saved register +#ifdef ASSERT + { Label L; + __ get_thread(t0); + __ beq(xthread, t0, L); + __ stop("StubAssembler::call_RT: xthread not callee saved?"); + __ bind(L); + } +#endif + __ reset_last_Java_frame(true); + +#ifdef ASSERT + // Check that fields in JavaThread for exception oop and issuing pc are empty + Label oop_empty; + __ ld(t0, Address(xthread, Thread::pending_exception_offset())); + __ beqz(t0, oop_empty); + __ stop("exception oop must be empty"); + __ bind(oop_empty); + + Label pc_empty; + __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); + __ beqz(t0, pc_empty); + __ stop("exception pc must be empty"); + __ bind(pc_empty); +#endif + + // Runtime will return true if the nmethod has been deoptimized, this is the + // expected scenario and anything else is an error. Note that we maintain a + // check on the result purely as a defensive measure. + Label no_deopt; + __ beqz(x10, no_deopt); // Have we deoptimized? + + // Perform a re-execute. The proper return address is already on the stack, + // we just need to restore registers, pop all of our frames but the return + // address and jump to the deopt blob. 
+ + restore_live_registers(sasm); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + + __ bind(no_deopt); + __ stop("deopt not performed"); + + return oop_maps; +} + +OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + // for better readability + const bool dont_gc_arguments = false; + + // default value; overwritten for some optimized stubs that are called from methods that do not use the fpu + bool save_fpu_registers = true; + + // stub code & info for the different stubs + OopMapSet* oop_maps = NULL; + switch (id) { + { + case forward_exception_id: + { + oop_maps = generate_handle_exception(id, sasm); + __ leave(); + __ ret(); + } + break; + + case throw_div0_exception_id: + { + StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: + { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; + + case new_instance_id: + case fast_new_instance_id: + case fast_new_instance_init_check_id: + { + Register klass = x13; // Incoming + Register obj = x10; // Result + + if (id == new_instance_id) { + __ set_info("new_instance", dont_gc_arguments); + } else if (id == fast_new_instance_id) { + __ set_info("fast new_instance", dont_gc_arguments); + } else { + assert(id == fast_new_instance_init_check_id, "bad StubID"); + __ set_info("fast new_instance init check", dont_gc_arguments); + } + + // If TLAB is disabled, see if there is support for inlining contiguous + // allocations. + // Otherwise, just go to the slow path. 
+ if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && + !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { + Label slow_path; + Register obj_size = x12; + Register tmp1 = x9; + Register tmp2 = x14; + assert_different_registers(klass, obj, obj_size, tmp1, tmp2); + + const int sp_offset = 2; + const int x9_offset = 1; + const int zr_offset = 0; + __ addi(sp, sp, -(sp_offset * wordSize)); + __ sd(x9, Address(sp, x9_offset * wordSize)); + __ sd(zr, Address(sp, zr_offset * wordSize)); + + if (id == fast_new_instance_init_check_id) { + // make sure the klass is initialized + __ lbu(t0, Address(klass, InstanceKlass::init_state_offset())); + __ mv(t1, InstanceKlass::fully_initialized); + __ bne(t0, t1, slow_path); + } + +#ifdef ASSERT + // assert object can be fast path allocated + { + Label ok, not_ok; + __ lw(obj_size, Address(klass, Klass::layout_helper_offset())); + // make sure it's an instance. For instances, layout helper is a positive number. + // For arrays, layout helper is a negative number + __ blez(obj_size, not_ok); + __ andi(t0, obj_size, Klass::_lh_instance_slow_path_bit); + __ beqz(t0, ok); + __ bind(not_ok); + __ stop("assert(can be fast path allocated)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + // get the instance size + __ lwu(obj_size, Address(klass, Klass::layout_helper_offset())); + + __ eden_allocate(obj, obj_size, 0, tmp1, slow_path); + + __ initialize_object(obj, klass, obj_size, 0, tmp1, tmp2, /* is_tlab_allocated */ false); + __ verify_oop(obj); + __ ld(x9, Address(sp, x9_offset * wordSize)); + __ ld(zr, Address(sp, zr_offset * wordSize)); + __ addi(sp, sp, sp_offset * wordSize); + __ ret(); + + __ bind(slow_path); + __ ld(x9, Address(sp, x9_offset * wordSize)); + __ ld(zr, Address(sp, zr_offset * wordSize)); + __ addi(sp, sp, sp_offset * wordSize); + } + + __ enter(); + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); + int call_offset = __ call_RT(obj, 
noreg, CAST_FROM_FN_PTR(address, new_instance), klass); + oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r10(sasm); + __ verify_oop(obj); + __ leave(); + __ ret(); + + // x10: new instance + } + + break; + + case counter_overflow_id: + { + Register bci = x10; + Register method = x11; + __ enter(); + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); + + const int bci_off = 0; + const int method_off = 1; + // Retrieve bci + __ lw(bci, Address(fp, bci_off * BytesPerWord)); + // And a pointer to the Method* + __ ld(method, Address(fp, method_off * BytesPerWord)); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); + oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ leave(); + __ ret(); + } + break; + + case new_type_array_id: + case new_object_array_id: + { + Register length = x9; // Incoming + Register klass = x13; // Incoming + Register obj = x10; // Result + + if (id == new_type_array_id) { + __ set_info("new_type_array", dont_gc_arguments); + } else { + __ set_info("new_object_array", dont_gc_arguments); + } + +#ifdef ASSERT + // assert object type is really an array of the proper kind + { + Label ok; + Register tmp = obj; + __ lwu(tmp, Address(klass, Klass::layout_helper_offset())); + __ sraiw(tmp, tmp, Klass::_lh_array_tag_shift); + int tag = ((id == new_type_array_id) ? Klass::_lh_array_tag_type_value : Klass::_lh_array_tag_obj_value); + __ mv(t0, tag); + __ beq(t0, tmp, ok); + __ stop("assert(is an array klass)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + // If TLAB is disabled, see if there is support for inlining contiguous + // allocations. + // Otherwise, just go to the slow path. 
+ if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { + Register arr_size = x14; + Register tmp1 = x12; + Register tmp2 = x15; + Label slow_path; + assert_different_registers(length, klass, obj, arr_size, tmp1, tmp2); + + // check that array length is small enough for fast path. + __ mv(t0, C1_MacroAssembler::max_array_allocation_length); + __ bgtu(length, t0, slow_path); + + // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) + __ lwu(tmp1, Address(klass, Klass::layout_helper_offset())); + __ andi(t0, tmp1, 0x1f); + __ sll(arr_size, length, t0); + int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); + int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; + __ slli(tmp1, tmp1, XLEN - lh_header_size_msb); + __ srli(tmp1, tmp1, XLEN - lh_header_size_width); + __ add(arr_size, arr_size, tmp1); + __ addi(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up + __ andi(arr_size, arr_size, ~(uint)MinObjAlignmentInBytesMask); + + __ eden_allocate(obj, arr_size, 0, tmp1, slow_path); // preserves arr_size + + __ initialize_header(obj, klass, length, tmp1, tmp2); + __ lbu(tmp1, Address(klass, + in_bytes(Klass::layout_helper_offset()) + + (Klass::_lh_header_size_shift / BitsPerByte))); + assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); + assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); + __ andi(tmp1, tmp1, Klass::_lh_header_size_mask); + __ sub(arr_size, arr_size, tmp1); // body length + __ add(tmp1, tmp1, obj); // body start + __ initialize_body(tmp1, arr_size, 0, tmp2); + __ membar(MacroAssembler::StoreStore); + __ verify_oop(obj); + + __ ret(); + + __ bind(slow_path); + } + + __ enter(); + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); + int call_offset = 0; + if (id == new_type_array_id) { + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); + } else { + call_offset = __ call_RT(obj, 
noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); + } + + oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r10(sasm); + + __ verify_oop(obj); + __ leave(); + __ ret(); + + // x10: new array + } + break; + + case new_multi_array_id: + { + StubFrame f(sasm, "new_multi_array", dont_gc_arguments); + // x10: klass + // x9: rank + // x12: address of 1st dimension + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); + __ mv(c_rarg1, x10); + __ mv(c_rarg3, x12); + __ mv(c_rarg2, x9); + int call_offset = __ call_RT(x10, noreg, CAST_FROM_FN_PTR(address, new_multi_array), x11, x12, x13); + + oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r10(sasm); + + // x10: new multi array + __ verify_oop(x10); + } + break; + + case register_finalizer_id: + { + __ set_info("register_finalizer", dont_gc_arguments); + + // This is called via call_runtime so the arguments + // will be placed in C abi locations + __ verify_oop(c_rarg0); + + // load the klass and check the has finalizer flag + Label register_finalizer; + Register t = x15; + __ load_klass(t, x10); + __ lwu(t, Address(t, Klass::access_flags_offset())); + __ andi(t0, t, JVM_ACC_HAS_FINALIZER); + __ bnez(t0, register_finalizer); + __ ret(); + + __ bind(register_finalizer); + __ enter(); + OopMap* oop_map = save_live_registers(sasm); + assert_cond(oop_map != NULL); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), x10); + oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, oop_map); + + // Now restore all the live registers + restore_live_registers(sasm); + + __ leave(); + __ ret(); + } + break; + + case throw_class_cast_exception_id: + { + StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); + 
oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { + StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, + CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } + break; + + case slow_subtype_check_id: + { + // Typical calling sequence: + // push klass_RInfo (object klass or other subclass) + // push sup_k_RInfo (array element klass or other superclass) + // jump to slow_subtype_check + // Note that the subclass is pushed first, and is therefore deepest. + enum layout { + x10_off, x10_off_hi, + x12_off, x12_off_hi, + x14_off, x14_off_hi, + x15_off, x15_off_hi, + sup_k_off, sup_k_off_hi, + klass_off, klass_off_hi, + framesize, + result_off = sup_k_off + }; + + __ set_info("slow_subtype_check", dont_gc_arguments); + __ push_reg(RegSet::of(x10, x12, x14, x15), sp); + + __ ld(x14, Address(sp, (klass_off) * VMRegImpl::stack_slot_size)); // sub klass + __ ld(x10, Address(sp, (sup_k_off) * VMRegImpl::stack_slot_size)); // super klass + + Label miss; + __ check_klass_subtype_slow_path(x14, x10, x12, x15, NULL, &miss); + + // fallthrough on success: + __ li(t0, 1); + __ sd(t0, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result + __ pop_reg(RegSet::of(x10, x12, x14, x15), sp); + __ ret(); + + __ bind(miss); + __ sd(zr, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result + __ pop_reg(RegSet::of(x10, x12, x14, x15), sp); + __ ret(); + } + break; + + case monitorenter_nofpu_id: + save_fpu_registers = false; + // fall through + case monitorenter_id: + { + StubFrame f(sasm, "monitorenter", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, save_fpu_registers); + assert_cond(map != NULL); + + // Called with store_parameter and not C abi + f.load_argument(1, x10); // x10: object + f.load_argument(0, x11); // 
x11: lock address + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), x10, x11); + + oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm, save_fpu_registers); + } + break; + + case monitorexit_nofpu_id: + save_fpu_registers = false; + // fall through + case monitorexit_id: + { + StubFrame f(sasm, "monitorexit", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, save_fpu_registers); + assert_cond(map != NULL); + + // Called with store_parameter and not C abi + f.load_argument(0, x10); // x10: lock address + + // note: really a leaf routine but must setup last java sp + // => use call_RT for now (speed can be improved by + // doing last java sp setup manually) + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), x10); + + oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm, save_fpu_registers); + } + break; + + case deoptimize_id: + { + StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); + OopMap* oop_map = save_live_registers(sasm); + assert_cond(oop_map != NULL); + f.load_argument(0, c_rarg1); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), c_rarg1); + + oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers(sasm); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + } + break; + + case throw_range_check_failed_id: + { + StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; + + case 
unwind_exception_id: + { + __ set_info("unwind_exception", dont_gc_arguments); + // note: no stubframe since we are about to leave the current + // activation and we are calling a leaf VM function only. + generate_unwind_exception(sasm); + } + break; + + case access_field_patching_id: + { + StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } + break; + + case load_klass_patching_id: + { + StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } + break; + + case load_mirror_patching_id: + { + StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } + break; + + case load_appendix_patching_id: + { + StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } + break; + + case handle_exception_nofpu_id: + case handle_exception_id: + { + StubFrame f(sasm, "handle_exception", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case handle_exception_from_callee_id: + { + StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case throw_index_exception_id: + { + StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + case throw_array_store_exception_id: + { + StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, 
does_not_return); + // tos + 0: link + // + 1: return address + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); + } + break; + + case predicate_failed_trap_id: + { + StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); + + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); + oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ leave(); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + } + break; + + case dtrace_object_alloc_id: + { // c_rarg0: object + StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); + save_live_registers(sasm); + + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), c_rarg0); + + restore_live_registers(sasm); + } + break; + + default: + { + StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); + __ li(x10, (int) id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10); + __ should_not_reach_here(); + } + break; + } + } + return oop_maps; +} + +#undef __ + +const char *Runtime1::pd_name_for_address(address entry) { Unimplemented(); return 0; } Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_globals_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_globals_riscv.hpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_C1_GLOBALS_RISCV_HPP +#define CPU_RISCV_C1_GLOBALS_RISCV_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the client compiler. 
+// (see c1_globals.hpp) + +#ifndef COMPILER2 +define_pd_global(bool, BackgroundCompilation, true ); +define_pd_global(bool, InlineIntrinsics, true ); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, false); +define_pd_global(bool, UseOnStackReplacement, true ); +define_pd_global(bool, TieredCompilation, false); +define_pd_global(intx, CompileThreshold, 1500 ); + +define_pd_global(intx, OnStackReplacePercentage, 933 ); +define_pd_global(intx, NewSizeThreadIncrease, 4*K ); +define_pd_global(intx, InitialCodeCacheSize, 160*K); +define_pd_global(intx, ReservedCodeCacheSize, 32*M ); +define_pd_global(intx, NonProfiledCodeHeapSize, 13*M ); +define_pd_global(intx, ProfiledCodeHeapSize, 14*M ); +define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); +define_pd_global(bool, ProfileInterpreter, false); +define_pd_global(intx, CodeCacheExpansionSize, 32*K ); +define_pd_global(uintx, CodeCacheMinBlockLength, 1); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); +define_pd_global(bool, NeverActAsServerClassMachine, true ); +define_pd_global(uint64_t, MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); +#endif // !COMPILER2 +define_pd_global(bool, UseTypeProfile, false); + +define_pd_global(bool, OptimizeSinglePrecision, true ); +define_pd_global(bool, CSEArrayLength, false); +define_pd_global(bool, TwoOperandLIRForm, false); + +#endif // CPU_RISCV_C1_GLOBALS_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -0,0 +1,1641 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "opto/c2_MacroAssembler.hpp" +#include "opto/intrinsicnode.hpp" +#include "opto/subnode.hpp" +#include "runtime/stubRoutines.hpp" + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) block_comment(str) +#define STOP(error) block_comment(error); stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// short string +// StringUTF16.indexOfChar +// StringLatin1.indexOfChar +void C2_MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, + Register ch, Register result, + bool isL) +{ + Register ch1 = t0; + Register index = t1; + + BLOCK_COMMENT("string_indexof_char_short {"); + + Label LOOP, LOOP1, LOOP4, LOOP8; + Label MATCH, MATCH1, MATCH2, MATCH3, + MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; + + mv(result, -1); + mv(index, zr); + + bind(LOOP); + addi(t0, index, 8); + ble(t0, cnt1, LOOP8); + addi(t0, index, 4); + ble(t0, cnt1, 
LOOP4); + j(LOOP1); + + bind(LOOP8); + isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); + beq(ch, ch1, MATCH); + isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); + beq(ch, ch1, MATCH1); + isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); + beq(ch, ch1, MATCH2); + isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); + beq(ch, ch1, MATCH3); + isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); + beq(ch, ch1, MATCH4); + isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); + beq(ch, ch1, MATCH5); + isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); + beq(ch, ch1, MATCH6); + isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); + beq(ch, ch1, MATCH7); + addi(index, index, 8); + addi(str1, str1, isL ? 8 : 16); + blt(index, cnt1, LOOP); + j(NOMATCH); + + bind(LOOP4); + isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); + beq(ch, ch1, MATCH); + isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); + beq(ch, ch1, MATCH1); + isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); + beq(ch, ch1, MATCH2); + isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); + beq(ch, ch1, MATCH3); + addi(index, index, 4); + addi(str1, str1, isL ? 4 : 8); + bge(index, cnt1, NOMATCH); + + bind(LOOP1); + isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); + beq(ch, ch1, MATCH); + addi(index, index, 1); + addi(str1, str1, isL ? 
1 : 2); + blt(index, cnt1, LOOP1); + j(NOMATCH); + + bind(MATCH1); + addi(index, index, 1); + j(MATCH); + + bind(MATCH2); + addi(index, index, 2); + j(MATCH); + + bind(MATCH3); + addi(index, index, 3); + j(MATCH); + + bind(MATCH4); + addi(index, index, 4); + j(MATCH); + + bind(MATCH5); + addi(index, index, 5); + j(MATCH); + + bind(MATCH6); + addi(index, index, 6); + j(MATCH); + + bind(MATCH7); + addi(index, index, 7); + + bind(MATCH); + mv(result, index); + bind(NOMATCH); + BLOCK_COMMENT("} string_indexof_char_short"); +} + +// StringUTF16.indexOfChar +// StringLatin1.indexOfChar +void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, + Register ch, Register result, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + bool isL) +{ + Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; + Register ch1 = t0; + Register orig_cnt = t1; + Register mask1 = tmp3; + Register mask2 = tmp2; + Register match_mask = tmp1; + Register trailing_char = tmp4; + Register unaligned_elems = tmp4; + + BLOCK_COMMENT("string_indexof_char {"); + beqz(cnt1, NOMATCH); + + addi(t0, cnt1, isL ? 
-32 : -16); + bgtz(t0, DO_LONG); + string_indexof_char_short(str1, cnt1, ch, result, isL); + j(DONE); + + bind(DO_LONG); + mv(orig_cnt, cnt1); + if (AvoidUnalignedAccesses) { + Label ALIGNED; + andi(unaligned_elems, str1, 0x7); + beqz(unaligned_elems, ALIGNED); + sub(unaligned_elems, unaligned_elems, 8); + neg(unaligned_elems, unaligned_elems); + if (!isL) { + srli(unaligned_elems, unaligned_elems, 1); + } + // do unaligned part per element + string_indexof_char_short(str1, unaligned_elems, ch, result, isL); + bgez(result, DONE); + mv(orig_cnt, cnt1); + sub(cnt1, cnt1, unaligned_elems); + bind(ALIGNED); + } + + // duplicate ch + if (isL) { + slli(ch1, ch, 8); + orr(ch, ch1, ch); + } + slli(ch1, ch, 16); + orr(ch, ch1, ch); + slli(ch1, ch, 32); + orr(ch, ch1, ch); + + if (!isL) { + slli(cnt1, cnt1, 1); + } + + uint64_t mask0101 = UCONST64(0x0101010101010101); + uint64_t mask0001 = UCONST64(0x0001000100010001); + mv(mask1, isL ? mask0101 : mask0001); + uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); + uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); + mv(mask2, isL ? 
mask7f7f : mask7fff); + + bind(CH1_LOOP); + ld(ch1, Address(str1)); + addi(str1, str1, 8); + addi(cnt1, cnt1, -8); + compute_match_mask(ch1, ch, match_mask, mask1, mask2); + bnez(match_mask, HIT); + bgtz(cnt1, CH1_LOOP); + j(NOMATCH); + + bind(HIT); + ctzc_bit(trailing_char, match_mask, isL, ch1, result); + srli(trailing_char, trailing_char, 3); + addi(cnt1, cnt1, 8); + ble(cnt1, trailing_char, NOMATCH); + // match case + if (!isL) { + srli(cnt1, cnt1, 1); + srli(trailing_char, trailing_char, 1); + } + + sub(result, orig_cnt, cnt1); + add(result, result, trailing_char); + j(DONE); + + bind(NOMATCH); + mv(result, -1); + + bind(DONE); + BLOCK_COMMENT("} string_indexof_char"); +} + +typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); + +// Search for needle in haystack and return index or -1 +// x10: result +// x11: haystack +// x12: haystack_len +// x13: needle +// x14: needle_len +void C2_MacroAssembler::string_indexof(Register haystack, Register needle, + Register haystack_len, Register needle_len, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + Register tmp5, Register tmp6, + Register result, int ae) +{ + assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + + Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; + + Register ch1 = t0; + Register ch2 = t1; + Register nlen_tmp = tmp1; // needle len tmp + Register hlen_tmp = tmp2; // haystack len tmp + Register result_tmp = tmp4; + + bool isLL = ae == StrIntrinsicNode::LL; + + bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; + bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; + int needle_chr_shift = needle_isL ? 0 : 1; + int haystack_chr_shift = haystack_isL ? 0 : 1; + int needle_chr_size = needle_isL ? 1 : 2; + int haystack_chr_size = haystack_isL ? 1 : 2; + load_chr_insn needle_load_1chr = needle_isL ? 
(load_chr_insn)&MacroAssembler::lbu : + (load_chr_insn)&MacroAssembler::lhu; + load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : + (load_chr_insn)&MacroAssembler::lhu; + + BLOCK_COMMENT("string_indexof {"); + + // Note, inline_string_indexOf() generates checks: + // if (pattern.count > src.count) return -1; + // if (pattern.count == 0) return 0; + + // We have two strings, a source string in haystack, haystack_len and a pattern string + // in needle, needle_len. Find the first occurrence of pattern in source or return -1. + + // For larger pattern and source we use a simplified Boyer Moore algorithm. + // With a small pattern and source we use linear scan. + + // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. + sub(result_tmp, haystack_len, needle_len); + // needle_len < 8, use linear scan + sub(t0, needle_len, 8); + bltz(t0, LINEARSEARCH); + // needle_len >= 256, use linear scan + sub(t0, needle_len, 256); + bgez(t0, LINEARSTUB); + // needle_len >= haystack_len/4, use linear scan + srli(t0, haystack_len, 2); + bge(needle_len, t0, LINEARSTUB); + + // Boyer-Moore-Horspool introduction: + // The Boyer Moore algorithm is based on the description here:- + // + // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm + // + // This describes an algorithm with 2 shift rules. The 'Bad Character' rule + // and the 'Good Suffix' rule. + // + // These rules are essentially heuristics for how far we can shift the + // pattern along the search string. + // + // The implementation here uses the 'Bad Character' rule only because of the + // complexity of initialisation for the 'Good Suffix' rule. 
+ // + // This is also known as the Boyer-Moore-Horspool algorithm: + // + // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm + // + // #define ASIZE 256 + // + // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { + // int i, j; + // unsigned c; + // unsigned char bc[ASIZE]; + // + // /* Preprocessing */ + // for (i = 0; i < ASIZE; ++i) + // bc[i] = m; + // for (i = 0; i < m - 1; ) { + // c = pattern[i]; + // ++i; + // // c < 256 for Latin1 string, so, no need for branch + // #ifdef PATTERN_STRING_IS_LATIN1 + // bc[c] = m - i; + // #else + // if (c < ASIZE) bc[c] = m - i; + // #endif + // } + // + // /* Searching */ + // j = 0; + // while (j <= n - m) { + // c = src[i+j]; + // if (pattern[m-1] == c) + // int k; + // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); + // if (k < 0) return j; + // // c < 256 for Latin1 string, so, no need for branch + // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 + // // LL case: (c< 256) always true. Remove branch + // j += bc[pattern[j+m-1]]; + // #endif + // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF + // // UU case: need if (c < ASIZE) check. Skip 1 character if not. + // if (c < ASIZE) + // j += bc[pattern[j+m-1]]; + // else + // j += 1 + // #endif + // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_LATIN1 + // // UL case: need if (c < ASIZE) check. Skip m (pattern length) characters if not. + // if (c < ASIZE) + // j += bc[pattern[j+m-1]]; + // else + // j += m + // #endif + // } + // return -1; + // } + + // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result + Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, + BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; + + Register haystack_end = haystack_len; + Register skipch = tmp2; + + // pattern length is >=8, so, we can read at least 1 register for cases when + // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for + // UL case. We'll re-read last character in inner pre-loop code to have + // single outer pre-loop load + const int firstStep = isLL ? 
7 : 3; + + const int ASIZE = 256; + const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) + + sub(sp, sp, ASIZE); + + // init BC offset table with default value: needle_len + slli(t0, needle_len, 8); + orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] + slli(tmp1, t0, 16); + orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] + slli(tmp1, t0, 32); + orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] + + mv(ch1, sp); // ch1 is t0 + mv(tmp6, ASIZE / STORE_BYTES); // loop iterations + + bind(BM_INIT_LOOP); + // for (i = 0; i < ASIZE; ++i) + // bc[i] = m; + for (int i = 0; i < 4; i++) { + sd(tmp5, Address(ch1, i * wordSize)); + } + add(ch1, ch1, 32); + sub(tmp6, tmp6, 4); + bgtz(tmp6, BM_INIT_LOOP); + + sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern + Register orig_haystack = tmp5; + mv(orig_haystack, haystack); + // result_tmp = tmp4 + shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); + sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 + mv(tmp3, needle); + + // for (i = 0; i < m - 1; ) { + // c = pattern[i]; + // ++i; + // // c < 256 for Latin1 string, so, no need for branch + // #ifdef PATTERN_STRING_IS_LATIN1 + // bc[c] = m - i; + // #else + // if (c < ASIZE) bc[c] = m - i; + // #endif + // } + bind(BCLOOP); + (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); + add(tmp3, tmp3, needle_chr_size); + if (!needle_isL) { + // ae == StrIntrinsicNode::UU + mv(tmp6, ASIZE); + bgeu(ch1, tmp6, BCSKIP); + } + add(tmp4, sp, ch1); + sb(ch2, Address(tmp4)); // store skip offset to BC offset table + + bind(BCSKIP); + sub(ch2, ch2, 1); // for next pattern element, skip distance -1 + bgtz(ch2, BCLOOP); + + // tmp6: pattern end, address after needle + shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); + if (needle_isL == haystack_isL) { + // load last 8 bytes (8LL/4UU symbols) + ld(tmp6, Address(tmp6, -wordSize)); + } else { + // UL: from UTF-16(source) 
search Latin1(pattern) + lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) + // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d + // We'll have to wait until load completed, but it's still faster than per-character loads+checks + srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a + slli(ch2, tmp6, XLEN - 24); + srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b + slli(ch1, tmp6, XLEN - 16); + srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c + andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d + slli(ch2, ch2, 16); + orr(ch2, ch2, ch1); // 0x00000b0c + slli(result, tmp3, 48); // use result as temp register + orr(tmp6, tmp6, result); // 0x0a00000d + slli(result, ch2, 16); + orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d + } + + // i = m - 1; + // skipch = j + i; + // if (skipch == pattern[m - 1] + // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); + // else + // move j with bad char offset table + bind(BMLOOPSTR2); + // compare pattern to source string backward + shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); + (this->*haystack_load_1chr)(skipch, Address(result), noreg); + sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 + if (needle_isL == haystack_isL) { + // re-init tmp3. It's for free because it's executed in parallel with + // load above. Alternative is to initialize it before loop, but it'll + // affect performance on in-order systems with 2 or more ld/st pipelines + srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] + } + if (!isLL) { // UU/UL case + slli(ch2, nlen_tmp, 1); // offsets in bytes + } + bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char + add(result, haystack, isLL ? 
nlen_tmp : ch2); + ld(ch2, Address(result)); // load 8 bytes from source string + mv(ch1, tmp6); + if (isLL) { + j(BMLOOPSTR1_AFTER_LOAD); + } else { + sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 + j(BMLOOPSTR1_CMP); + } + + bind(BMLOOPSTR1); + shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); + (this->*needle_load_1chr)(ch1, Address(ch1), noreg); + shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); + (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); + + bind(BMLOOPSTR1_AFTER_LOAD); + sub(nlen_tmp, nlen_tmp, 1); + bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); + + bind(BMLOOPSTR1_CMP); + beq(ch1, ch2, BMLOOPSTR1); + + bind(BMSKIP); + if (!isLL) { + // if we've met UTF symbol while searching Latin1 pattern, then we can + // skip needle_len symbols + if (needle_isL != haystack_isL) { + mv(result_tmp, needle_len); + } else { + mv(result_tmp, 1); + } + mv(t0, ASIZE); + bgeu(skipch, t0, BMADV); + } + add(result_tmp, sp, skipch); + lbu(result_tmp, Address(result_tmp)); // load skip offset + + bind(BMADV); + sub(nlen_tmp, needle_len, 1); + // move haystack after bad char skip offset + shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); + ble(haystack, haystack_end, BMLOOPSTR2); + add(sp, sp, ASIZE); + j(NOMATCH); + + bind(BMLOOPSTR1_LASTCMP); + bne(ch1, ch2, BMSKIP); + + bind(BMMATCH); + sub(result, haystack, orig_haystack); + if (!haystack_isL) { + srli(result, result, 1); + } + add(sp, sp, ASIZE); + j(DONE); + + bind(LINEARSTUB); + sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm + bltz(t0, LINEARSEARCH); + mv(result, zr); + RuntimeAddress stub = NULL; + if (isLL) { + stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); + assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); + } else if (needle_isL) { + stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); + assert(stub.target() != NULL, "string_indexof_linear_ul 
stub has not been generated"); + } else { + stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); + assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); + } + trampoline_call(stub); + j(DONE); + + bind(NOMATCH); + mv(result, -1); + j(DONE); + + bind(LINEARSEARCH); + string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); + + bind(DONE); + BLOCK_COMMENT("} string_indexof"); +} + +// string_indexof +// result: x10 +// src: x11 +// src_count: x12 +// pattern: x13 +// pattern_count: x14 or 1/2/3/4 +void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, + Register haystack_len, Register needle_len, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + int needle_con_cnt, Register result, int ae) +{ + // Note: + // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant + // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 + assert(needle_con_cnt <= 4, "Invalid needle constant count"); + assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + + Register ch1 = t0; + Register ch2 = t1; + Register hlen_neg = haystack_len, nlen_neg = needle_len; + Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; + + bool isLL = ae == StrIntrinsicNode::LL; + + bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; + bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; + int needle_chr_shift = needle_isL ? 0 : 1; + int haystack_chr_shift = haystack_isL ? 0 : 1; + int needle_chr_size = needle_isL ? 1 : 2; + int haystack_chr_size = haystack_isL ? 1 : 2; + + load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : + (load_chr_insn)&MacroAssembler::lhu; + load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : + (load_chr_insn)&MacroAssembler::lhu; + load_chr_insn load_2chr = isLL ? 
(load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; + load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; + + Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; + + Register first = tmp3; + + if (needle_con_cnt == -1) { + Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; + + sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); + bltz(t0, DOSHORT); + + (this->*needle_load_1chr)(first, Address(needle), noreg); + slli(t0, needle_len, needle_chr_shift); + add(needle, needle, t0); + neg(nlen_neg, t0); + slli(t0, result_tmp, haystack_chr_shift); + add(haystack, haystack, t0); + neg(hlen_neg, t0); + + bind(FIRST_LOOP); + add(t0, haystack, hlen_neg); + (this->*haystack_load_1chr)(ch2, Address(t0), noreg); + beq(first, ch2, STR1_LOOP); + + bind(STR2_NEXT); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, FIRST_LOOP); + j(NOMATCH); + + bind(STR1_LOOP); + add(nlen_tmp, nlen_neg, needle_chr_size); + add(hlen_tmp, hlen_neg, haystack_chr_size); + bgez(nlen_tmp, MATCH); + + bind(STR1_NEXT); + add(ch1, needle, nlen_tmp); + (this->*needle_load_1chr)(ch1, Address(ch1), noreg); + add(ch2, haystack, hlen_tmp); + (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); + bne(ch1, ch2, STR2_NEXT); + add(nlen_tmp, nlen_tmp, needle_chr_size); + add(hlen_tmp, hlen_tmp, haystack_chr_size); + bltz(nlen_tmp, STR1_NEXT); + j(MATCH); + + bind(DOSHORT); + if (needle_isL == haystack_isL) { + sub(t0, needle_len, 2); + bltz(t0, DO1); + bgtz(t0, DO3); + } + } + + if (needle_con_cnt == 4) { + Label CH1_LOOP; + (this->*load_4chr)(ch1, Address(needle), noreg); + sub(result_tmp, haystack_len, 4); + slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp + add(haystack, haystack, tmp3); + neg(hlen_neg, tmp3); + + bind(CH1_LOOP); + add(ch2, haystack, hlen_neg); + (this->*load_4chr)(ch2, Address(ch2), noreg); + beq(ch1, ch2, MATCH); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, CH1_LOOP); + 
j(NOMATCH); + } + + if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { + Label CH1_LOOP; + BLOCK_COMMENT("string_indexof DO2 {"); + bind(DO2); + (this->*load_2chr)(ch1, Address(needle), noreg); + if (needle_con_cnt == 2) { + sub(result_tmp, haystack_len, 2); + } + slli(tmp3, result_tmp, haystack_chr_shift); + add(haystack, haystack, tmp3); + neg(hlen_neg, tmp3); + + bind(CH1_LOOP); + add(tmp3, haystack, hlen_neg); + (this->*load_2chr)(ch2, Address(tmp3), noreg); + beq(ch1, ch2, MATCH); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, CH1_LOOP); + j(NOMATCH); + BLOCK_COMMENT("} string_indexof DO2"); + } + + if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { + Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; + BLOCK_COMMENT("string_indexof DO3 {"); + + bind(DO3); + (this->*load_2chr)(first, Address(needle), noreg); + (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); + if (needle_con_cnt == 3) { + sub(result_tmp, haystack_len, 3); + } + slli(hlen_tmp, result_tmp, haystack_chr_shift); + add(haystack, haystack, hlen_tmp); + neg(hlen_neg, hlen_tmp); + + bind(FIRST_LOOP); + add(ch2, haystack, hlen_neg); + (this->*load_2chr)(ch2, Address(ch2), noreg); + beq(first, ch2, STR1_LOOP); + + bind(STR2_NEXT); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, FIRST_LOOP); + j(NOMATCH); + + bind(STR1_LOOP); + add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); + add(ch2, haystack, hlen_tmp); + (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); + bne(ch1, ch2, STR2_NEXT); + j(MATCH); + BLOCK_COMMENT("} string_indexof DO3"); + } + + if (needle_con_cnt == -1 || needle_con_cnt == 1) { + Label DO1_LOOP; + + BLOCK_COMMENT("string_indexof DO1 {"); + bind(DO1); + (this->*needle_load_1chr)(ch1, Address(needle), noreg); + sub(result_tmp, haystack_len, 1); + mv(tmp3, result_tmp); + if (haystack_chr_shift) { + slli(tmp3, result_tmp, haystack_chr_shift); + } + add(haystack, haystack, 
tmp3); + neg(hlen_neg, tmp3); + + bind(DO1_LOOP); + add(tmp3, haystack, hlen_neg); + (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); + beq(ch1, ch2, MATCH); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, DO1_LOOP); + BLOCK_COMMENT("} string_indexof DO1"); + } + + bind(NOMATCH); + mv(result, -1); + j(DONE); + + bind(MATCH); + srai(t0, hlen_neg, haystack_chr_shift); + add(result, result_tmp, t0); + + bind(DONE); +} + +// Compare strings. +void C2_MacroAssembler::string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, + Register tmp3, int ae) +{ + Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, + DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, + SHORT_LOOP_START, TAIL_CHECK, L; + + const int STUB_THRESHOLD = 64 + 8; + bool isLL = ae == StrIntrinsicNode::LL; + bool isLU = ae == StrIntrinsicNode::LU; + bool isUL = ae == StrIntrinsicNode::UL; + + bool str1_isL = isLL || isLU; + bool str2_isL = isLL || isUL; + + // for L strings, 1 byte for 1 character + // for U strings, 2 bytes for 1 character + int str1_chr_size = str1_isL ? 1 : 2; + int str2_chr_size = str2_isL ? 1 : 2; + int minCharsInWord = isLL ? wordSize : wordSize / 2; + + load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; + load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; + + BLOCK_COMMENT("string_compare {"); + + // Bizzarely, the counts are passed in bytes, regardless of whether they + // are L or U strings, however the result is always in characters. + if (!str1_isL) { + sraiw(cnt1, cnt1, 1); + } + if (!str2_isL) { + sraiw(cnt2, cnt2, 1); + } + + // Compute the minimum of the string lengths and save the difference in result. 
+ sub(result, cnt1, cnt2); + bgt(cnt1, cnt2, L); + mv(cnt2, cnt1); + bind(L); + + // A very short string + li(t0, minCharsInWord); + ble(cnt2, t0, SHORT_STRING); + + // Compare longwords + // load first parts of strings and finish initialization while loading + { + if (str1_isL == str2_isL) { // LL or UU + // load 8 bytes once to compare + ld(tmp1, Address(str1)); + beq(str1, str2, DONE); + ld(tmp2, Address(str2)); + li(t0, STUB_THRESHOLD); + bge(cnt2, t0, STUB); + sub(cnt2, cnt2, minCharsInWord); + beqz(cnt2, TAIL_CHECK); + // convert cnt2 from characters to bytes + if (!str1_isL) { + slli(cnt2, cnt2, 1); + } + add(str2, str2, cnt2); + add(str1, str1, cnt2); + sub(cnt2, zr, cnt2); + } else if (isLU) { // LU case + lwu(tmp1, Address(str1)); + ld(tmp2, Address(str2)); + li(t0, STUB_THRESHOLD); + bge(cnt2, t0, STUB); + addi(cnt2, cnt2, -4); + add(str1, str1, cnt2); + sub(cnt1, zr, cnt2); + slli(cnt2, cnt2, 1); + add(str2, str2, cnt2); + inflate_lo32(tmp3, tmp1); + mv(tmp1, tmp3); + sub(cnt2, zr, cnt2); + addi(cnt1, cnt1, 4); + } else { // UL case + ld(tmp1, Address(str1)); + lwu(tmp2, Address(str2)); + li(t0, STUB_THRESHOLD); + bge(cnt2, t0, STUB); + addi(cnt2, cnt2, -4); + slli(t0, cnt2, 1); + sub(cnt1, zr, t0); + add(str1, str1, t0); + add(str2, str2, cnt2); + inflate_lo32(tmp3, tmp2); + mv(tmp2, tmp3); + sub(cnt2, zr, cnt2); + addi(cnt1, cnt1, 8); + } + addi(cnt2, cnt2, isUL ? 
4 : 8); + bgez(cnt2, TAIL); + xorr(tmp3, tmp1, tmp2); + bnez(tmp3, DIFFERENCE); + + // main loop + bind(NEXT_WORD); + if (str1_isL == str2_isL) { // LL or UU + add(t0, str1, cnt2); + ld(tmp1, Address(t0)); + add(t0, str2, cnt2); + ld(tmp2, Address(t0)); + addi(cnt2, cnt2, 8); + } else if (isLU) { // LU case + add(t0, str1, cnt1); + lwu(tmp1, Address(t0)); + add(t0, str2, cnt2); + ld(tmp2, Address(t0)); + addi(cnt1, cnt1, 4); + inflate_lo32(tmp3, tmp1); + mv(tmp1, tmp3); + addi(cnt2, cnt2, 8); + } else { // UL case + add(t0, str2, cnt2); + lwu(tmp2, Address(t0)); + add(t0, str1, cnt1); + ld(tmp1, Address(t0)); + inflate_lo32(tmp3, tmp2); + mv(tmp2, tmp3); + addi(cnt1, cnt1, 8); + addi(cnt2, cnt2, 4); + } + bgez(cnt2, TAIL); + + xorr(tmp3, tmp1, tmp2); + beqz(tmp3, NEXT_WORD); + j(DIFFERENCE); + bind(TAIL); + xorr(tmp3, tmp1, tmp2); + bnez(tmp3, DIFFERENCE); + // Last longword. In the case where length == 4 we compare the + // same longword twice, but that's still faster than another + // conditional branch. + if (str1_isL == str2_isL) { // LL or UU + ld(tmp1, Address(str1)); + ld(tmp2, Address(str2)); + } else if (isLU) { // LU case + lwu(tmp1, Address(str1)); + ld(tmp2, Address(str2)); + inflate_lo32(tmp3, tmp1); + mv(tmp1, tmp3); + } else { // UL case + lwu(tmp2, Address(str2)); + ld(tmp1, Address(str1)); + inflate_lo32(tmp3, tmp2); + mv(tmp2, tmp3); + } + bind(TAIL_CHECK); + xorr(tmp3, tmp1, tmp2); + beqz(tmp3, DONE); + + // Find the first different characters in the longwords and + // compute their difference. 
+ bind(DIFFERENCE); + ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb + srl(tmp1, tmp1, result); + srl(tmp2, tmp2, result); + if (isLL) { + andi(tmp1, tmp1, 0xFF); + andi(tmp2, tmp2, 0xFF); + } else { + andi(tmp1, tmp1, 0xFFFF); + andi(tmp2, tmp2, 0xFFFF); + } + sub(result, tmp1, tmp2); + j(DONE); + } + + bind(STUB); + RuntimeAddress stub = NULL; + switch (ae) { + case StrIntrinsicNode::LL: + stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); + break; + case StrIntrinsicNode::UU: + stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); + break; + case StrIntrinsicNode::LU: + stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); + break; + case StrIntrinsicNode::UL: + stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); + break; + default: + ShouldNotReachHere(); + } + assert(stub.target() != NULL, "compare_long_string stub has not been generated"); + trampoline_call(stub); + j(DONE); + + bind(SHORT_STRING); + // Is the minimum length zero? 
+ beqz(cnt2, DONE); + // arrange code to do most branches while loading and loading next characters + // while comparing previous + (this->*str1_load_chr)(tmp1, Address(str1), t0); + addi(str1, str1, str1_chr_size); + addi(cnt2, cnt2, -1); + beqz(cnt2, SHORT_LAST_INIT); + (this->*str2_load_chr)(cnt1, Address(str2), t0); + addi(str2, str2, str2_chr_size); + j(SHORT_LOOP_START); + bind(SHORT_LOOP); + addi(cnt2, cnt2, -1); + beqz(cnt2, SHORT_LAST); + bind(SHORT_LOOP_START); + (this->*str1_load_chr)(tmp2, Address(str1), t0); + addi(str1, str1, str1_chr_size); + (this->*str2_load_chr)(t0, Address(str2), t0); + addi(str2, str2, str2_chr_size); + bne(tmp1, cnt1, SHORT_LOOP_TAIL); + addi(cnt2, cnt2, -1); + beqz(cnt2, SHORT_LAST2); + (this->*str1_load_chr)(tmp1, Address(str1), t0); + addi(str1, str1, str1_chr_size); + (this->*str2_load_chr)(cnt1, Address(str2), t0); + addi(str2, str2, str2_chr_size); + beq(tmp2, t0, SHORT_LOOP); + sub(result, tmp2, t0); + j(DONE); + bind(SHORT_LOOP_TAIL); + sub(result, tmp1, cnt1); + j(DONE); + bind(SHORT_LAST2); + beq(tmp2, t0, DONE); + sub(result, tmp2, t0); + + j(DONE); + bind(SHORT_LAST_INIT); + (this->*str2_load_chr)(cnt1, Address(str2), t0); + addi(str2, str2, str2_chr_size); + bind(SHORT_LAST); + beq(tmp1, cnt1, DONE); + sub(result, tmp1, cnt1); + + bind(DONE); + + BLOCK_COMMENT("} string_compare"); +} + +void C2_MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, + Register tmp4, Register tmp5, Register tmp6, Register result, + Register cnt1, int elem_size) { + Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; + Register tmp1 = t0; + Register tmp2 = t1; + Register cnt2 = tmp2; // cnt2 only used in array length compare + Register elem_per_word = tmp6; + int log_elem_size = exact_log2(elem_size); + int length_offset = arrayOopDesc::length_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); + + assert(elem_size == 1 || elem_size == 2, "must be char or byte"); + assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); + li(elem_per_word, wordSize / elem_size); + + BLOCK_COMMENT("arrays_equals {"); + + // if (a1 == a2), return true + beq(a1, a2, SAME); + + mv(result, false); + beqz(a1, DONE); + beqz(a2, DONE); + lwu(cnt1, Address(a1, length_offset)); + lwu(cnt2, Address(a2, length_offset)); + bne(cnt2, cnt1, DONE); + beqz(cnt1, SAME); + + slli(tmp5, cnt1, 3 + log_elem_size); + sub(tmp5, zr, tmp5); + add(a1, a1, base_offset); + add(a2, a2, base_offset); + ld(tmp3, Address(a1, 0)); + ld(tmp4, Address(a2, 0)); + ble(cnt1, elem_per_word, SHORT); // short or same + + // Main 16 byte comparison loop with 2 exits + bind(NEXT_DWORD); { + ld(tmp1, Address(a1, wordSize)); + ld(tmp2, Address(a2, wordSize)); + sub(cnt1, cnt1, 2 * wordSize / elem_size); + blez(cnt1, TAIL); + bne(tmp3, tmp4, DONE); + ld(tmp3, Address(a1, 2 * wordSize)); + ld(tmp4, Address(a2, 2 * wordSize)); + add(a1, a1, 2 * wordSize); + add(a2, a2, 2 * wordSize); + ble(cnt1, elem_per_word, TAIL2); + } beq(tmp1, tmp2, NEXT_DWORD); + j(DONE); + + bind(TAIL); + xorr(tmp4, tmp3, tmp4); + xorr(tmp2, tmp1, tmp2); + sll(tmp2, tmp2, tmp5); + orr(tmp5, tmp4, tmp2); + j(IS_TMP5_ZR); + + bind(TAIL2); + bne(tmp1, tmp2, DONE); + + bind(SHORT); + xorr(tmp4, tmp3, tmp4); + sll(tmp5, tmp4, tmp5); + + bind(IS_TMP5_ZR); + bnez(tmp5, DONE); + + bind(SAME); + mv(result, true); + // That's it. + bind(DONE); + + BLOCK_COMMENT("} array_equals"); +} + +// Compare Strings + +// For Strings we're passed the address of the first characters in a1 +// and a2 and the length in cnt1. +// elem_size is the element size in bytes: either 1 or 2. +// There are two implementations. For arrays >= 8 bytes, all +// comparisons (including the final one, which may overlap) are +// performed 8 bytes at a time. For strings < 8 bytes, we compare a +// halfword, then a short, and then a byte. 
+ +void C2_MacroAssembler::string_equals(Register a1, Register a2, + Register result, Register cnt1, int elem_size) +{ + Label SAME, DONE, SHORT, NEXT_WORD; + Register tmp1 = t0; + Register tmp2 = t1; + + assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); + assert_different_registers(a1, a2, result, cnt1, t0, t1); + + BLOCK_COMMENT("string_equals {"); + + mv(result, false); + + // Check for short strings, i.e. smaller than wordSize. cnt1 (used as a byte count) becomes length - wordSize. + sub(cnt1, cnt1, wordSize); + bltz(cnt1, SHORT); + + // Main 8 byte comparison loop. A mismatch leaves through DONE with result still false. + bind(NEXT_WORD); { + ld(tmp1, Address(a1, 0)); + add(a1, a1, wordSize); + ld(tmp2, Address(a2, 0)); + add(a2, a2, wordSize); + sub(cnt1, cnt1, wordSize); + bne(tmp1, tmp2, DONE); + } bgtz(cnt1, NEXT_WORD); + + // Last longword, addressed so that it ends exactly at the end of the + // strings. It may overlap bytes that were already compared, but that's + // still faster than another conditional branch. + // cnt1 is in [-8, 0] here; -8 only happens when the length is exactly + // wordSize, in which case the same longword is compared twice. + add(tmp1, a1, cnt1); + ld(tmp1, Address(tmp1, 0)); + add(tmp2, a2, cnt1); + ld(tmp2, Address(tmp2, 0)); + bne(tmp1, tmp2, DONE); + j(SAME); + + bind(SHORT); + Label TAIL03, TAIL01; + + // 0-7 bytes left. cnt1 == length - 8 has the same low three bits as the length. + andi(t0, cnt1, 4); + beqz(t0, TAIL03); + { + lwu(tmp1, Address(a1, 0)); + add(a1, a1, 4); + lwu(tmp2, Address(a2, 0)); + add(a2, a2, 4); + bne(tmp1, tmp2, DONE); + } + + bind(TAIL03); + // 0-3 bytes left. + andi(t0, cnt1, 2); + beqz(t0, TAIL01); + { + lhu(tmp1, Address(a1, 0)); + add(a1, a1, 2); + lhu(tmp2, Address(a2, 0)); + add(a2, a2, 2); + bne(tmp1, tmp2, DONE); + } + + bind(TAIL01); + if (elem_size == 1) { // Only needed when comparing 1-byte elements + // 0-1 bytes left. + andi(t0, cnt1, 1); + beqz(t0, SAME); + { + lbu(tmp1, a1, 0); + lbu(tmp2, a2, 0); + bne(tmp1, tmp2, DONE); + } + } + + // Strings are equal. + bind(SAME); + mv(result, true); + + // That's it. 
+ bind(DONE); + BLOCK_COMMENT("} string_equals"); +} + +typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); +typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, + bool is_far, bool is_unordered); + +static conditional_branch_insn conditional_branches[] = +{ + /* SHORT branches */ + (conditional_branch_insn)&Assembler::beq, + (conditional_branch_insn)&Assembler::bgt, + NULL, // BoolTest::overflow + (conditional_branch_insn)&Assembler::blt, + (conditional_branch_insn)&Assembler::bne, + (conditional_branch_insn)&Assembler::ble, + NULL, // BoolTest::no_overflow + (conditional_branch_insn)&Assembler::bge, + + /* UNSIGNED branches */ + (conditional_branch_insn)&Assembler::beq, + (conditional_branch_insn)&Assembler::bgtu, + NULL, // same slot as BoolTest::overflow above, no branch + (conditional_branch_insn)&Assembler::bltu, + (conditional_branch_insn)&Assembler::bne, + (conditional_branch_insn)&Assembler::bleu, + NULL, // same slot as BoolTest::no_overflow above, no branch + (conditional_branch_insn)&Assembler::bgeu +}; + +static float_conditional_branch_insn float_conditional_branches[] = +{ + /* FLOAT SHORT branches */ + (float_conditional_branch_insn)&MacroAssembler::float_beq, + (float_conditional_branch_insn)&MacroAssembler::float_bgt, + NULL, // BoolTest::overflow + (float_conditional_branch_insn)&MacroAssembler::float_blt, + (float_conditional_branch_insn)&MacroAssembler::float_bne, + (float_conditional_branch_insn)&MacroAssembler::float_ble, + NULL, // BoolTest::no_overflow + (float_conditional_branch_insn)&MacroAssembler::float_bge, + + /* DOUBLE SHORT branches */ + (float_conditional_branch_insn)&MacroAssembler::double_beq, + (float_conditional_branch_insn)&MacroAssembler::double_bgt, + NULL, // same slot as BoolTest::overflow above, no branch + (float_conditional_branch_insn)&MacroAssembler::double_blt, + (float_conditional_branch_insn)&MacroAssembler::double_bne, + (float_conditional_branch_insn)&MacroAssembler::double_ble, + NULL, // same slot as BoolTest::no_overflow above, no branch + (float_conditional_branch_insn)&MacroAssembler::double_bge +}; + 
+void C2_MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { + assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), + "invalid conditional branch index"); + (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); +} + +// This function should only be used by C2. It flips the unordered handling for unordered-greater: C2 uses +// unordered-lesser instead of unordered-greater. Finally, the result bits are commuted in do_one_bytecode(). +void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { + assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), + "invalid float conditional branch index"); // range check only; the NULL table slots are not rejected here + int booltest_flag = cmpFlag & ~(C2_MacroAssembler::double_branch_mask); // drop the double bit to recover the BoolTest code + (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, + (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); // is_unordered: false only for ge/gt +} + +void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { + switch (cmpFlag) { + case BoolTest::eq: + case BoolTest::le: // le is tested as == 0, like eq (holds for unsigned operands; presumably what the 'U' in the name denotes) + beqz(op1, L, is_far); + break; + case BoolTest::ne: + case BoolTest::gt: // gt is tested as != 0, like ne + bnez(op1, L, is_far); + break; + default: + ShouldNotReachHere(); + } +} + +void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { + switch (cmpFlag) { + case BoolTest::eq: + beqz(op1, L, is_far); + break; + case BoolTest::ne: + bnez(op1, L, is_far); + break; + default: + ShouldNotReachHere(); + } +} + +void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { + Label L; + cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); // branch around the move using cmpFlag with the neg_cond_bits bit flipped (presumably the negated condition) + mv(dst, src); // dst = src unless the branch above was taken + bind(L); +} + +// Floating-point min/max. Set dst to NaN if any input is NaN.
+void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, + bool is_double, bool is_min) { + assert_different_registers(dst, src1, src2); + + Label Done; + fsflags(zr); + if (is_double) { + is_min ? fmin_d(dst, src1, src2) + : fmax_d(dst, src1, src2); + // Checking NaNs: flt is a signaling compare, so it raises the invalid flag (cleared by fsflags above) if either input is NaN + flt_d(zr, src1, src2); + } else { + is_min ? fmin_s(dst, src1, src2) + : fmax_s(dst, src1, src2); + // Checking NaNs: same signaling compare, single-precision form + flt_s(zr, src1, src2); + } + + frflags(t0); // t0 = accrued exception flags; zero means no NaN was seen + beqz(t0, Done); + + // In case of NaNs: src1 + src2 then produces the NaN result, overwriting the min/max value + is_double ? fadd_d(dst, src1, src2) + : fadd_s(dst, src1, src2); + + bind(Done); +} + +void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, + VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { + Label loop; + Assembler::SEW sew = islatin ? Assembler::e8 : Assembler::e16; + + bind(loop); + vsetvli(tmp1, cnt, sew, Assembler::m2); // tmp1 = number of elements handled this pass + vlex_v(vr1, a1, sew); + vlex_v(vr2, a2, sew); + vmsne_vv(vrs, vr1, vr2); // vrs = mask of elements that differ + vfirst_m(tmp2, vrs); // tmp2 = index of the first differing element, negative if none + bgez(tmp2, DONE); // mismatch: leave without touching result + sub(cnt, cnt, tmp1); + if (!islatin) { + slli(tmp1, tmp1, 1); // get byte counts + } + add(a1, a1, tmp1); + add(a2, a2, tmp1); + bnez(cnt, loop); + + mv(result, true); // fell out of the loop: every element matched +} + +void C2_MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { + Label DONE; + Register tmp1 = t0; + Register tmp2 = t1; + + BLOCK_COMMENT("string_equals_v {"); + + mv(result, false); + + if (elem_size == 2) { + srli(cnt, cnt, 1); // halve cnt for 2-byte elements: element_compare counts elements + } + + element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); + + bind(DONE); + BLOCK_COMMENT("} string_equals_v"); +} + +// used by C2 ClearArray patterns. +// base: Address of a buffer to be zeroed +// cnt: Count in HeapWords +// +// base, cnt, t0 and the v0-v3 register group (LMUL=4 starting at v0) are clobbered.
+void C2_MacroAssembler::clear_array_v(Register base, Register cnt) { + Label loop; + + // making zero words: with LMUL=4 this zeroes the register group starting at v0 + vsetvli(t0, cnt, Assembler::e64, Assembler::m4); + vxor_vv(v0, v0, v0); + + bind(loop); + vsetvli(t0, cnt, Assembler::e64, Assembler::m4); // t0 = number of 64-bit words handled this pass + vse64_v(v0, base); + sub(cnt, cnt, t0); + shadd(base, t0, base, t0, 3); // advance base by t0 words (t0 << 3 bytes) + bnez(cnt, loop); +} + +void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, + Register cnt1, int elem_size) { + Label DONE; + Register tmp1 = t0; + Register tmp2 = t1; + Register cnt2 = tmp2; + int length_offset = arrayOopDesc::length_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); + + BLOCK_COMMENT("arrays_equals_v {"); + + // if (a1 == a2), return true + mv(result, true); + beq(a1, a2, DONE); + + mv(result, false); + // if a1 == null or a2 == null, return false + beqz(a1, DONE); + beqz(a2, DONE); + // if (a1.length != a2.length), return false + lwu(cnt1, Address(a1, length_offset)); + lwu(cnt2, Address(a2, length_offset)); + bne(cnt1, cnt2, DONE); + + la(a1, Address(a1, base_offset)); // a1/a2 now address the first array element + la(a2, Address(a2, base_offset)); + + element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); // sets result = true only if no element differs + + bind(DONE); + + BLOCK_COMMENT("} arrays_equals_v"); +} + +void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, + Register result, Register tmp1, Register tmp2, int encForm) { + Label DIFFERENCE, DONE, L, loop; + bool encLL = encForm == StrIntrinsicNode::LL; + bool encLU = encForm == StrIntrinsicNode::LU; + bool encUL = encForm == StrIntrinsicNode::UL; + + bool str1_isL = encLL || encLU; + bool str2_isL = encLL || encUL; + + int minCharsInWord = encLL ? 
wordSize : wordSize / 2; /* NOTE(review): minCharsInWord is not referenced again in this function */ + + BLOCK_COMMENT("string_compare {"); + + // for Latin1 strings, 1 byte for 1 character + // for UTF16 strings, 2 bytes for 1 character, so their counts are halved here + if (!str1_isL) + sraiw(cnt1, cnt1, 1); + if (!str2_isL) + sraiw(cnt2, cnt2, 1); + + // result = cnt1 - cnt2: the length difference, which is what is returned when no character differs + // save the minimum of the string lengths in cnt2. + sub(result, cnt1, cnt2); + bgt(cnt1, cnt2, L); + mv(cnt2, cnt1); + bind(L); + + if (str1_isL == str2_isL) { // LL or UU + element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); // result register is zr: only the DIFFERENCE exit matters + j(DONE); + } else { // LU or UL + Register strL = encLU ? str1 : str2; + Register strU = encLU ? str2 : str1; + VectorRegister vstr1 = encLU ? v4 : v0; + VectorRegister vstr2 = encLU ? v0 : v4; + + bind(loop); + vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); + vle8_v(vstr1, strL); + vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); + vzext_vf2(vstr2, vstr1); // zero-extend the Latin1 bytes to 16-bit elements + vle16_v(vstr1, strU); + vmsne_vv(v0, vstr2, vstr1); + vfirst_m(tmp2, v0); // tmp2 = index of the first differing character, negative if none + bgez(tmp2, DIFFERENCE); + sub(cnt2, cnt2, tmp1); + add(strL, strL, tmp1); + shadd(strU, tmp1, strU, tmp1, 1); // the UTF16 pointer advances two bytes per character + bnez(cnt2, loop); + j(DONE); + } + bind(DIFFERENCE); + slli(tmp1, tmp2, 1); // tmp1 = byte offset of the mismatch within a UTF16 string + add(str1, str1, str1_isL ? tmp2 : tmp1); + add(str2, str2, str2_isL ? tmp2 : tmp1); + str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); + str2_isL ? 
lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); + sub(result, tmp1, tmp2); + + bind(DONE); +} + +void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { + Label loop; + assert_different_registers(src, dst, len, tmp, t0); + + BLOCK_COMMENT("byte_array_inflate_v {"); + bind(loop); + vsetvli(tmp, len, Assembler::e8, Assembler::m2); + vle8_v(v2, src); + vsetvli(t0, len, Assembler::e16, Assembler::m4); + vzext_vf2(v0, v2); + vse16_v(v0, dst); + sub(len, len, tmp); + add(src, src, tmp); + shadd(dst, tmp, dst, tmp, 1); + bnez(len, loop); + BLOCK_COMMENT("} byte_array_inflate_v"); +} + +// Compress char[] array to byte[]. +// result: the array length if every element in the array can be encoded; 0 otherwise. +void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { + Label done; + encode_iso_array_v(src, dst, len, result, tmp); + beqz(len, done); // len == 0: every element was consumed, keep the count in result + mv(result, zr); // elements remain unencoded: report 0 + bind(done); +} + +// result: the number of elements that have been encoded.
+void C2_MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) {
+  Label loop, DIFFERENCE, DONE;
+  // Narrows 16-bit elements at src to bytes at dst until len is exhausted or an element above 0xff is met; clobbers src, dst, len, tmp, t0, v0-v3.
+  BLOCK_COMMENT("encode_iso_array_v {");
+  mv(result, 0);
+
+  bind(loop);
+  mv(tmp, 0xff);                          // reloaded every pass because tmp is overwritten by vfirst_m below
+  vsetvli(t0, len, Assembler::e16, Assembler::m2);
+  vle16_v(v2, src);
+  // if element > 0xff, stop
+  vmsgtu_vx(v1, v2, tmp);
+  vfirst_m(tmp, v1);                      // tmp = index of the first element above 0xff, negative if there is none
+  vmsbf_m(v0, v1);                        // v0 = mask selecting only the elements before that first offender
+  // compress char to byte
+  vsetvli(t0, len, Assembler::e8);
+  vncvt_x_x_w(v1, v2, Assembler::v0_t);
+  vse8_v(v1, dst, Assembler::v0_t);
+
+  bgez(tmp, DIFFERENCE);
+  add(result, result, t0);
+  add(dst, dst, t0);
+  sub(len, len, t0);
+  shadd(src, t0, src, t0, 1);
+  bnez(len, loop);
+  j(DONE);
+
+  bind(DIFFERENCE);                       // len is left non-zero on this path
+  add(result, result, tmp);
+
+  bind(DONE);
+  BLOCK_COMMENT("} encode_iso_array_v");
+}
+
+void C2_MacroAssembler::has_negatives_v(Register ary, Register len, Register result, Register tmp) {
+  Label loop, DONE;
+  // result = true if any of the len bytes at ary is negative as a signed byte, else false; clobbers ary, len, tmp, t0 and the v0 group.
+  BLOCK_COMMENT("has_negatives_v {");
+  mv(result, true);
+
+  bind(loop);
+  vsetvli(t0, len, Assembler::e8, Assembler::m4);
+  vle8_v(v0, ary);
+  // if element highest bit is set, return true
+  vmslt_vx(v0, v0, zr);
+  vfirst_m(tmp, v0);
+  bgez(tmp, DONE);
+
+  sub(len, len, t0);
+  add(ary, ary, t0);
+  bnez(len, loop);
+  mv(result, false);
+
+  bind(DONE);
+  BLOCK_COMMENT("} has_negatives_v");
+}
+
+void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1,
+                                              Register ch, Register result,
+                                              Register tmp1, Register tmp2,
+                                              bool isL) {
+  mv(result, zr);
+  // result = element index of the first occurrence of ch in str1 (cnt1 elements), or -1; clobbers str1, cnt1, tmp1, tmp2 and the v0 group.
+  Label loop, MATCH, DONE;
+  Assembler::SEW sew = isL ? Assembler::e8 : Assembler::e16;
+  bind(loop);
+  vsetvli(tmp1, cnt1, sew, Assembler::m4);
+  vlex_v(v0, str1, sew);
+  vmseq_vx(v0, v0, ch);
+  vfirst_m(tmp2, v0);
+  bgez(tmp2, MATCH); // if equal, return index
+
+  add(result, result, tmp1);
+  sub(cnt1, cnt1, tmp1);
+  if (!isL) slli(tmp1, tmp1, 1);   // 16-bit elements: convert the element count to a byte count
+  add(str1, str1, tmp1);
+  bnez(cnt1, loop);
+
+  mv(result, -1);
+  j(DONE);
+
+  bind(MATCH);
+  add(result, result, tmp2);       // elements skipped in earlier passes + index within this pass
+
+  bind(DONE);
+}
+
+// Element-wise min/max of src1 and src2 into dst. Set dst to NaN if any NaN input. Clobbers t0 and the mask register v0.
+void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2,
+                                    bool is_double, bool is_min) {
+  assert_different_registers(dst, src1, src2);
+
+  vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32);
+
+  is_min ? vfmin_vv(dst, src1, src2)
+         : vfmax_vv(dst, src1, src2);
+
+  vmfne_vv(v0, src1, src1);                     // mask = elements of src1 that compare unequal to themselves (NaN)
+  vfadd_vv(dst, src1, src1, Assembler::v0_t);   // overwrite only those elements with src1 + src1
+  vmfne_vv(v0, src2, src2);
+  vfadd_vv(dst, src2, src2, Assembler::v0_t);
+}
+
+// Min/max reduction over scalar src1 and vector src2 into dst. Set dst to NaN if any NaN input. Clobbers t0, tmp1, tmp2 and the FP flags.
+void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst,
+                                           FloatRegister src1, VectorRegister src2,
+                                           VectorRegister tmp1, VectorRegister tmp2,
+                                           bool is_double, bool is_min) {
+  assert_different_registers(src2, tmp1, tmp2);
+
+  Label L_done, L_NaN;
+  vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32);
+  vfmv_s_f(tmp2, src1);
+
+  is_min ? vfredmin_vs(tmp1, src2, tmp2)
+         : vfredmax_vs(tmp1, src2, tmp2);
+
+  fsflags(zr);
+  // Checking NaNs: the compare result is discarded, only the FP flags it leaves behind are read (presumably set for NaN operands)
+  vmflt_vf(tmp2, src2, src1);
+  frflags(t0);
+  bnez(t0, L_NaN);
+  j(L_done);
+
+  bind(L_NaN);
+  vfmv_s_f(tmp2, src1);
+  vfredsum_vs(tmp1, src2, tmp2);   // redo the reduction as a sum, replacing the min/max result in tmp1
+
+  bind(L_done);
+  vfmv_f_s(dst, tmp1);
+}
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#ifndef CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP
+#define CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP
+
+// C2_MacroAssembler contains high-level macros for C2
+// NOTE(review): no class header here, only access labels; presumably included inside the C2_MacroAssembler class body - confirm.
+ private:
+  void element_compare(Register r1, Register r2,
+                       Register result, Register cnt,
+                       Register tmp1, Register tmp2,
+                       VectorRegister vr1, VectorRegister vr2,
+                       VectorRegister vrs,
+                       bool is_latin, Label& DONE);
+ public:
+
+  void string_compare(Register str1, Register str2,
+                      Register cnt1, Register cnt2, Register result,
+                      Register tmp1, Register tmp2, Register tmp3,
+                      int ae);
+
+  void string_indexof_char_short(Register str1, Register cnt1,
+                                 Register ch, Register result,
+                                 bool isL);
+
+  void string_indexof_char(Register str1, Register cnt1,
+                           Register ch, Register result,
+                           Register tmp1, Register tmp2,
+                           Register tmp3, Register tmp4,
+                           bool isL);
+
+  void string_indexof(Register str1, Register str2,
+                      Register cnt1, Register cnt2,
+                      Register tmp1, Register tmp2,
+                      Register tmp3, Register tmp4,
+                      Register tmp5, Register tmp6,
+                      Register result, int ae);
+
+  void string_indexof_linearscan(Register haystack, Register needle,
+                                 Register haystack_len, Register needle_len,
+                                 Register tmp1, Register tmp2,
+                                 Register tmp3, Register tmp4,
+                                 int needle_con_cnt, Register result, int ae);
+
+  void arrays_equals(Register r1, Register r2,
+                     Register tmp3, Register tmp4,
+                     Register tmp5, Register tmp6,
+                     Register result, Register cnt1,
+                     int elem_size);
+
+  void string_equals(Register r1, Register r2,
+                     Register result, Register cnt1,
+                     int elem_size);
+
+  // refer to conditional_branches and float_conditional_branches
+  static const int bool_test_bits = 3;
+  static const int neg_cond_bits = 2;
+  static const int unsigned_branch_mask = 1 << bool_test_bits;  // same value as double_branch_mask below
+  static const int double_branch_mask = 1 << bool_test_bits;
+
+  // cmp: compare-and-branch helpers; cmpFlag presumably selects the condition using the bit layout above
+  void cmp_branch(int cmpFlag,
+                  Register op1, Register op2,
+                  Label& label, bool is_far = false);
+
+  void float_cmp_branch(int cmpFlag,
+                        FloatRegister op1, FloatRegister op2,
+                        Label& label, bool is_far = false);
+
+  void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op,
+                                    Label& L, bool is_far = false);
+
+  void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op,
+                               Label& L, bool is_far = false);
+
+  void enc_cmove(int cmpFlag,
+                 Register op1, Register op2,
+                 Register dst, Register src);
+
+  void spill(Register r, bool is64, int offset) {    // store r to the stack slot at sp + offset
+    is64 ? sd(r, Address(sp, offset))
+         : sw(r, Address(sp, offset));
+  }
+
+  void spill(FloatRegister f, bool is64, int offset) {
+    is64 ? fsd(f, Address(sp, offset))
+         : fsw(f, Address(sp, offset));
+  }
+
+  void spill(VectorRegister v, int offset) {          // clobbers t0 (used to form the address)
+    add(t0, sp, offset);
+    vs1r_v(v, t0);
+  }
+
+  void unspill(Register r, bool is64, int offset) {   // reload r from sp + offset; the 32-bit form uses lw
+    is64 ? ld(r, Address(sp, offset))
+         : lw(r, Address(sp, offset));
+  }
+
+  void unspillu(Register r, bool is64, int offset) {  // as unspill, but the 32-bit form uses lwu
+    is64 ? ld(r, Address(sp, offset))
+         : lwu(r, Address(sp, offset));
+  }
+
+  void unspill(FloatRegister f, bool is64, int offset) {
+    is64 ? fld(f, Address(sp, offset))
+         : flw(f, Address(sp, offset));
+  }
+
+  void unspill(VectorRegister v, int offset) {        // clobbers t0 (used to form the address)
+    add(t0, sp, offset);
+    vl1r_v(v, t0);
+  }
+
+  void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vec_reg_size_in_bytes) {
+    assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size");
+    unspill(v0, src_offset);   // copies through v0 and t0, which are both clobbered
+    spill(v0, dst_offset);
+  }
+
+  void minmax_FD(FloatRegister dst,
+                 FloatRegister src1, FloatRegister src2,
+                 bool is_double, bool is_min);
+
+  // intrinsic methods implemented by rvv instructions
+  void string_equals_v(Register r1, Register r2,
+                       Register result, Register cnt1,
+                       int elem_size);
+
+  void arrays_equals_v(Register r1, Register r2,
+                       Register result, Register cnt1,
+                       int elem_size);
+
+  void string_compare_v(Register str1, Register str2,
+                        Register cnt1, Register cnt2,
+                        Register result,
+                        Register tmp1, Register tmp2,
+                        int encForm);
+
+  void clear_array_v(Register base, Register cnt);
+
+  void byte_array_inflate_v(Register src, Register dst,
+                            Register len, Register tmp);
+
+  void char_array_compress_v(Register src, Register dst,
+                             Register len, Register result,
+                             Register tmp);
+
+  void encode_iso_array_v(Register src, Register dst,
+                          Register len, Register result,
+                          Register tmp);
+
+  void has_negatives_v(Register ary, Register len,
+                       Register result, Register tmp);
+
+  void string_indexof_char_v(Register str1, Register cnt1,
+                             Register ch, Register result,
+                             Register tmp1, Register tmp2,
+                             bool isL);
+
+  void minmax_FD_v(VectorRegister dst,
+                   VectorRegister src1, VectorRegister src2,
+                   bool is_double, bool is_min);
+
+  void reduce_minmax_FD_v(FloatRegister dst,
+                          FloatRegister src1, VectorRegister src2,
+                          VectorRegister tmp1, VectorRegister tmp2,
+                          bool is_double, bool is_min);
+
+#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_globals_riscv.hpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_globals_riscv.hpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_C2_GLOBALS_RISCV_HPP
+#define CPU_RISCV_C2_GLOBALS_RISCV_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the server compiler.
+// (see c2_globals.hpp). The entries below are grouped, not strictly alphabetical.
+
+define_pd_global(bool, BackgroundCompilation, true);
+define_pd_global(bool, CICompileOSR, true);
+define_pd_global(bool, InlineIntrinsics, true);
+define_pd_global(bool, PreferInterpreterNativeStubs, false);
+define_pd_global(bool, ProfileTraps, true);
+define_pd_global(bool, UseOnStackReplacement, true);
+define_pd_global(bool, ProfileInterpreter, true);
+define_pd_global(bool, TieredCompilation, COMPILER1_PRESENT(true) NOT_COMPILER1(false)); // true only when built with C1
+define_pd_global(intx, CompileThreshold, 10000);
+
+define_pd_global(intx, OnStackReplacePercentage, 140);
+define_pd_global(intx, ConditionalMoveLimit, 0);
+define_pd_global(intx, FLOATPRESSURE, 32);
+define_pd_global(intx, FreqInlineSize, 325);
+define_pd_global(intx, MinJumpTableSize, 10);
+define_pd_global(intx, INTPRESSURE, 24);
+define_pd_global(intx, InteriorEntryAlignment, 16);
+define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
+define_pd_global(intx, LoopUnrollLimit, 60);
+define_pd_global(intx, LoopPercentProfileLimit, 10);
+// InitialCodeCacheSize derived from specjbb2000 run.
+define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize (39 * 64K)
+define_pd_global(intx, CodeCacheExpansionSize, 64*K);
+
+// Ergonomics related flags
+define_pd_global(uint64_t,MaxRAM, 128ULL*G);
+define_pd_global(intx, RegisterCostAreaRatio, 16000);
+
+// Peephole and CISC spilling both break the graph, and so make the
+// scheduler sick.
+define_pd_global(bool, OptoPeephole, false);
+define_pd_global(bool, UseCISCSpill, false);
+define_pd_global(bool, OptoScheduling, true);
+define_pd_global(bool, OptoBundling, false);
+define_pd_global(bool, OptoRegScheduling, false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis, true);
+define_pd_global(bool, IdealizeClearArrayNode, true);
+
+define_pd_global(intx, ReservedCodeCacheSize, 48*M);    // equals the sum of the three code heap sizes below (21 + 22 + 5)
+define_pd_global(intx, NonProfiledCodeHeapSize, 21*M);
+define_pd_global(intx, ProfiledCodeHeapSize, 22*M);
+define_pd_global(intx, NonNMethodCodeHeapSize, 5*M );
+define_pd_global(uintx, CodeCacheMinBlockLength, 6);
+define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K);
+
+// Ergonomics related flags
+define_pd_global(bool, NeverActAsServerClassMachine, false);
+
+define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed.
+
+#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_init_riscv.cpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_init_riscv.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "opto/compile.hpp"
+#include "opto/node.hpp"
+
+// processor dependent initialization for riscv
+
+extern void reg_mask_init();  // defined elsewhere; presumably in the ADLC-generated code - confirm
+
+void Compile::pd_compiler2_init() {  // checks the alignment flags, then runs reg_mask_init()
+  guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "CodeEntryAlignment must not be smaller than InteriorEntryAlignment");
+  reg_mask_init();
+}
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "opto/compile.hpp"
+#include "opto/node.hpp"
+#include "opto/output.hpp"
+#include "runtime/sharedRuntime.hpp"
+
+#define __ masm.
+void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const {
+  assert(SharedRuntime::polling_page_return_handler_blob() != NULL,
+         "polling page return stub not created yet");
+  address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point();
+  RuntimeAddress callback_addr(stub);
+  // Stub body: record the poll's pc in the thread, then jump to the polling-page return handler.
+  __ bind(entry->_stub_label);
+  InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset);  // (pc - offset) + offset recorded for the poll
+  masm.code_section()->relocate(masm.pc(), safepoint_pc.rspec());
+  __ la(t0, safepoint_pc.target());
+  __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset()));
+  __ far_jump(callback_addr);
+}
+#undef __
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_CODEBUFFER_RISCV_HPP
+#define CPU_RISCV_CODEBUFFER_RISCV_HPP
+
+private:
+  void pd_initialize() {}  // platform hook, intentionally empty on riscv
+
+public:
+  void flush_bundle(bool start_new_bundle) {}  // intentionally empty; the argument is ignored
+
+#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/compiledIC_riscv.cpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/compiledIC_riscv.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/compiledIC.hpp"
+#include "code/icBuffer.hpp"
+#include "code/nmethod.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/safepoint.hpp"
+
+// ----------------------------------------------------------------------------
+
+#define __ _masm.
+address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {  // returns the stub start, or NULL on failure
+  precond(cbuf.stubs()->start() != badAddress);
+  precond(cbuf.stubs()->end() != badAddress);
+  // Stub is fixed up when the corresponding call is converted from
+  // calling compiled code to calling interpreted code.
+  // mv xmethod, 0
+  // jalr -4 # to self
+
+  if (mark == NULL) {
+    mark = cbuf.insts_mark();  // Get mark within main instrs section.
+  }
+
+  // Note that the code buffer's insts_mark is always relative to insts.
+  // That's why we must use the macroassembler to generate a stub.
+  MacroAssembler _masm(&cbuf);
+
+  address base = __ start_a_stub(to_interp_stub_size());
+  int offset = __ offset();  // start offset, used only by the size assert below
+  if (base == NULL) {
+    return NULL;  // CodeBuffer::expand failed
+  }
+  // static stub relocation stores the instruction address of the call
+  __ relocate(static_stub_Relocation::spec(mark));
+
+  __ emit_static_call_stub();
+
+  assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big");
+  __ end_a_stub();
+  return base;
+}
+#undef __
+
+int CompiledStaticCall::to_interp_stub_size() {
+  // fence_i + fence* + (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr; presumably the fences are covered by NativeFenceI::instruction_size()
+  return NativeFenceI::instruction_size() + 12 * NativeInstruction::instruction_size;
+}
+
+int CompiledStaticCall::to_trampoline_stub_size() {
+  // Somewhat pessimistically, we count 4 instructions here (although
+  // there are only 3) because we sometimes emit an alignment nop.
+  // Trampoline stubs are always word aligned.
+  return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size;
+}
+
+// Relocation entries for call stub, compiled java to interpreter.
+int CompiledStaticCall::reloc_to_interp_stub() {
+  return 4; // 3 in emit_to_interp_stub + 1 in emit_call
+}
+
+void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) {
+  address stub = find_stub();
+  guarantee(stub != NULL, "stub not found");
+
+  if (TraceICs) {
+    ResourceMark rm;
+    tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s",
+                  p2i(instruction_address()),
+                  callee->name_and_sig_as_C_string());
+  }
+
+  // Creation also verifies the object. The constant load is looked up just past the leading fence sequence.
+  NativeMovConstReg* method_holder
+    = nativeMovConstReg_at(stub + NativeFenceI::instruction_size());
+#ifdef ASSERT
+  NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address());
+
+  verify_mt_safe(callee, entry, method_holder, jump);
+#endif
+  // Update stub: patch in the callee, patch the jump to entry, then invalidate the icache for the stub range.
+  method_holder->set_data((intptr_t)callee());
+  NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry);
+  ICache::invalidate_range(stub, to_interp_stub_size());
+  // Update jump to call.
+  set_destination_mt_safe(stub);
+}
+
+void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) {
+  // Reset stub: constant back to 0 and jump destination back to (address)-1.
+  address stub = static_stub->addr();
+  assert(stub != NULL, "stub not found");
+  assert(CompiledICLocker::is_safe(stub), "mt unsafe call");
+  // Creation also verifies the object.
+  NativeMovConstReg* method_holder
+    = nativeMovConstReg_at(stub + NativeFenceI::instruction_size());
+  method_holder->set_data(0);
+  NativeJump* jump = nativeJump_at(method_holder->next_instruction_address());
+  jump->set_jump_destination((address)-1);
+}
+
+//-----------------------------------------------------------------------------
+// Non-product mode code
+#ifndef PRODUCT
+
+void CompiledDirectStaticCall::verify() {
+  // Verify call.
+  _call->verify();
+  _call->verify_alignment();
+
+  // Verify stub.
+  address stub = find_stub();
+  assert(stub != NULL, "no stub found for static call");
+  // Creation also verifies the object; the two locals below exist only for that check.
+  NativeMovConstReg* method_holder
+    = nativeMovConstReg_at(stub + NativeFenceI::instruction_size());
+  NativeJump* jump = nativeJump_at(method_holder->next_instruction_address());
+
+  // Verify state.
+  assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check");
+}
+
+#endif // !PRODUCT
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/copy_riscv.hpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/copy_riscv.hpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_COPY_RISCV_HPP
+#define CPU_RISCV_COPY_RISCV_HPP
+
+#include OS_CPU_HEADER(copy)
+
+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
+  julong* to = (julong*) tohw;
+  julong  v  = ((julong) value << 32) | value;  // replicate the 32-bit pattern into both halves of a 64-bit word
+  while (count-- > 0) {
+    *to++ = v;
+  }
+}
+
+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
+  pd_fill_to_words(tohw, count, value);
+}
+
+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
+  (void)memset(to, value, count);
+}
+
+static void pd_zero_to_words(HeapWord* tohw, size_t count) {
+  pd_fill_to_words(tohw, count, 0);
+}
+
+static void pd_zero_to_bytes(void* to, size_t count) {
+  (void)memset(to, 0, count);
+}
+
+static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
+  (void)memmove(to, from, count * HeapWordSize);  // memmove: the ranges may overlap
+}
+
+static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
+  switch (count) {  // up to 8 words are copied by unrolled assignments, larger counts by memcpy
+    case 8:  to[7] = from[7];   // fall through
+    case 7:  to[6] = from[6];   // fall through
+    case 6:  to[5] = from[5];   // fall through
+    case 5:  to[4] = from[4];   // fall through
+    case 4:  to[3] = from[3];   // fall through
+    case 3:  to[2] = from[2];   // fall through
+    case 2:  to[1] = from[1];   // fall through
+    case 1:  to[0] = from[0];   // fall through
+    case 0:  break;
+    default:
+      memcpy(to, from, count * HeapWordSize);
+      break;
+  }
+}
+
+static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) {
+  switch (count) {  // like pd_disjoint_words, but every path copies word by word instead of calling memcpy
+    case 8:  to[7] = from[7];   // fall through
+    case 7:  to[6] = from[6];   // fall through
+    case 6:  to[5] = from[5];   // fall through
+    case 5:  to[4] = from[4];   // fall through
+    case 4:  to[3] = from[3];   // fall through
+    case 3:  to[2] = from[2];   // fall through
+    case 2:  to[1] = from[1];   // fall through
+    case 1:  to[0] = from[0];   // fall through
+    case 0:  break;
+    default: while (count-- > 0) {
+               *to++ = *from++;
+             }
+             break;
+  }
+}
+
+static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_words(from, to, count);
+}
+
+static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
+  pd_disjoint_words(from, to, count);
+}
+
+static void pd_conjoint_bytes(const void* from, void* to, size_t count) {
+  (void)memmove(to, from, count);
+}
+
+static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) {
+  pd_conjoint_bytes(from, to, count);
+}
+
+static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) {
+  _Copy_conjoint_jshorts_atomic(from, to, count);  // the _Copy_* routines are declared outside this header
+}
+
+static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) {
+  _Copy_conjoint_jints_atomic(from, to, count);
+}
+
+static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) {
+  _Copy_conjoint_jlongs_atomic(from, to, count);
+}
+
+static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) {
+  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size.");
+  _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count);
+}
+
+static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) {
+  _Copy_arrayof_conjoint_bytes(from, to, count);
+}
+
+static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) {
+  _Copy_arrayof_conjoint_jshorts(from, to, count);
+}
+
+static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) {
+  _Copy_arrayof_conjoint_jints(from, to, count);
+}
+
+static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) {
+  _Copy_arrayof_conjoint_jlongs(from, to, count);
+}
+
+static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) {
+  assert(!UseCompressedOops, "oops must be uncompressed to be copied as jlongs");
+  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
+  _Copy_arrayof_conjoint_jlongs(from, to, count);
+}
+
+#endif // CPU_RISCV_COPY_RISCV_HPP
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/disassembler_riscv.hpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/disassembler_riscv.hpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP +#define CPU_RISCV_DISASSEMBLER_RISCV_HPP + +static int pd_instruction_alignment() { + return 1; +} + +static const char* pd_cpu_opts() { + return ""; +} + +// Returns address of n-th instruction preceding addr, +// NULL if no preceding instruction can be found. +// On riscv, we assume a constant instruction length. +// It might be beneficial to check "is_readable" as we do on ppc and s390. +static address find_prev_instr(address addr, int n_instr) { + return addr - Assembler::instruction_size * n_instr; +} + +// special-case instruction decoding. +// There may be cases where the binutils disassembler doesn't do +// the perfect job. In those cases, decode_instruction0 may kick in +// and do it right. +// If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" +static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { + return here; +} + +// platform-specific instruction annotations (like value of loaded constants) +static void annotate(address pc, outputStream* st) {} + +#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "prims/foreign_globals.hpp" +#include "utilities/debug.hpp" + +// Stubbed out, implement later +const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { + Unimplemented(); + return {}; +} + +const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { + Unimplemented(); + return {}; +} + +const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { + ShouldNotCallThis(); + return {}; +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP +#define CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP + +class ABIDescriptor {}; +class BufferLayout {}; + +#endif // CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/frame_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/frame_riscv.cpp @@ -0,0 +1,688 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "compiler/oopMap.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" +#include "oops/markWord.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/monitorChunk.hpp" +#include "runtime/os.inline.hpp" +#include "runtime/signature.hpp" +#include "runtime/stackWatermarkSet.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_riscv.inline.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#include "runtime/vframeArray.hpp" +#endif + +#ifdef ASSERT +void RegisterMap::check_location_valid() { +} +#endif + + +// Profiling/safepoint support + +bool frame::safe_for_sender(JavaThread *thread) { + address addr_sp = (address)_sp; + address addr_fp = (address)_fp; + address unextended_sp = (address)_unextended_sp; + + // consider stack guards when trying to determine "safe" stack pointers + // sp must be within the usable part of the stack (not in guards) + if (!thread->is_in_usable_stack(addr_sp)) { + return false; + } + + // When we 
are running interpreted code the machine stack pointer, SP, is + // set low enough so that the Java expression stack can grow and shrink + // without ever exceeding the machine stack bounds. So, ESP >= SP. + + // When we call out of an interpreted method, SP is incremented so that + // the space between SP and ESP is removed. The SP saved in the callee's + // frame is the SP *before* this increment. So, when we walk a stack of + // interpreter frames the sender's SP saved in a frame might be less than + // the SP at the point of call. + + // So unextended sp must be within the stack but we need not check + // that unextended sp >= sp + + if (!thread->is_in_full_stack_checked(unextended_sp)) { + return false; + } + + // an fp must be within the stack and above (but not equal) sp + // second evaluation on fp+ is added to handle situation where fp is -1 + bool fp_safe = thread->is_in_stack_range_excl(addr_fp, addr_sp) && + thread->is_in_full_stack_checked(addr_fp + (return_addr_offset * sizeof(void*))); + + // We know sp/unextended_sp are safe; only fp is questionable here + + // If the current frame is known to the code cache then we can attempt + // to construct the sender and do some validation of it. This goes a long way + // toward eliminating issues when we get in frame construction code + + if (_cb != NULL) { + + // First check if frame is complete and tester is reliable + // Unfortunately we can only check frame complete for runtime stubs and nmethod + // other generic buffer blobs are more problematic so we just assume they are + // ok. adapter blobs never have a frame complete and are never ok. + + if (!_cb->is_frame_complete_at(_pc)) { + if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { + return false; + } + } + + // Could just be some random pointer within the codeBlob + if (!_cb->code_contains(_pc)) { + return false; + } + + // Entry frame checks + if (is_entry_frame()) { + // an entry frame must have a valid fp. 
+ return fp_safe && is_entry_frame_valid(thread); + } + + intptr_t* sender_sp = NULL; + intptr_t* sender_unextended_sp = NULL; + address sender_pc = NULL; + intptr_t* saved_fp = NULL; + + if (is_interpreted_frame()) { + // fp must be safe + if (!fp_safe) { + return false; + } + + sender_pc = (address)this->fp()[return_addr_offset]; + // for interpreted frames, the value below is the sender "raw" sp, + // which can be different from the sender unextended sp (the sp seen + // by the sender) because of current frame local variables + sender_sp = (intptr_t*) addr_at(sender_sp_offset); + sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; + saved_fp = (intptr_t*) this->fp()[link_offset]; + } else { + // must be some sort of compiled/runtime frame + // fp does not have to be safe (although it could be checked for c1?) + + // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc + if (_cb->frame_size() <= 0) { + return false; + } + + sender_sp = _unextended_sp + _cb->frame_size(); + // Is sender_sp safe? + if (!thread->is_in_full_stack_checked((address)sender_sp)) { + return false; + } + + sender_unextended_sp = sender_sp; + sender_pc = (address) *(sender_sp - 1); // return address slot (same value as return_addr_offset == -1) + saved_fp = (intptr_t*) *(sender_sp - 2); // saved fp slot (same value as link_offset == -2) + } + + + // If the potential sender is the interpreter then we can do some more checking + if (Interpreter::contains(sender_pc)) { + + // fp is always saved in a recognizable place in any code we generate. However + // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp + // is really a frame pointer. 
+ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { + return false; + } + + // construct the potential sender + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + return sender.is_interpreted_frame_valid(thread); + } + + // We must always be able to find a recognizable pc + CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); + if (sender_pc == NULL || sender_blob == NULL) { + return false; + } + + // Could be a zombie method + if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { + return false; + } + + // Could just be some random pointer within the codeBlob + if (!sender_blob->code_contains(sender_pc)) { + return false; + } + + // We should never be able to see an adapter if the current frame is something from code cache + if (sender_blob->is_adapter_blob()) { + return false; + } + + // Could be the call_stub + if (StubRoutines::returns_to_call_stub(sender_pc)) { + if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { + return false; + } + + // construct the potential sender + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + // Validate the JavaCallWrapper an entry frame must have + address jcw = (address)sender.entry_frame_call_wrapper(); + + bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); + + return jcw_safe; + } + + CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); + if (nm != NULL) { + if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || + nm->method()->is_method_handle_intrinsic()) { + return false; + } + } + + // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size + // because the return address counts against the callee's frame. + if (sender_blob->frame_size() <= 0) { + assert(!sender_blob->is_compiled(), "should count return address at least"); + return false; + } + + // We should never be able to see anything here except an nmethod. 
If something in the + // code cache (current frame) is called by an entity within the code cache that entity + // should not be anything but the call stub (already covered), the interpreter (already covered) + // or an nmethod. + if (!sender_blob->is_compiled()) { + return false; + } + + // Could put some more validation for the potential non-interpreted sender + // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... + + // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb + + // We've validated the potential sender that would be created + return true; + } + + // Must be native-compiled frame. Since sender will try and use fp to find + // linkages it must be safe + if (!fp_safe) { + return false; + } + + // Will the pc we fetch be non-zero (which we'll find at the oldest frame) + if ((address)this->fp()[return_addr_offset] == NULL) { return false; } + + return true; +} + +void frame::patch_pc(Thread* thread, address pc) { + assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); + address* pc_addr = &(((address*) sp())[-1]); + if (TracePcPatching) { + tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", + p2i(pc_addr), p2i(*pc_addr), p2i(pc)); + } + // Either the return address is the original one or we are going to + // patch in the same address that's already there. 
+ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); + *pc_addr = pc; + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + assert(original_pc == _pc, "expected original PC to be stored before patching"); + _deopt_state = is_deoptimized; + // leave _pc as is + } else { + _deopt_state = not_deoptimized; + _pc = pc; + } +} + +bool frame::is_interpreted_frame() const { + return Interpreter::contains(pc()); +} + +int frame::frame_size(RegisterMap* map) const { + frame sender = this->sender(map); + return sender.sp() - sp(); +} + +intptr_t* frame::entry_frame_argument_at(int offset) const { + // convert offset to index to deal with tsi + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + // Entry frame's arguments are always in relation to unextended_sp() + return &unextended_sp()[index]; +} + +// sender_sp +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + return (intptr_t*) at(interpreter_frame_sender_sp_offset); +} + +void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { + assert(is_interpreted_frame(), "interpreted frame expected"); + ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); +} + + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); + // make sure the pointer points inside the frame + assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer"); + assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer"); + return result; +} + +void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { + 
*((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; +} + +// Used by template based interpreter deoptimization +void frame::interpreter_frame_set_last_sp(intptr_t* last_sp) { + *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = last_sp; +} + +frame frame::sender_for_entry_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); + assert(!entry_frame_is_first(), "next Java fp must be non zero"); + assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); + // Since we are walking the stack now this nested anchor is obviously walkable + // even if it wasn't when it was stacked. + jfa->make_walkable(); + map->clear(); + assert(map->include_argument_oops(), "should be set by clear"); + vmassert(jfa->last_Java_pc() != NULL, "not walkable"); + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); + return fr; +} + +OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { + ShouldNotCallThis(); + return nullptr; +} + +bool frame::optimized_entry_frame_is_first() const { + ShouldNotCallThis(); + return false; +} + +frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { + ShouldNotCallThis(); + return {}; +} + +//------------------------------------------------------------------------------ +// frame::verify_deopt_original_pc +// +// Verifies the calculated original PC of a deoptimization PC for the +// given unextended SP. +#ifdef ASSERT +void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { + frame fr; + + // This is ugly but it's better than to change {get,set}_original_pc + // to take an SP value as argument. And it's only a debugging + // method anyway. 
+ fr._unextended_sp = unextended_sp; + + assert_cond(nm != NULL); + address original_pc = nm->get_original_pc(&fr); + assert(nm->insts_contains_inclusive(original_pc), + "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); +} +#endif + +//------------------------------------------------------------------------------ +// frame::adjust_unextended_sp +void frame::adjust_unextended_sp() { + // On riscv, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. + + if (_cb != NULL) { + CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); + if (sender_cm != NULL) { + // If the sender PC is a deoptimization point, verify the original PC (debug builds only). + if (sender_cm->is_deopt_entry(_pc) || + sender_cm->is_deopt_mh_entry(_pc)) { + DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); + } + } + } +} + +//------------------------------------------------------------------------------ +// frame::update_map_with_saved_link +void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { + // The interpreter and compiler(s) always save fp in a known + // location on entry. We must record where that location is + // so that if fp was live on callout from c2 we can find + // the saved copy no matter what it called. + + // Since the interpreter always saves fp if we record where it is then + // we don't have to always save fp on entry and exit to c2 compiled + // code, on entry will be enough. + assert(map != NULL, "map must be set"); + map->set_location(::fp->as_VMReg(), (address) link_addr); + // this is weird "H" ought to be at a higher address however the + // oopMaps seems to have the "H" regs at the same address as the + // vanilla register. 
+ map->set_location(::fp->as_VMReg()->next(), (address) link_addr); +} + + +//------------------------------------------------------------------------------ +// frame::sender_for_interpreter_frame +frame frame::sender_for_interpreter_frame(RegisterMap* map) const { + // SP is the raw SP from the sender after adapter or interpreter + // extension. + intptr_t* sender_sp = this->sender_sp(); + + // This is the sp before any possible extension (adapter/locals). + intptr_t* unextended_sp = interpreter_frame_sender_sp(); + +#ifdef COMPILER2 + assert(map != NULL, "map must be set"); + if (map->update_map()) { + update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); + } +#endif // COMPILER2 + + return frame(sender_sp, unextended_sp, link(), sender_pc()); +} + + +//------------------------------------------------------------------------------ +// frame::sender_for_compiled_frame +frame frame::sender_for_compiled_frame(RegisterMap* map) const { + // we cannot rely upon the last fp having been saved to the thread + // in C2 code but it will have been pushed onto the stack. so we + // have to find it relative to the unextended sp + + assert(_cb->frame_size() >= 0, "must have non-zero frame size"); + intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size(); + intptr_t* unextended_sp = l_sender_sp; + + // the return_address is always the word on the stack + address sender_pc = (address) *(l_sender_sp + frame::return_addr_offset); + + intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp + frame::link_offset); + + assert(map != NULL, "map must be set"); + if (map->update_map()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. 
+ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + if (_cb->oop_maps() != NULL) { + OopMapSet::update_register_map(this, map); + } + + // Since the prolog does the save and restore of FP there is no + // oopmap for it so we must fill in its location as if there was + // an oopmap entry since if our caller was compiled code there + // could be live jvm state in it. + update_map_with_saved_link(map, saved_fp_addr); + } + + return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc); +} + +//------------------------------------------------------------------------------ +// frame::sender_raw +frame frame::sender_raw(RegisterMap* map) const { + // Default is we don't have to follow them. The sender_for_xxx will + // update it accordingly + assert(map != NULL, "map must be set"); + map->set_include_argument_oops(false); + + if (is_entry_frame()) { + return sender_for_entry_frame(map); + } + if (is_interpreted_frame()) { + return sender_for_interpreter_frame(map); + } + assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); + + // This test looks odd: why is it not is_compiled_frame() ? That's + // because stubs also have OOP maps. + if (_cb != NULL) { + return sender_for_compiled_frame(map); + } + + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. 
+ return frame(sender_sp(), link(), sender_pc()); +} + +frame frame::sender(RegisterMap* map) const { + frame result = sender_raw(map); + + if (map->process_frames()) { + StackWatermarkSet::on_iteration(map->thread(), result); + } + + return result; +} + +bool frame::is_interpreted_frame_valid(JavaThread* thread) const { + assert(is_interpreted_frame(), "Not an interpreted frame"); + // These are reasonable sanity checks + if (fp() == NULL || (intptr_t(fp()) & (wordSize-1)) != 0) { + return false; + } + if (sp() == NULL || (intptr_t(sp()) & (wordSize-1)) != 0) { + return false; + } + if (fp() + interpreter_frame_initial_sp_offset < sp()) { + return false; + } + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (fp() <= sp()) { // this attempts to deal with unsigned comparison above + return false; + } + + // do some validation of frame elements + + // first the method + Method* m = *interpreter_frame_method_addr(); + // validate the method we'd find in this potential sender + if (!Method::is_valid_method(m)) { + return false; + } + + // stack frames shouldn't be much larger than max_stack elements + // this test requires the use of unextended_sp which is the sp as seen by + // the current frame, and not sp which is the "raw" sp which could point + // further because of local variables of the callee method inserted after + // method arguments + if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { + return false; + } + + // validate bci/bcx + address bcp = interpreter_frame_bcp(); + if (m->validate_bci_from_bcp(bcp) < 0) { + return false; + } + + // validate constantPoolCache* + ConstantPoolCache* cp = *interpreter_frame_cache_addr(); + if (MetaspaceObj::is_valid(cp) == false) { + return false; + } + + // validate locals + address locals = (address) *interpreter_frame_locals_addr(); + if (locals > thread->stack_base() || locals < (address) fp()) { + return false; + } + + // 
We'd have to be pretty unlucky to be mislead at this point + return true; +} + +BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { + assert(is_interpreted_frame(), "interpreted frame expected"); + Method* method = interpreter_frame_method(); + BasicType type = method->result_type(); + + intptr_t* tos_addr = NULL; + if (method->is_native()) { + tos_addr = (intptr_t*)sp(); + if (type == T_FLOAT || type == T_DOUBLE) { + // This is because we do a push(ltos) after push(dtos) in generate_native_entry. + tos_addr += 2 * Interpreter::stackElementWords; + } + } else { + tos_addr = (intptr_t*)interpreter_frame_tos_address(); + } + + switch (type) { + case T_OBJECT : + case T_ARRAY : { + oop obj; + if (method->is_native()) { + obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); + } else { + oop* obj_p = (oop*)tos_addr; + obj = (obj_p == NULL) ? (oop)NULL : *obj_p; + } + assert(Universe::is_in_heap_or_null(obj), "sanity check"); + *oop_result = obj; + break; + } + case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; + case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; + case T_CHAR : value_result->c = *(jchar*)tos_addr; break; + case T_SHORT : value_result->s = *(jshort*)tos_addr; break; + case T_INT : value_result->i = *(jint*)tos_addr; break; + case T_LONG : value_result->j = *(jlong*)tos_addr; break; + case T_FLOAT : { + value_result->f = *(jfloat*)tos_addr; + break; + } + case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; + case T_VOID : /* Nothing to do */ break; + default : ShouldNotReachHere(); + } + + return type; +} + + +intptr_t* frame::interpreter_frame_tos_at(jint offset) const { + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + return &interpreter_frame_tos_address()[index]; +} + +#ifndef PRODUCT + +#define DESCRIBE_FP_OFFSET(name) \ + values.describe(frame_no, fp() + frame::name##_offset, #name) + +void frame::describe_pd(FrameValues& values, int frame_no) { + if 
(is_interpreted_frame()) { + DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_method); + DESCRIBE_FP_OFFSET(interpreter_frame_mdp); + DESCRIBE_FP_OFFSET(interpreter_frame_mirror); + DESCRIBE_FP_OFFSET(interpreter_frame_cache); + DESCRIBE_FP_OFFSET(interpreter_frame_locals); + DESCRIBE_FP_OFFSET(interpreter_frame_bcp); + DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); + } +} +#endif + +intptr_t *frame::initial_deoptimization_info() { + // Not used on riscv, but we must return something. + return NULL; +} + +intptr_t* frame::real_fp() const { + if (_cb != NULL) { + // use the frame size if valid + int size = _cb->frame_size(); + if (size > 0) { + return unextended_sp() + size; + } + } + // else rely on fp() + assert(!is_compiled_frame(), "unknown compiled frame size"); + return fp(); +} + +#undef DESCRIBE_FP_OFFSET + +#ifndef PRODUCT +// This is a generic constructor which is only used by pns() in debug.cpp. +frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { + init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); +} + +void frame::pd_ps() {} +#endif + +void JavaFrameAnchor::make_walkable() { + // last frame set? + if (last_Java_sp() == NULL) { return; } + // already walkable? + if (walkable()) { return; } + vmassert(last_Java_sp() != NULL, "not called from Java code?"); + vmassert(last_Java_pc() == NULL, "already walkable"); + _last_Java_pc = (address)_last_Java_sp[-1]; + vmassert(walkable(), "something went wrong"); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/frame_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/frame_riscv.hpp @@ -0,0 +1,202 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_FRAME_RISCV_HPP
+#define CPU_RISCV_FRAME_RISCV_HPP
+
+#include "runtime/synchronizer.hpp"
+
+// A frame represents a physical stack frame (an activation). Frames can be
+// C or Java frames, and the Java frames can be interpreted or compiled.
+// In contrast, vframes represent source-level activations, so that one physical frame
+// can correspond to multiple source level frames because of inlining.
+// A frame is comprised of {pc, fp, sp}
+// ------------------------------ Asm interpreter ----------------------------------------
+// Layout of asm interpreter frame:
+//    [expression stack      ] * <- sp
+
+//    [monitors[0]           ]   \
+//     ...                        | monitor block size = k
+//    [monitors[k-1]         ]   /
+//    [frame initial esp     ] ( == &monitors[0], initially here)       initial_sp_offset
+//    [byte code index/pointr]                   = bcp()                bcp_offset
+
+//    [pointer to locals     ]                   = locals()             locals_offset
+//    [constant pool cache   ]                   = cache()              cache_offset
+
+//    [klass of method       ]                   = mirror()             mirror_offset
+//    [padding               ]
+
+//    [methodData            ]                   = mdp()                mdp_offset
+//    [Method                ]                   = method()             method_offset
+
+//    [last esp              ]                   = last_sp()            last_sp_offset
+//    [old stack pointer     ]                     (sender_sp)          sender_sp_offset
+
+//    [old frame pointer     ]
+//    [return pc             ]
+
+//    [last sp               ]   <- fp           = link()
+//    [oop temp              ]                     (only for native calls)
+
+//    [padding               ]                     (to preserve machine SP alignment)
+//    [locals and parameters ]
+//                               <- sender sp
+// ------------------------------ Asm interpreter ----------------------------------------
+
+// ------------------------------ C Frame ------------------------------------------------
+// Stack: gcc with -fno-omit-frame-pointer
+//                    .
+//                    .
+//       +->          .
+//       |   +-----------------+   |
+//       |   | return address  |   |
+//       |   |   previous fp ------+
+//       |   | saved registers |
+//       |   | local variables |
+//       |   |       ...       | <-+
+//       |   +-----------------+   |
+//       |   | return address  |   |
+//       +------ previous fp   |   |
+//           | saved registers |   |
+//           | local variables |   |
+//       +-> |       ...       |   |
+//       |   +-----------------+   |
+//       |   | return address  |   |
+//       |   |   previous fp ------+
+//       |   | saved registers |
+//       |   | local variables |
+//       |   |       ...       | <-+
+//       |   +-----------------+   |
+//       |   | return address  |   |
+//       +------ previous fp   |   |
+//           | saved registers |   |
+//           | local variables |   |
+//   $fp --> |       ...       |   |
+//           +-----------------+   |
+//           | return address  |   |
+//           |   previous fp ------+
+//           | saved registers |
+//   $sp --> | local variables |
+//           +-----------------+
+// ------------------------------ C Frame ------------------------------------------------
+
+ public:
+  enum {  // offsets handed to addr_at(); presumably fp-relative words - TODO confirm against shared frame.hpp
+    pc_return_offset = 0,
+    // All frames
+    link_offset = -2,
+    return_addr_offset = -1,
+    sender_sp_offset = 0,
+    // Interpreter frames
+    interpreter_frame_oop_temp_offset = 1, // for native calls only
+
+    interpreter_frame_sender_sp_offset = -3,
+    // outgoing sp before a call to an invoked method
+    interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1,
+    interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1,
+    interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1,
+    interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1,
+    interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1,
+    interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1,
+    interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1,
+    interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1,
+    interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1,
+
+    interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset,
+    interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset,
+
+    // Entry frames
+    // n.b. these values are determined by the layout defined in
+    // stubGenerator for the Java call stub
+    entry_frame_after_call_words = 22,
+    entry_frame_call_wrapper_offset = -10,
+
+    // we don't need a save area
+    arg_reg_save_area_bytes = 0
+  };
+
+  intptr_t ptr_at(int offset) const {  // reads the word that ptr_at_addr(offset) points at
+    return *ptr_at_addr(offset);
+  }
+
+  void ptr_at_put(int offset, intptr_t value) {  // overwrites the word that ptr_at_addr(offset) points at
+    *ptr_at_addr(offset) = value;
+  }
+
+ private:
+  // an additional field beyond _sp and _pc:
+  intptr_t* _fp; // frame pointer
+  // The interpreter and adapters will extend the frame of the caller.
+  // Since oopMaps are based on the sp of the caller before extension
+  // we need to know that value. However in order to compute the address
+  // of the return address we need the real "raw" sp. Since sparc already
+  // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's
+  // original sp we use that convention.
+
+  intptr_t* _unextended_sp;
+  void adjust_unextended_sp();
+
+  intptr_t* ptr_at_addr(int offset) const {  // addr_at(offset) viewed as a pointer to one intptr_t slot
+    return (intptr_t*) addr_at(offset);
+  }
+
+#ifdef ASSERT
+  // Used in frame::sender_for_{interpreter,compiled}_frame
+  static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp);
+#endif
+
+ public:
+  // Constructors
+
+  frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc);
+
+  frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc);
+
+  frame(intptr_t* ptr_sp, intptr_t* ptr_fp);
+
+  void init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc);
+
+  // accessors for the instance variables
+  // Note: not necessarily the real 'frame pointer' (see real_fp)
+  intptr_t* fp() const { return _fp; }
+
+  inline address* sender_pc_addr() const;
+
+  // expression stack tos if we are nested in a java call
+  intptr_t* interpreter_frame_last_sp() const;
+
+  // helper to update a map with the callee-saved frame pointer (the saved link at link_addr)
+  static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr);
+
+  // deoptimization support
+  void interpreter_frame_set_last_sp(intptr_t* last_sp);
+
+  static jint interpreter_frame_expression_stack_direction() { return -1; }
+
+  // returns the sending frame, without applying any barriers
+  frame sender_raw(RegisterMap* map) const;
+
+#endif // CPU_RISCV_FRAME_RISCV_HPP
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/frame_riscv.inline.hpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/frame_riscv.inline.hpp
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates.
All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#ifndef CPU_RISCV_FRAME_RISCV_INLINE_HPP
+#define CPU_RISCV_FRAME_RISCV_INLINE_HPP
+
+#include "code/codeCache.hpp"
+#include "code/vmreg.inline.hpp"
+
+// Inline functions for RISCV frames:
+
+// Constructors:
+
+inline frame::frame() {
+  _pc = NULL;
+  _sp = NULL;
+  _unextended_sp = NULL;
+  _fp = NULL;
+  _cb = NULL;
+  _deopt_state = unknown;
+}
+
+static int spin;  // NOTE(review): not referenced anywhere in this header - confirm it is still needed
+
+inline void frame::init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) {
+  intptr_t a = intptr_t(ptr_sp);  // NOTE(review): a and b are never read in this function
+  intptr_t b = intptr_t(ptr_fp);
+  _sp = ptr_sp;
+  _unextended_sp = ptr_sp;
+  _fp = ptr_fp;
+  _pc = pc;
+  assert(pc != NULL, "no pc?");
+  _cb = CodeCache::find_blob(pc);
+  adjust_unextended_sp();
+
+  address original_pc = CompiledMethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    _pc = original_pc;  // report the original pc for a deoptimized frame
+    _deopt_state = is_deoptimized;
+  } else {
+    _deopt_state = not_deoptimized;
+  }
+}
+
+inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) {
+  init(ptr_sp, ptr_fp, pc);
+}
+
+inline frame::frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc) {
+  intptr_t a = intptr_t(ptr_sp);  // NOTE(review): a and b are never read in this constructor
+  intptr_t b = intptr_t(ptr_fp);
+  _sp = ptr_sp;
+  _unextended_sp = unextended_sp;
+  _fp = ptr_fp;
+  _pc = pc;
+  assert(pc != NULL, "no pc?");
+  _cb = CodeCache::find_blob(pc);
+  adjust_unextended_sp();
+
+  address original_pc = CompiledMethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    _pc = original_pc;
+    assert(_cb->as_compiled_method()->insts_contains_inclusive(_pc),
+           "original PC must be in the main code section of the the compiled method (or must be immediately following it)");
+    _deopt_state = is_deoptimized;
+  } else {
+    _deopt_state = not_deoptimized;
+  }
+}
+
+inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp) {
+  intptr_t a = intptr_t(ptr_sp);  // NOTE(review): a and b are never read in this constructor
+  intptr_t b = intptr_t(ptr_fp);
+  _sp = ptr_sp;
+  _unextended_sp = ptr_sp;
+  _fp = ptr_fp;
+  _pc = (address)(ptr_sp[-1]);  // no pc argument: it is taken from the word just below ptr_sp
+
+  // Here's a sticky one. This constructor can be called via AsyncGetCallTrace
+  // when last_Java_sp is non-null but the pc fetched is junk. If we are truly
+  // unlucky the junk value could be to a zombied method and we'll die on the
+  // find_blob call. This is also why we can have no asserts on the validity
+  // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler
+  // -> pd_last_frame should use a specialized version of pd_last_frame which could
+  // call a specialized frame constructor instead of this one.
+  // Then we could use the assert below. However this assert is of somewhat dubious
+  // value.
+
+  _cb = CodeCache::find_blob(_pc);
+  adjust_unextended_sp();
+
+  address original_pc = CompiledMethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    _pc = original_pc;
+    _deopt_state = is_deoptimized;
+  } else {
+    _deopt_state = not_deoptimized;
+  }
+}
+
+// Accessors
+
+inline bool frame::equal(frame other) const {
+  bool ret = sp() == other.sp() &&
+             unextended_sp() == other.unextended_sp() &&
+             fp() == other.fp() &&
+             pc() == other.pc();
+  assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction");
+  return ret;
+}
+
+// Return unique id for this frame. The id must have a value where we can distinguish
+// identity and younger/older relationship. NULL represents an invalid (incomparable)
+// frame.
+inline intptr_t* frame::id(void) const { return unextended_sp(); }
+
+// Return true if the frame is older (less recent activation) than the frame represented by id
+inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id");
+                                                  return this->id() > id ; }
+
+inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); }
+
+inline intptr_t* frame::link_or_null() const {
+  intptr_t** ptr = (intptr_t **)addr_at(link_offset);
+  return os::is_readable_pointer(ptr) ? *ptr : NULL;  // NULL when the link slot is not readable
+}
+
+inline intptr_t* frame::unextended_sp() const { return _unextended_sp; }
+
+// Return address
+inline address* frame::sender_pc_addr() const { return (address*) addr_at(return_addr_offset); }
+inline address frame::sender_pc() const { return *sender_pc_addr(); }
+inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); }
+
+inline intptr_t** frame::interpreter_frame_locals_addr() const {
+  return (intptr_t**)addr_at(interpreter_frame_locals_offset);
+}
+
+inline intptr_t* frame::interpreter_frame_last_sp() const {
+  return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset);
+}
+
+inline intptr_t* frame::interpreter_frame_bcp_addr() const {
+  return (intptr_t*)addr_at(interpreter_frame_bcp_offset);
+}
+
+inline intptr_t* frame::interpreter_frame_mdp_addr() const {
+  return (intptr_t*)addr_at(interpreter_frame_mdp_offset);
+}
+
+
+// Constant pool cache
+
+inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const {
+  return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset);
+}
+
+// Method
+
+inline Method** frame::interpreter_frame_method_addr() const {
+  return (Method**)addr_at(interpreter_frame_method_offset);
+}
+
+// Mirror
+
+inline oop* frame::interpreter_frame_mirror_addr() const {
+  return (oop*)addr_at(interpreter_frame_mirror_offset);
+}
+
+// top of expression stack
+inline intptr_t* frame::interpreter_frame_tos_address() const {
+  intptr_t* last_sp = interpreter_frame_last_sp();
+  if (last_sp == NULL) {
+    return sp();  // no last_sp recorded: fall back to sp()
+  } else {
+    // sp() may have been extended or shrunk by an adapter. At least
+    // check that we don't fall behind the legal region.
+    // For top deoptimized frame last_sp == interpreter_frame_monitor_end.
+    assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos");
+    return last_sp;
+  }
+}
+
+inline oop* frame::interpreter_frame_temp_oop_addr() const {
+  return (oop *)(fp() + interpreter_frame_oop_temp_offset);
+}
+
+inline int frame::interpreter_frame_monitor_size() {
+  return BasicObjectLock::size();
+}
+
+
+// expression stack
+// (the max_stack arguments are used by the GC; see class FrameClosure)
+
+inline intptr_t* frame::interpreter_frame_expression_stack() const {
+  intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end();
+  return monitor_end-1;  // one word below interpreter_frame_monitor_end()
+}
+
+
+// Entry frames
+
+inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const {
+  return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset);
+}
+
+
+// Compiled frames
+inline oop frame::saved_oop_result(RegisterMap* map) const {
+  oop* result_adr = (oop *)map->location(x10->as_VMReg());  // slot the map records for x10
+  guarantee(result_adr != NULL, "bad register save location");
+  return (*result_adr);
+}
+
+inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) {
+  oop* result_adr = (oop *)map->location(x10->as_VMReg());  // slot the map records for x10
+  guarantee(result_adr != NULL, "bad register save location");
+  *result_adr = obj;
+}
+
+#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp
@@ -0,0 +1,484 @@
+/*
+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/g1/g1BarrierSet.hpp" +#include "gc/g1/g1BarrierSetAssembler.hpp" +#include "gc/g1/g1BarrierSetRuntime.hpp" +#include "gc/g1/g1CardTable.hpp" +#include "gc/g1/g1ThreadLocalData.hpp" +#include "gc/g1/heapRegion.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interp_masm.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.hpp" +#ifdef COMPILER1 +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "gc/g1/c1/g1BarrierSetC1.hpp" +#endif + +#define __ masm-> + +void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, RegSet saved_regs) { + assert_cond(masm != NULL); + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; + if (!dest_uninitialized) { + Label done; + Address in_progress(xthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + + // Is marking active? 
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ lwu(t0, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lbu(t0, in_progress); + } + __ beqz(t0, done); + + __ push_reg(saved_regs, sp); + if (count == c_rarg0) { + if (addr == c_rarg1) { + // exactly backwards!! + __ mv(t0, c_rarg0); + __ mv(c_rarg0, c_rarg1); + __ mv(c_rarg1, t0); + } else { + __ mv(c_rarg1, count); + __ mv(c_rarg0, addr); + } + } else { + __ mv(c_rarg0, addr); + __ mv(c_rarg1, count); + } + if (UseCompressedOops) { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); + } + __ pop_reg(saved_regs, sp); + + __ bind(done); + } +} + +void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp, RegSet saved_regs) { + assert_cond(masm != NULL); + __ push_reg(saved_regs, sp); + assert_different_registers(start, count, tmp); + assert_different_registers(c_rarg0, count); + __ mv(c_rarg0, start); + __ mv(c_rarg1, count); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); + __ pop_reg(saved_regs, sp); +} + +void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call) { + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 
+ + assert_cond(masm != NULL); + assert(thread == xthread, "must be"); + + Label done; + Label runtime; + + assert_different_registers(obj, pre_val, tmp, t0); + assert(pre_val != noreg && tmp != noreg, "expecting a register"); + + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); + + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width + __ lwu(tmp, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lbu(tmp, in_progress); + } + __ beqz(tmp, done); + + // Do we need to load the previous value? + if (obj != noreg) { + __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); + } + + // Is the previous value null? + __ beqz(pre_val, done); + + // Can we store original value in the thread's buffer? + // Is index == 0? + // (The index field is typed as size_t.) + + __ ld(tmp, index); // tmp := *index_adr + __ beqz(tmp, runtime); // tmp == 0? 
+ // If yes, goto runtime + + __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize + __ sd(tmp, index); // *index_adr := tmp + __ ld(t0, buffer); + __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr + + // Record the previous value + __ sd(pre_val, Address(tmp, 0)); + __ j(done); + + __ bind(runtime); + // save the live input values + RegSet saved = RegSet::of(pre_val); + if (tosca_live) { saved += RegSet::of(x10); } + if (obj != noreg) { saved += RegSet::of(obj); } + + __ push_reg(saved, sp); + + if (expand_call) { + assert(pre_val != c_rarg1, "smashed arg"); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } + + __ pop_reg(saved, sp); + + __ bind(done); + +} + +void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2) { + assert_cond(masm != NULL); + assert(thread == xthread, "must be"); + assert_different_registers(store_addr, new_val, thread, tmp, tmp2, + t0); + assert(store_addr != noreg && new_val != noreg && tmp != noreg && + tmp2 != noreg, "expecting a register"); + + Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); + + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + + Label done; + Label runtime; + + // Does store cross heap regions? + + __ xorr(tmp, store_addr, new_val); + __ srli(tmp, tmp, HeapRegion::LogOfHRGrainBytes); + __ beqz(tmp, done); + + // crosses regions, storing NULL? + + __ beqz(new_val, done); + + // storing region crossing non-NULL, is card already dirty? 
+ + ExternalAddress cardtable((address) ct->byte_map_base()); + const Register card_addr = tmp; + + __ srli(card_addr, store_addr, CardTable::card_shift); + + // get the address of the card + __ load_byte_map_base(tmp2); + __ add(card_addr, card_addr, tmp2); + __ lbu(tmp2, Address(card_addr)); + __ mv(t0, (int)G1CardTable::g1_young_card_val()); + __ beq(tmp2, t0, done); + + assert((int)CardTable::dirty_card_val() == 0, "must be 0"); + + __ membar(MacroAssembler::StoreLoad); + + __ lbu(tmp2, Address(card_addr)); + __ beqz(tmp2, done); + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. + + __ sb(zr, Address(card_addr)); + + __ ld(t0, queue_index); + __ beqz(t0, runtime); + __ sub(t0, t0, wordSize); + __ sd(t0, queue_index); + + __ ld(tmp2, buffer); + __ add(t0, tmp2, t0); + __ sd(card_addr, Address(t0, 0)); + __ j(done); + + __ bind(runtime); + // save the live input values + RegSet saved = RegSet::of(store_addr); + __ push_reg(saved, sp); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); + __ pop_reg(saved, sp); + + __ bind(done); +} + +void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) { + assert_cond(masm != NULL); + bool on_oop = is_reference_type(type); + bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; + bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; + bool on_reference = on_weak || on_phantom; + ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + if (on_oop && on_reference) { + // RA is live. It must be saved around calls. + __ enter(); // barrier may call runtime + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. 
+ g1_write_barrier_pre(masm /* masm */, + noreg /* obj */, + dst /* pre_val */, + xthread /* thread */, + tmp1 /* tmp */, + true /* tosca_live */, + true /* expand_call */); + __ leave(); + } +} + +void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + assert_cond(masm != NULL); + // flatten object address if needed + if (dst.offset() == 0) { + if (dst.base() != x13) { + __ mv(x13, dst.base()); + } + } else { + __ la(x13, dst); + } + + g1_write_barrier_pre(masm, + x13 /* obj */, + tmp2 /* pre_val */, + xthread /* thread */, + tmp1 /* tmp */, + val != noreg /* tosca_live */, + false /* expand_call */); + + if (val == noreg) { + BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); + } else { + // G1 barrier needs uncompressed oop for region cross check. + Register new_val = val; + if (UseCompressedOops) { + new_val = t1; + __ mv(new_val, val); + } + BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); + g1_write_barrier_post(masm, + x13 /* store_adr */, + new_val /* new_val */, + xthread /* thread */, + tmp1 /* tmp */, + tmp2 /* tmp2 */); + } +} + +#ifdef COMPILER1 + +#undef __ +#define __ ce->masm()-> + +void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { + G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + + // At this point we know that marking is in progress. + // If do_load() is true then we have to emit the + // load of the previous value; otherwise it has already + // been loaded into _pre_val. 
+ __ bind(*stub->entry()); + + assert(stub->pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); + } + __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->pre_val()->as_register(), 0); + __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); + __ j(*stub->continuation()); +} + +void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { + G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + __ bind(*stub->entry()); + assert(stub->addr()->is_register(), "Precondition"); + assert(stub->new_val()->is_register(), "Precondition"); + Register new_val_reg = stub->new_val()->as_register(); + __ beqz(new_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->addr()->as_pointer_register(), 0); + __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin())); + __ j(*stub->continuation()); +} + +#undef __ + +#define __ sasm-> + +void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("g1_pre_barrier", false); + + BarrierSet* bs = BarrierSet::barrier_set(); + + // arg0 : previous value of memory + const Register pre_val = x10; + const Register thread = xthread; + const Register tmp = t0; + + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); + + Label done; + Label runtime; + + // Is marking still active? 
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width + __ lwu(tmp, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lbu(tmp, in_progress); + } + __ beqz(tmp, done); + + // Can we store original value in the thread's buffer? + __ ld(tmp, queue_index); + __ beqz(tmp, runtime); + + __ sub(tmp, tmp, wordSize); + __ sd(tmp, queue_index); + __ ld(t1, buffer); + __ add(tmp, tmp, t1); + __ load_parameter(0, t1); + __ sd(t1, Address(tmp, 0)); + __ j(done); + + __ bind(runtime); + __ push_call_clobbered_registers(); + __ load_parameter(0, pre_val); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + __ pop_call_clobbered_registers(); + __ bind(done); + + __ epilogue(); +} + +void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("g1_post_barrier", false); + + // arg0 : store_address + Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp + + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + + Label done; + Label runtime; + + // At this point we know new_value is non-NULL and the new_value crosses regions. + // Must check to see if card is already dirty + const Register thread = xthread; + + Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); + + const Register card_offset = t1; + // RA is free here, so we can use it to hold the byte_map_base. 
+ const Register byte_map_base = ra; + + assert_different_registers(card_offset, byte_map_base, t0); + + __ load_parameter(0, card_offset); + __ srli(card_offset, card_offset, CardTable::card_shift); + __ load_byte_map_base(byte_map_base); + + // Convert card offset into an address in card_addr + Register card_addr = card_offset; + __ add(card_addr, byte_map_base, card_addr); + + __ lbu(t0, Address(card_addr, 0)); + __ sub(t0, t0, (int)G1CardTable::g1_young_card_val()); + __ beqz(t0, done); + + assert((int)CardTable::dirty_card_val() == 0, "must be 0"); + + __ membar(MacroAssembler::StoreLoad); + __ lbu(t0, Address(card_addr, 0)); + __ beqz(t0, done); + + // storing region crossing non-NULL, card is clean. + // dirty card and log. + __ sb(zr, Address(card_addr, 0)); + + __ ld(t0, queue_index); + __ beqz(t0, runtime); + __ sub(t0, t0, wordSize); + __ sd(t0, queue_index); + + // Reuse RA to hold buffer_addr + const Register buffer_addr = ra; + + __ ld(buffer_addr, buffer); + __ add(t0, buffer_addr, t0); + __ sd(card_addr, Address(t0, 0)); + __ j(done); + + __ bind(runtime); + __ push_call_clobbered_registers(); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); + __ pop_call_clobbered_registers(); + __ bind(done); + __ epilogue(); +} + +#undef __ + +#endif // COMPILER1 Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP
+#define CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "gc/shared/modRefBarrierSetAssembler.hpp"
+#include "utilities/macros.hpp"
+
+#ifdef COMPILER1
+class LIR_Assembler;
+#endif
+class StubAssembler;
+class G1PreBarrierStub;
+class G1PostBarrierStub;
+
+class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {  // RISC-V emitter for the G1 pre/post write barriers
+protected:
+  void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                       Register addr, Register count, RegSet saved_regs);
+  void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                        Register start, Register count, Register tmp, RegSet saved_regs);
+
+  void g1_write_barrier_pre(MacroAssembler* masm,
+                            Register obj,
+                            Register pre_val,
+                            Register thread,
+                            Register tmp,
+                            bool tosca_live,
+                            bool expand_call);
+
+  void g1_write_barrier_post(MacroAssembler* masm,
+                             Register store_addr,
+                             Register new_val,
+                             Register thread,
+                             Register tmp,
+                             Register tmp2);
+
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Address dst, Register val, Register tmp1, Register tmp2);
+
+public:
+#ifdef COMPILER1
+  void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);  // C1-only entry points
+  void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
+
+  void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
+  void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
+#endif
+
+  void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+               Register dst, Address src, Register tmp1, Register tmp_thread);
+};
+
+#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP +#define CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP + +const size_t G1MergeHeapRootsPrefetchCacheSize = 16; + +#endif // CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp @@ -0,0 +1,302 @@ +/* + * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "classfile/classLoaderData.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/barrierSetNMethod.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interp_masm.hpp" +#include "memory/universe.hpp" +#include "runtime/jniHandles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.hpp" + +#define __ masm-> + +void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) { + assert_cond(masm != NULL); + + // RA is live. It must be saved around calls. + + bool in_heap = (decorators & IN_HEAP) != 0; + bool in_native = (decorators & IN_NATIVE) != 0; + bool is_not_null = (decorators & IS_NOT_NULL) != 0; + switch (type) { + case T_OBJECT: // fall through + case T_ARRAY: { + if (in_heap) { + if (UseCompressedOops) { + __ lwu(dst, src); + if (is_not_null) { + __ decode_heap_oop_not_null(dst); + } else { + __ decode_heap_oop(dst); + } + } else { + __ ld(dst, src); + } + } else { + assert(in_native, "why else?"); + __ ld(dst, src); + } + break; + } + case T_BOOLEAN: __ load_unsigned_byte (dst, src); break; + case T_BYTE: __ load_signed_byte (dst, src); break; + case T_CHAR: __ load_unsigned_short(dst, src); break; + case T_SHORT: __ load_signed_short (dst, src); break; + case T_INT: __ lw (dst, src); break; + case T_LONG: __ ld (dst, src); break; + case T_ADDRESS: __ ld (dst, src); break; + case T_FLOAT: __ flw (f10, src); break; + case T_DOUBLE: __ fld (f10, src); break; + default: Unimplemented(); + } +} + +void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + assert_cond(masm != NULL); + bool in_heap = (decorators & IN_HEAP) != 0; + bool in_native = (decorators & IN_NATIVE) != 0; + switch (type) { 
+ case T_OBJECT: // fall through + case T_ARRAY: { + val = val == noreg ? zr : val; + if (in_heap) { + if (UseCompressedOops) { + assert(!dst.uses(val), "not enough registers"); + if (val != zr) { + __ encode_heap_oop(val); + } + __ sw(val, dst); + } else { + __ sd(val, dst); + } + } else { + assert(in_native, "why else?"); + __ sd(val, dst); + } + break; + } + case T_BOOLEAN: + __ andi(val, val, 0x1); // boolean is true if LSB is 1 + __ sb(val, dst); + break; + case T_BYTE: __ sb(val, dst); break; + case T_CHAR: __ sh(val, dst); break; + case T_SHORT: __ sh(val, dst); break; + case T_INT: __ sw(val, dst); break; + case T_LONG: __ sd(val, dst); break; + case T_ADDRESS: __ sd(val, dst); break; + case T_FLOAT: __ fsw(f10, dst); break; + case T_DOUBLE: __ fsd(f10, dst); break; + default: Unimplemented(); + } + +} + +void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath) { + assert_cond(masm != NULL); + // If mask changes we need to ensure that the inverse is still encodable as an immediate + STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); + __ andi(obj, obj, ~JNIHandles::weak_tag_mask); + __ ld(obj, Address(obj, 0)); // *obj +} + +// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. 
+void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register tmp1, + Register tmp2, + Label& slow_case, + bool is_far) { + assert_cond(masm != NULL); + assert_different_registers(obj, tmp2); + assert_different_registers(obj, var_size_in_bytes); + Register end = tmp2; + + __ ld(obj, Address(xthread, JavaThread::tlab_top_offset())); + if (var_size_in_bytes == noreg) { + __ la(end, Address(obj, con_size_in_bytes)); + } else { + __ add(end, obj, var_size_in_bytes); + } + __ ld(t0, Address(xthread, JavaThread::tlab_end_offset())); + __ bgtu(end, t0, slow_case, is_far); + + // update the tlab top pointer + __ sd(end, Address(xthread, JavaThread::tlab_top_offset())); + + // recover var_size_in_bytes if necessary + if (var_size_in_bytes == end) { + __ sub(var_size_in_bytes, var_size_in_bytes, obj); + } +} + +// Defines obj, preserves var_size_in_bytes +void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register tmp1, + Label& slow_case, + bool is_far) { + assert_cond(masm != NULL); + assert_different_registers(obj, var_size_in_bytes, tmp1); + if (!Universe::heap()->supports_inline_contig_alloc()) { + __ j(slow_case); + } else { + Register end = tmp1; + Label retry; + __ bind(retry); + + // Get the current end of the heap + ExternalAddress address_end((address) Universe::heap()->end_addr()); + { + int32_t offset; + __ la_patchable(t1, address_end, offset); + __ ld(t1, Address(t1, offset)); + } + + // Get the current top of the heap + ExternalAddress address_top((address) Universe::heap()->top_addr()); + { + int32_t offset; + __ la_patchable(t0, address_top, offset); + __ addi(t0, t0, offset); + __ lr_d(obj, t0, Assembler::aqrl); + } + + // Adjust it by the size of our new object + if (var_size_in_bytes == noreg) { + __ la(end, Address(obj, con_size_in_bytes)); + } else { + __ add(end, obj, var_size_in_bytes); + }
+ + // if end < obj then we wrapped around high memory + __ bltu(end, obj, slow_case, is_far); + + __ bgtu(end, t1, slow_case, is_far); + + // If heap_top hasn't been changed by some other thread, update it. + __ sc_d(t1, end, t0, Assembler::rl); + __ bnez(t1, retry); + + incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1); + } +} + +void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, + Register var_size_in_bytes, + int con_size_in_bytes, + Register tmp1) { + assert_cond(masm != NULL); + assert(tmp1->is_valid(), "need temp reg"); + + __ ld(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); + if (var_size_in_bytes->is_valid()) { + __ add(tmp1, tmp1, var_size_in_bytes); + } else { + __ add(tmp1, tmp1, con_size_in_bytes); + } + __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); +} + +void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { + BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); + + if (bs_nm == NULL) { + return; + } + + // RISCV atomic operations require that the memory address be naturally aligned. + __ align(4); + + Label skip, guard; + Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset())); + + __ lwu(t0, guard); + + // Subsequent loads of oops must occur after load of guard value. + // BarrierSetNMethod::disarm sets guard with release semantics. + __ membar(MacroAssembler::LoadLoad); + __ lwu(t1, thread_disarmed_addr); + __ beq(t0, t1, skip); + + int32_t offset = 0; + __ movptr_with_offset(t0, StubRoutines::riscv::method_entry_barrier(), offset); + __ jalr(ra, t0, offset); + __ j(skip); + + __ bind(guard); + + assert(__ offset() % 4 == 0, "bad alignment"); + __ emit_int32(0); // nmethod guard value. Skipped over in common case. 
+ + __ bind(skip); +} + +void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { + BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod(); + if (bs == NULL) { + return; + } + + Label bad_call; + __ beqz(xmethod, bad_call); + + // Pointer chase to the method holder to find out if the method is concurrently unloading. + Label method_live; + __ load_method_holder_cld(t0, xmethod); + + // Is it a strong CLD? + __ lwu(t1, Address(t0, ClassLoaderData::keep_alive_offset())); + __ bnez(t1, method_live); + + // Is it a weak but alive CLD? + __ push_reg(RegSet::of(x28, x29), sp); + + __ ld(x28, Address(t0, ClassLoaderData::holder_offset())); + + // Uses x28 & x29, so we must pass new temporaries. + __ resolve_weak_handle(x28, x29); + __ mv(t0, x28); + + __ pop_reg(RegSet::of(x28, x29), sp); + + __ bnez(t0, method_live); + + __ bind(bad_call); + + __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + __ bind(method_live); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetNMethod.hpp" +#include "memory/allocation.hpp" +#include "oops/access.hpp" + +class BarrierSetAssembler: public CHeapObj<mtGC> { +private: + void incr_allocated_bytes(MacroAssembler* masm, + Register var_size_in_bytes, int con_size_in_bytes, + Register t1 = noreg); + +public: + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs) {} + virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register start, Register end, Register tmp, RegSet saved_regs) {} + virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread); + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); + + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath); + + virtual void tlab_allocate(MacroAssembler* masm, + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile
time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register tmp1, // temp register + Register tmp2, // temp register + Label& slow_case, // continuation point if fast allocation fails + bool is_far = false + ); + + void eden_allocate(MacroAssembler* masm, + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register tmp1, // temp register + Label& slow_case, // continuation point if fast allocation fails + bool is_far = false + ); + virtual void barrier_stubs_init() {} + + virtual void nmethod_entry_barrier(MacroAssembler* masm); + virtual void c2i_entry_barrier(MacroAssembler* masm); + virtual ~BarrierSetAssembler() {} +}; + +#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "code/codeCache.hpp" +#include "code/nativeInst.hpp" +#include "gc/shared/barrierSetNMethod.hpp" +#include "logging/log.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/registerMap.hpp" +#include "runtime/thread.hpp" +#include "utilities/align.hpp" +#include "utilities/debug.hpp" + +class NativeNMethodBarrier: public NativeInstruction { + address instruction_address() const { return addr_at(0); } + + int *guard_addr() { + /* auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j */ + return reinterpret_cast<int*>(instruction_address() + 12 * 4); + } + +public: + int get_value() { + return Atomic::load_acquire(guard_addr()); + } + + void set_value(int value) { + Atomic::release_store(guard_addr(), value); + } + + void verify() const; +}; + +// Store the instruction bitmask, bits and name for checking the barrier.
+struct CheckInsn { + uint32_t mask; + uint32_t bits; + const char *name; +}; + +static const struct CheckInsn barrierInsn[] = { + { 0x00000fff, 0x00000297, "auipc t0, 0 "}, + { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) "}, + { 0xffffffff, 0x0aa0000f, "fence ir, ir "}, + { 0x000fffff, 0x000be303, "lwu t1, 112(xthread)"}, + { 0x01fff07f, 0x00628063, "beq t0, t1, skip "}, + { 0x00000fff, 0x000002b7, "lui t0, imm0 "}, + { 0x000fffff, 0x00028293, "addi t0, t0, imm1 "}, + { 0xffffffff, 0x00b29293, "slli t0, t0, 11 "}, + { 0x000fffff, 0x00028293, "addi t0, t0, imm2 "}, + { 0xffffffff, 0x00529293, "slli t0, t0, 5 "}, + { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) "}, + { 0x00000fff, 0x0000006f, "j skip "} + /* guard: */ + /* 32bit nmethod guard value */ + /* skip: */ +}; + +// The encodings must match the instructions emitted by +// BarrierSetAssembler::nmethod_entry_barrier. The matching ignores the specific +// register numbers and immediate values in the encoding. +void NativeNMethodBarrier::verify() const { + intptr_t addr = (intptr_t) instruction_address(); + for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) { + uint32_t inst = *((uint32_t*) addr); + if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) { + tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst); + fatal("not an %s instruction.", barrierInsn[i].name); + } + addr += 4; + } +} + + +/* We're called from an nmethod when we need to deoptimize it. We do + this by throwing away the nmethod's frame and jumping to the + ic_miss stub. This looks like there has been an IC miss at the + entry of the nmethod, so we resolve the call, which will fall back + to the interpreter if the nmethod has been unloaded. 
*/ +void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { + + typedef struct { + intptr_t *sp; intptr_t *fp; address ra; address pc; + } frame_pointers_t; + + frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5); + + JavaThread *thread = JavaThread::current(); + RegisterMap reg_map(thread, false); + frame frame = thread->last_frame(); + + assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be"); + assert(frame.cb() == nm, "must be"); + frame = frame.sender(&reg_map); + + LogTarget(Trace, nmethod, barrier) out; + if (out.is_enabled()) { + ResourceMark mark; + log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p", + nm->method()->name_and_sig_as_C_string(), + nm, *(address *) return_address_ptr, nm->is_osr_method(), thread, + thread->get_thread_name(), frame.sp(), nm->verified_entry_point()); + } + + new_frame->sp = frame.sp(); + new_frame->fp = frame.fp(); + new_frame->ra = frame.pc(); + new_frame->pc = SharedRuntime::get_handle_wrong_method_stub(); +} + +// This is the offset of the entry barrier from where the frame is completed. +// If any code changes between the end of the verified entry where the entry +// barrier resides, and the completion of the frame, then +// NativeNMethodBarrier::verify() will immediately complain when it does +// not find the expected native instruction at this offset, which needs updating. +// Note that this offset is invariant of PreserveFramePointer.
+ +// see BarrierSetAssembler::nmethod_entry_barrier +// auipc + lwu + fence + lwu + beq + movptr_with_offset(5 instructions) + jalr + j + int32 +static const int entry_barrier_offset = -4 * 13; + +static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) { + address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset; + NativeNMethodBarrier* barrier = reinterpret_cast<NativeNMethodBarrier*>(barrier_address); + debug_only(barrier->verify()); + return barrier; +} + +void BarrierSetNMethod::disarm(nmethod* nm) { + if (!supports_entry_barrier(nm)) { + return; + } + + // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier. + NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); + + barrier->set_value(disarmed_value()); +} + +bool BarrierSetNMethod::is_armed(nmethod* nm) { + if (!supports_entry_barrier(nm)) { + return false; + } + + NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); + return barrier->get_value() != disarmed_value(); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/cardTableBarrierSetAssembler.hpp" +#include "gc/shared/gc_globals.hpp" +#include "interpreter/interp_masm.hpp" + +#define __ masm-> + + +void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) { + assert_cond(masm != NULL); + assert_different_registers(obj, tmp); + BarrierSet* bs = BarrierSet::barrier_set(); + assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); + + __ srli(obj, obj, CardTable::card_shift); + + assert(CardTable::dirty_card_val() == 0, "must be"); + + __ load_byte_map_base(tmp); + __ add(tmp, obj, tmp); + + if (UseCondCardMark) { + Label L_already_dirty; + __ membar(MacroAssembler::StoreLoad); + __ lbu(t1, Address(tmp)); + __ beqz(t1, L_already_dirty); + __ sb(zr, Address(tmp)); + __ bind(L_already_dirty); + } else { + __ sb(zr, Address(tmp)); + } +} + +void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp, RegSet saved_regs) { + assert_cond(masm != NULL); + assert_different_registers(start, tmp); + assert_different_registers(count, tmp); + + Label L_loop, L_done; + const Register end = count; + + __ beqz(count, L_done); // zero count - 
nothing to do + // end = start + count << LogBytesPerHeapOop + __ shadd(end, count, start, count, LogBytesPerHeapOop); + __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive + + __ srli(start, start, CardTable::card_shift); + __ srli(end, end, CardTable::card_shift); + __ sub(count, end, start); // number of bytes to copy + + __ load_byte_map_base(tmp); + __ add(start, start, tmp); + + __ bind(L_loop); + __ add(tmp, start, count); + __ sb(zr, Address(tmp)); + __ sub(count, count, 1); + __ bgez(count, L_loop); + __ bind(L_done); +} + +void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + bool in_heap = (decorators & IN_HEAP) != 0; + bool is_array = (decorators & IS_ARRAY) != 0; + bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; + bool precise = is_array || on_anonymous; + + bool needs_post_barrier = val != noreg && in_heap; + BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); + if (needs_post_barrier) { + // flatten object address if needed + if (!precise || dst.offset() == 0) { + store_check(masm, dst.base(), x13); + } else { + assert_cond(masm != NULL); + __ la(x13, dst); + store_check(masm, x13, t0); + } + } +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/modRefBarrierSetAssembler.hpp" + +class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { +protected: + void store_check(MacroAssembler* masm, Register obj, Register tmp); + + virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp, RegSet saved_regs); + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); +}; + +#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp @@ -0,0 +1,55 @@ 
+/* + * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/modRefBarrierSetAssembler.hpp" + +#define __ masm-> + +void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs) { + + if (is_oop) { + gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); + } +} + +void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register start, Register count, Register tmp, + RegSet saved_regs) { + if (is_oop) { + gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp, saved_regs); + } +} + +void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + if (is_reference_type(type)) { + oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } else { + BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSetAssembler.hpp" + +// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other +// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected +// accesses, which are overridden in the concrete BarrierSetAssembler. + +class ModRefBarrierSetAssembler: public BarrierSetAssembler { +protected: + virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, RegSet saved_regs) {} + virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp, RegSet saved_regs) {} + + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) = 0; + +public: + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs); + virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register start, Register count, Register tmp, RegSet saved_regs); + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, 
Register tmp2); +}; + +#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "gc/shared/gc_globals.hpp" +#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" +#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" + +#define __ masm->masm()-> + +void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { + Register addr = _addr->as_register_lo(); + Register newval = _new_value->as_register(); + Register cmpval = _cmp_value->as_register(); + Register tmp1 = _tmp1->as_register(); + Register tmp2 = _tmp2->as_register(); + Register result = result_opr()->as_register(); + + ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), newval, t1); + + if (UseCompressedOops) { + __ encode_heap_oop(tmp1, cmpval); + cmpval = tmp1; + __ encode_heap_oop(tmp2, newval); + newval = tmp2; + } + + ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq, + /* release */ Assembler::rl, /* is_cae */ false, result); +} + +#undef __ + +#ifdef ASSERT +#define __ gen->lir(__FILE__, __LINE__)-> +#else +#define __ gen->lir()-> +#endif + +LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) { + BasicType bt = access.type(); + if (access.is_oop()) { + LIRGenerator *gen = access.gen(); + if (ShenandoahSATBBarrier) { + pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(), + LIR_OprFact::illegalOpr /* pre_val */); + } + if (ShenandoahCASBarrier) { + cmp_value.load_item(); + new_value.load_item(); + + LIR_Opr tmp1 = gen->new_register(T_OBJECT); + LIR_Opr tmp2 = gen->new_register(T_OBJECT); + LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base(); + LIR_Opr result = gen->new_register(T_INT); + + __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), tmp1, tmp2, result)); + return result; + } 
+ } + return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value); +} + +LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) { + LIRGenerator* gen = access.gen(); + BasicType type = access.type(); + + LIR_Opr result = gen->new_register(type); + value.load_item(); + LIR_Opr value_opr = value.result(); + + if (access.is_oop()) { + value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators()); + } + + assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type"); + LIR_Opr tmp = gen->new_register(T_INT); + __ xchg(access.resolved_addr(), value_opr, result, tmp); + + if (access.is_oop()) { + result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators()); + LIR_Opr tmp_opr = gen->new_register(type); + __ move(result, tmp_opr); + result = tmp_opr; + if (ShenandoahSATBBarrier) { + pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr, + result /* pre_val */); + } + } + + return result; +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp @@ -0,0 +1,712 @@ +/* + * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" +#include "gc/shenandoah/shenandoahForwarding.hpp" +#include "gc/shenandoah/shenandoahHeap.inline.hpp" +#include "gc/shenandoah/shenandoahHeapRegion.hpp" +#include "gc/shenandoah/shenandoahRuntime.hpp" +#include "gc/shenandoah/shenandoahThreadLocalData.hpp" +#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interp_masm.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.hpp" +#ifdef COMPILER1 +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" +#endif + +#define __ masm-> + +void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs) { + if (is_oop) { + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; + if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { + + Label done; + + // Avoid calling runtime if count == 0 + __ beqz(count, done); + + // Is GC 
active? + Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); + assert_different_registers(src, dst, count, t0); + + __ lbu(t0, gc_state); + if (ShenandoahSATBBarrier && dest_uninitialized) { + __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED); + __ beqz(t0, done); + } else { + __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING); + __ beqz(t0, done); + } + + __ push_reg(saved_regs, sp); + if (UseCompressedOops) { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), + src, dst, count); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count); + } + __ pop_reg(saved_regs, sp); + __ bind(done); + } + } +} + +void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call) { + if (ShenandoahSATBBarrier) { + satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); + } +} + +void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call) { + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. + assert(thread == xthread, "must be"); + + Label done; + Label runtime; + + assert_different_registers(obj, pre_val, tmp, t0); + assert(pre_val != noreg && tmp != noreg, "expecting a register"); + + Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); + Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + + // Is marking active? 
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ lwu(tmp, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lbu(tmp, in_progress); + } + __ beqz(tmp, done); + + // Do we need to load the previous value? + if (obj != noreg) { + __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); + } + + // Is the previous value null? + __ beqz(pre_val, done); + + // Can we store original value in the thread's buffer? + // Is index == 0? + // (The index field is typed as size_t.) + __ ld(tmp, index); // tmp := *index_adr + __ beqz(tmp, runtime); // tmp == 0? If yes, goto runtime + + __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize + __ sd(tmp, index); // *index_adr := tmp + __ ld(t0, buffer); // t0 := *buffer_adr + __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr + + // Record the previous value + __ sd(pre_val, Address(tmp, 0)); + __ j(done); + + __ bind(runtime); + // save the live input values + RegSet saved = RegSet::of(pre_val); + if (tosca_live) saved += RegSet::of(x10); + if (obj != noreg) saved += RegSet::of(obj); + + __ push_reg(saved, sp); + + // Calling the runtime using the regular call_VM_leaf mechanism generates + // code (generated by InterpreterMacroAssembler::call_VM_leaf_base) + // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL. + // + // If we are generating the pre-barrier without a frame (e.g. in the + // intrinsified Reference.get() routine) then the frame pointer might be pointing to + // the caller frame and so this check will most likely fail at runtime. + // + // Expanding the call directly bypasses the generation of the check. + // So when we do not have a full interpreter frame on the stack + // expand_call should be passed true.
+ if (expand_call) { + assert(pre_val != c_rarg1, "smashed arg"); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); + } + + __ pop_reg(saved, sp); + + __ bind(done); +} + +void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) { + assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); + + Label is_null; + __ beqz(dst, is_null); + resolve_forward_pointer_not_null(masm, dst, tmp); + __ bind(is_null); +} + +// IMPORTANT: This must preserve all registers, even t0 and t1, except those explicitly +// passed in. +void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) { + assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); + // The below loads the mark word, checks if the lowest two bits are + // set, and if so, clears the lowest two bits and copies the result + // to dst. Otherwise it leaves dst alone. + // Implementing this is surprisingly awkward. I do it here by: + // - Inverting the mark word + // - Test lowest two bits == 0 + // - If so, set the lowest two bits + // - Invert the result back, and copy to dst + RegSet saved_regs = RegSet::of(t2); + bool borrow_reg = (tmp == noreg); + if (borrow_reg) { + // No free registers available. Make one useful.
+ tmp = t0; + if (tmp == dst) { + tmp = t1; + } + saved_regs += RegSet::of(tmp); + } + + assert_different_registers(tmp, dst, t2); + __ push_reg(saved_regs, sp); + + Label done; + __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); + __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 + __ andi(t2, tmp, markWord::lock_mask_in_place); + __ bnez(t2, done); + __ ori(tmp, tmp, markWord::marked_value); + __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 + __ bind(done); + + __ pop_reg(saved_regs, sp); +} + +void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, + Register dst, + Address load_addr, + DecoratorSet decorators) { + assert(ShenandoahLoadRefBarrier, "Should be enabled"); + assert(dst != t1 && load_addr.base() != t1, "need t1"); + assert_different_registers(load_addr.base(), t0, t1); + + bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); + bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); + bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); + bool is_native = ShenandoahBarrierSet::is_native_access(decorators); + bool is_narrow = UseCompressedOops && !is_native; + + Label heap_stable, not_cset; + __ enter(); + Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); + __ lbu(t1, gc_state); + + // Check for heap stability + if (is_strong) { + __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); + __ beqz(t1, heap_stable); + } else { + Label lrb; + __ andi(t0, t1, ShenandoahHeap::WEAK_ROOTS); + __ bnez(t0, lrb); + __ andi(t0, t1, ShenandoahHeap::HAS_FORWARDED); + __ beqz(t0, heap_stable); + __ bind(lrb); + } + + // use x11 for load address + Register result_dst = dst; + if (dst == x11) { + __ mv(t1, dst); + dst = t1; + } + + // Save x10 and x11, unless it is an output register + RegSet saved_regs = RegSet::of(x10, x11) - result_dst; + __ push_reg(saved_regs, sp); + __ la(x11, load_addr); + __ mv(x10, dst); + + // Test for 
in-cset + if (is_strong) { + __ li(t1, (uint64_t)ShenandoahHeap::in_cset_fast_test_addr()); + __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); + __ add(t1, t1, t0); + __ lbu(t1, Address(t1)); + __ andi(t0, t1, 1); + __ beqz(t0, not_cset); + } + + __ push_call_clobbered_registers(); + if (is_strong) { + if (is_narrow) { + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); + } else { + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); + } + } else if (is_weak) { + if (is_narrow) { + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); + } else { + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); + } + } else { + assert(is_phantom, "only remaining strength"); + assert(!is_narrow, "phantom access cannot be narrow"); + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_phantom); // phantom strength has its own runtime entry + } + __ jalr(ra); + __ mv(t0, x10); + __ pop_call_clobbered_registers(); + __ mv(x10, t0); + __ bind(not_cset); + __ mv(result_dst, x10); + __ pop_reg(saved_regs, sp); + + __ bind(heap_stable); + __ leave(); +} + +void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { + if (ShenandoahIUBarrier) { + __ push_call_clobbered_registers(); + + satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); + + __ pop_call_clobbered_registers(); + } +} + +// +// Arguments: +// +// Inputs: +// src: oop location to load from, might be clobbered +// +// Output: +// dst: oop loaded from src location +// +// Kill: +// t0, t1 (scratch registers, may be clobbered by the load reference barrier) +// +// Alias: +// dst: x28 or x29 (borrowed as temporary output register when dst is src.base(), saved and restored on the stack) +// +void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, + DecoratorSet decorators, + BasicType type, + Register dst, + Address src, + Register tmp1, + Register tmp_thread) { + // 1: non-reference load, no additional barrier is needed + if
(!is_reference_type(type)) { + BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + return; + } + + // 2: load a reference from src location and apply LRB if needed + if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { + Register result_dst = dst; + + // Preserve src location for LRB + RegSet saved_regs; + if (dst == src.base()) { + dst = (src.base() == x28) ? x29 : x28; + saved_regs = RegSet::of(dst); + __ push_reg(saved_regs, sp); + } + assert_different_registers(dst, src.base()); + + BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + + load_reference_barrier(masm, dst, src, decorators); + + if (dst != result_dst) { + __ mv(result_dst, dst); + dst = result_dst; + } + + if (saved_regs.bits() != 0) { + __ pop_reg(saved_regs, sp); + } + } else { + BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + } + + // 3: apply keep-alive barrier if needed + if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { + __ enter(); + __ push_call_clobbered_registers(); + satb_write_barrier_pre(masm /* masm */, + noreg /* obj */, + dst /* pre_val */, + xthread /* thread */, + tmp1 /* tmp */, + true /* tosca_live */, + true /* expand_call */); + __ pop_call_clobbered_registers(); + __ leave(); + } +} + +void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + bool on_oop = is_reference_type(type); + if (!on_oop) { + BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); + return; + } + + // flatten object address if needed + if (dst.offset() == 0) { + if (dst.base() != x13) { + __ mv(x13, dst.base()); + } + } else { + __ la(x13, dst); + } + + shenandoah_write_barrier_pre(masm, + x13 /* obj */, + tmp2 /* pre_val */, + xthread /* thread */, + tmp1 /* tmp */, + val != noreg /* tosca_live */, + false /* expand_call */); + + 
if (val == noreg) { + BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); + } else { + iu_barrier(masm, val, tmp1); + // G1 barrier needs uncompressed oop for region cross check. + Register new_val = val; + if (UseCompressedOops) { + new_val = t1; + __ mv(new_val, val); + } + BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); + } +} + +void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath) { + Label done; + // Resolve jobject + BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); + + // Check for null. + __ beqz(obj, done); + + assert(obj != t1, "need t1"); + Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); + __ lbu(t1, gc_state); + + // Check for heap in evacuation phase + __ andi(t0, t1, ShenandoahHeap::EVACUATION); + __ bnez(t0, slowpath); + + __ bind(done); +} + +// Special Shenandoah CAS implementation that handles false negatives due +// to concurrent evacuation. The service is more complex than a +// traditional CAS operation because the CAS operation is intended to +// succeed if the reference at addr exactly matches expected or if the +// reference at addr holds a pointer to a from-space object that has +// been relocated to the location named by expected. There are two +// races that must be addressed: +// a) A parallel thread may mutate the contents of addr so that it points +// to a different object. In this case, the CAS operation should fail. +// b) A parallel thread may heal the contents of addr, replacing a +// from-space pointer held in addr with the to-space pointer +// representing the new location of the object. 
+// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL +// or it refers to an object that is not being evacuated out of +// from-space, or it refers to the to-space version of an object that +// is being evacuated out of from-space. +// +// By default the value held in the result register following execution +// of the generated code sequence is 0 to indicate failure of CAS, +// non-zero to indicate success. If is_cae, the result is the value most +// recently fetched from addr rather than a boolean success indicator. +// +// Clobbers t0, t1 +void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, + Register addr, + Register expected, + Register new_val, + Assembler::Aqrl acquire, + Assembler::Aqrl release, + bool is_cae, + Register result) { + bool is_narrow = UseCompressedOops; + Assembler::operand_size size = is_narrow ? Assembler::uint32 : Assembler::int64; + + assert_different_registers(addr, expected, t0, t1); + assert_different_registers(addr, new_val, t0, t1); + + Label retry, success, fail, done; + + __ bind(retry); + + // Step 1: Try to CAS. + __ cmpxchg(addr, expected, new_val, size, acquire, release, /* result */ t1); + + // If success, then we are done. + __ beq(expected, t1, success); + + // Step 2: CAS failed, check the forwarded pointer. + __ mv(t0, t1); + + if (is_narrow) { + __ decode_heap_oop(t0, t0); + } + resolve_forward_pointer(masm, t0); + + __ encode_heap_oop(t0, t0); + + // Report failure when the forwarded oop was not expected. + __ bne(t0, expected, fail); + + // Step 3: CAS again using the forwarded oop. + __ cmpxchg(addr, t1, new_val, size, acquire, release, /* result */ t0); + + // Retry when failed.
+ __ bne(t0, t1, retry); + + __ bind(success); + if (is_cae) { + __ mv(result, expected); + } else { + __ addi(result, zr, 1); + } + __ j(done); + + __ bind(fail); + if (is_cae) { + __ mv(result, t0); + } else { + __ mv(result, zr); + } + + __ bind(done); +} + +#undef __ + +#ifdef COMPILER1 + +#define __ ce->masm()-> + +void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { + ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + // At this point we know that marking is in progress. + // If do_load() is true then we have to emit the + // load of the previous value; otherwise it has already + // been loaded into _pre_val. + __ bind(*stub->entry()); + + assert(stub->pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); + } + __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->pre_val()->as_register(), 0); + __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); + __ j(*stub->continuation()); +} + +void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, + ShenandoahLoadReferenceBarrierStub* stub) { + ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + __ bind(*stub->entry()); + + DecoratorSet decorators = stub->decorators(); + bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); + bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); + bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); + bool is_native = ShenandoahBarrierSet::is_native_access(decorators); + + Register obj = stub->obj()->as_register(); + Register res = stub->result()->as_register(); + Register addr = 
stub->addr()->as_pointer_register(); + Register tmp1 = stub->tmp1()->as_register(); + Register tmp2 = stub->tmp2()->as_register(); + + assert(res == x10, "result must arrive in x10"); + assert_different_registers(tmp1, tmp2, t0); + + if (res != obj) { + __ mv(res, obj); + } + + if (is_strong) { + // Check for object in cset. + __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); + __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); + __ add(tmp2, tmp2, tmp1); + __ lbu(tmp2, Address(tmp2)); + __ beqz(tmp2, *stub->continuation(), true /* is_far */); + } + + ce->store_parameter(res, 0); + ce->store_parameter(addr, 1); + + if (is_strong) { + if (is_native) { + __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin())); + } else { + __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin())); + } + } else if (is_weak) { + __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin())); + } else { + assert(is_phantom, "only remaining strength"); + __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin())); + } + + __ j(*stub->continuation()); +} + +#undef __ + +#define __ sasm-> + +void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("shenandoah_pre_barrier", false); + + // arg0 : previous value of memory + + BarrierSet* bs = BarrierSet::barrier_set(); + + const Register pre_val = x10; + const Register thread = xthread; + const Register tmp = t0; + + Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + + Label done; + Label runtime; + + // Is marking still active? 
+ Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); + __ lb(tmp, gc_state); + __ andi(tmp, tmp, ShenandoahHeap::MARKING); + __ beqz(tmp, done); + + // Can we store original value in the thread's buffer? + __ ld(tmp, queue_index); + __ beqz(tmp, runtime); + + __ sub(tmp, tmp, wordSize); + __ sd(tmp, queue_index); + __ ld(t1, buffer); + __ add(tmp, tmp, t1); + __ load_parameter(0, t1); + __ sd(t1, Address(tmp, 0)); + __ j(done); + + __ bind(runtime); + __ push_call_clobbered_registers(); + __ load_parameter(0, pre_val); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); + __ pop_call_clobbered_registers(); + __ bind(done); + + __ epilogue(); +} + +void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, + DecoratorSet decorators) { + __ prologue("shenandoah_load_reference_barrier", false); + // arg0 : object to be resolved + + __ push_call_clobbered_registers(); + __ load_parameter(0, x10); + __ load_parameter(1, x11); + + bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); + bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); + bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); + bool is_native = ShenandoahBarrierSet::is_native_access(decorators); + if (is_strong) { + if (is_native) { + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); + } else { + if (UseCompressedOops) { + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); + } else { + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); + } + } + } else if (is_weak) { + assert(!is_native, "weak must not be called off-heap"); + if (UseCompressedOops) { + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); + } else { + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); + } + 
} else { + assert(is_phantom, "only remaining strength"); + assert(is_native, "phantom must only be called off-heap"); + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_phantom); + } + __ jalr(ra); + __ mv(t0, x10); + __ pop_call_clobbered_registers(); + __ mv(x10, t0); + + __ epilogue(); +} + +#undef __ + +#endif // COMPILER1 Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#ifdef COMPILER1 +class LIR_Assembler; +class ShenandoahPreBarrierStub; +class ShenandoahLoadReferenceBarrierStub; +class StubAssembler; +#endif +class StubCodeGenerator; + +class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { +private: + + void satb_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call); + void shenandoah_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call); + + void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); + void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); + void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators); + +public: + + void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); + +#ifdef COMPILER1 + void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); + void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); + void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); + void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); +#endif + + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs); + + virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread); + virtual void store_at(MacroAssembler* masm, 
DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); + + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath); + + void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, + Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); +}; + +#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad @@ -0,0 +1,285 @@ +// +// Copyright (c) 2018, Red Hat, Inc. All rights reserved. +// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. 
+// +// + +source_hpp %{ +#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" +%} + +instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ + match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP tmp, KILL cr); + + format %{ + "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapP_shenandoah" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ + match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP tmp, KILL cr); + + format %{ + "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapN_shenandoah" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ + predicate(needs_acquiring_load_reserved(n)); + match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP tmp, KILL cr); + + format %{ + "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapPAcq_shenandoah" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::aq /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ + predicate(needs_acquiring_load_reserved(n)); + match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP tmp, KILL cr); + + format %{ + "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapNAcq_shenandoah" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::aq /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ + match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + effect(TEMP_DEF res, TEMP tmp, KILL cr); + + format %{ + "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + true /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ + match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP_DEF res, TEMP tmp, KILL cr); + format %{ + "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + true /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ + match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP tmp, KILL cr); + format %{ + "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah" + "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. + // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ + predicate(needs_acquiring_load_reserved(n)); + match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP_DEF res, TEMP tmp, KILL cr); + format %{ + "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq_shenandoah" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::aq /* acquire */, Assembler::rl /* release */, + true /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct 
compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ + predicate(needs_acquiring_load_reserved(n)); + match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP_DEF res, TEMP tmp, KILL cr); + format %{ + "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq_shenandoah" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::aq /* acquire */, Assembler::rl /* release */, + true /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ + match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP tmp, KILL cr); + format %{ + "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ + predicate(needs_acquiring_load_reserved(n)); + match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP tmp, KILL cr); + format %{ + "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapNAcq_shenandoah" + "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. + // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::aq /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ + predicate(needs_acquiring_load_reserved(n)); + match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP tmp, KILL cr); + format %{ + "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapPAcq_shenandoah" + "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
+ // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::aq /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp @@ -0,0 +1,441 @@ +/* + * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/codeBlob.hpp" +#include "code/vmreg.inline.hpp" +#include "gc/z/zBarrier.inline.hpp" +#include "gc/z/zBarrierSet.hpp" +#include "gc/z/zBarrierSetAssembler.hpp" +#include "gc/z/zBarrierSetRuntime.hpp" +#include "gc/z/zThreadLocalData.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/sharedRuntime.hpp" +#include "utilities/macros.hpp" +#ifdef COMPILER1 +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "gc/z/c1/zBarrierSetC1.hpp" +#endif // COMPILER1 +#ifdef COMPILER2 +#include "gc/z/c2/zBarrierSetC2.hpp" +#endif // COMPILER2 + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#undef __ +#define __ masm-> + +void ZBarrierSetAssembler::load_at(MacroAssembler* masm, + DecoratorSet decorators, + BasicType type, + Register dst, + Address src, + Register tmp1, + Register tmp_thread) { + if (!ZBarrierSet::barrier_needed(decorators, type)) { + // Barrier not needed + BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + return; + } + + assert_different_registers(t1, src.base()); + assert_different_registers(t0, t1, dst); + + Label done; + + // Load bad mask into temp register. + __ la(t0, src); + __ ld(t1, address_bad_mask_from_thread(xthread)); + __ ld(dst, Address(t0)); + + // Test reference against bad mask. If mask bad, then we need to fix it up. + __ andr(t1, dst, t1); + __ beqz(t1, done); + + __ enter(); + + __ push_call_clobbered_registers_except(RegSet::of(dst)); + + if (c_rarg0 != dst) { + __ mv(c_rarg0, dst); + } + + __ mv(c_rarg1, t0); + + __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); + + // Make sure dst has the return value. 
+ if (dst != x10) { + __ mv(dst, x10); + } + + __ pop_call_clobbered_registers_except(RegSet::of(dst)); + __ leave(); + + __ bind(done); +} + +#ifdef ASSERT + +void ZBarrierSetAssembler::store_at(MacroAssembler* masm, + DecoratorSet decorators, + BasicType type, + Address dst, + Register val, + Register tmp1, + Register tmp2) { + // Verify value + if (is_reference_type(type)) { + // Note that val could be noreg, which means we + // are storing null and can skip verification. + if (val != noreg) { + Label done; + + // tmp1 and tmp2 are often set to noreg. + RegSet savedRegs = RegSet::of(t0); + __ push_reg(savedRegs, sp); + + __ ld(t0, address_bad_mask_from_thread(xthread)); + __ andr(t0, val, t0); + __ beqz(t0, done); + __ stop("Verify oop store failed"); + __ should_not_reach_here(); + __ bind(done); + __ pop_reg(savedRegs, sp); + } + } + + // Store value + BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); +} + +#endif // ASSERT + +void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, + DecoratorSet decorators, + bool is_oop, + Register src, + Register dst, + Register count, + RegSet saved_regs) { + if (!is_oop) { + // Barrier not needed + return; + } + + BLOCK_COMMENT("ZBarrierSetAssembler::arraycopy_prologue {"); + + assert_different_registers(src, count, t0); + + __ push_reg(saved_regs, sp); + + if (count == c_rarg0 && src == c_rarg1) { + // exactly backwards!! 
+ __ xorr(c_rarg0, c_rarg0, c_rarg1); + __ xorr(c_rarg1, c_rarg0, c_rarg1); + __ xorr(c_rarg0, c_rarg0, c_rarg1); + } else { + __ mv(c_rarg0, src); + __ mv(c_rarg1, count); + } + + __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_array_addr(), 2); + + __ pop_reg(saved_regs, sp); + + BLOCK_COMMENT("} ZBarrierSetAssembler::arraycopy_prologue"); +} + +void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, + Register jni_env, + Register robj, + Register tmp, + Label& slowpath) { + BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_jobject_in_native {"); + + assert_different_registers(jni_env, robj, tmp); + + // Resolve jobject + BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, robj, tmp, slowpath); + + // Compute the offset of address bad mask from the field of jni_environment + long int bad_mask_relative_offset = (long int) (in_bytes(ZThreadLocalData::address_bad_mask_offset()) - + in_bytes(JavaThread::jni_environment_offset())); + + // Load the address bad mask + __ ld(tmp, Address(jni_env, bad_mask_relative_offset)); + + // Check address bad mask + __ andr(tmp, robj, tmp); + __ bnez(tmp, slowpath); + + BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native"); +} + +#ifdef COMPILER2 + +OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { + if (!OptoReg::is_reg(opto_reg)) { + return OptoReg::Bad; + } + + const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); + if (vm_reg->is_FloatRegister()) { + return opto_reg & ~1; + } + + return opto_reg; +} + +#undef __ +#define __ _masm-> + +class ZSaveLiveRegisters { +private: + MacroAssembler* const _masm; + RegSet _gp_regs; + FloatRegSet _fp_regs; + VectorRegSet _vp_regs; + +public: + void initialize(ZLoadBarrierStubC2* stub) { + // Record registers that needs to be saved/restored + RegMaskIterator rmi(stub->live()); + while (rmi.has_next()) { + const OptoReg::Name opto_reg = rmi.next(); + if (OptoReg::is_reg(opto_reg)) { + const 
VMReg vm_reg = OptoReg::as_VMReg(opto_reg); + if (vm_reg->is_Register()) { + _gp_regs += RegSet::of(vm_reg->as_Register()); + } else if (vm_reg->is_FloatRegister()) { + _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); + } else if (vm_reg->is_VectorRegister()) { + const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~(VectorRegisterImpl::max_slots_per_register - 1)); + _vp_regs += VectorRegSet::of(vm_reg_base->as_VectorRegister()); + } else { + fatal("Unknown register type"); + } + } + } + + // Remove C-ABI SOE registers, tmp regs and _ref register that will be updated + _gp_regs -= RegSet::range(x18, x27) + RegSet::of(x2) + RegSet::of(x8, x9) + RegSet::of(x5, stub->ref()); + } + + ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : + _masm(masm), + _gp_regs(), + _fp_regs(), + _vp_regs() { + // Figure out what registers to save/restore + initialize(stub); + + // Save registers + __ push_reg(_gp_regs, sp); + __ push_fp(_fp_regs, sp); + __ push_vp(_vp_regs, sp); + } + + ~ZSaveLiveRegisters() { + // Restore registers + __ pop_vp(_vp_regs, sp); + __ pop_fp(_fp_regs, sp); + __ pop_reg(_gp_regs, sp); + } +}; + +class ZSetupArguments { +private: + MacroAssembler* const _masm; + const Register _ref; + const Address _ref_addr; + +public: + ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : + _masm(masm), + _ref(stub->ref()), + _ref_addr(stub->ref_addr()) { + + // Setup arguments + if (_ref_addr.base() == noreg) { + // No self healing + if (_ref != c_rarg0) { + __ mv(c_rarg0, _ref); + } + __ mv(c_rarg1, zr); + } else { + // Self healing + if (_ref == c_rarg0) { + // _ref is already at correct place + __ la(c_rarg1, _ref_addr); + } else if (_ref != c_rarg1) { + // _ref is in wrong place, but not in c_rarg1, so fix it first + __ la(c_rarg1, _ref_addr); + __ mv(c_rarg0, _ref); + } else if (_ref_addr.base() != c_rarg0) { + assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0"); + __ mv(c_rarg0, _ref); + __ la(c_rarg1, _ref_addr); + } 
else { + assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0"); + if (_ref_addr.base() == c_rarg0) { + __ mv(t1, c_rarg1); + __ la(c_rarg1, _ref_addr); + __ mv(c_rarg0, t1); + } else { + ShouldNotReachHere(); + } + } + } + } + + ~ZSetupArguments() { + // Transfer result + if (_ref != x10) { + __ mv(_ref, x10); + } + } +}; + +#undef __ +#define __ masm-> + +void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { + BLOCK_COMMENT("ZLoadBarrierStubC2"); + + // Stub entry + __ bind(*stub->entry()); + + { + ZSaveLiveRegisters save_live_registers(masm, stub); + ZSetupArguments setup_arguments(masm, stub); + int32_t offset = 0; + __ la_patchable(t0, stub->slow_path(), offset); + __ jalr(x1, t0, offset); + } + + // Stub exit + __ j(*stub->continuation()); +} + +#undef __ + +#endif // COMPILER2 + +#ifdef COMPILER1 +#undef __ +#define __ ce->masm()-> + +void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, + LIR_Opr ref) const { + assert_different_registers(xthread, ref->as_register(), t1); + __ ld(t1, address_bad_mask_from_thread(xthread)); + __ andr(t1, t1, ref->as_register()); +} + +void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce, + ZLoadBarrierStubC1* stub) const { + // Stub entry + __ bind(*stub->entry()); + + Register ref = stub->ref()->as_register(); + Register ref_addr = noreg; + Register tmp = noreg; + + if (stub->tmp()->is_valid()) { + // Load address into tmp register + ce->leal(stub->ref_addr(), stub->tmp()); + ref_addr = tmp = stub->tmp()->as_pointer_register(); + } else { + // Address already in register + ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register(); + } + + assert_different_registers(ref, ref_addr, noreg); + + // Save x10 unless it is the result or tmp register + // Set up SP to accommodate parameters and maybe x10. 
+ if (ref != x10 && tmp != x10) { + __ sub(sp, sp, 32); + __ sd(x10, Address(sp, 16)); + } else { + __ sub(sp, sp, 16); + } + + // Setup arguments and call runtime stub + ce->store_parameter(ref_addr, 1); + ce->store_parameter(ref, 0); + + __ far_call(stub->runtime_stub()); + + // Verify result + __ verify_oop(x10, "Bad oop"); + + + // Move result into place + if (ref != x10) { + __ mv(ref, x10); + } + + // Restore x10 unless it is the result or tmp register + if (ref != x10 && tmp != x10) { + __ ld(x10, Address(sp, 16)); + __ add(sp, sp, 32); + } else { + __ add(sp, sp, 16); + } + + // Stub exit + __ j(*stub->continuation()); +} + +#undef __ +#define __ sasm-> + +void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, + DecoratorSet decorators) const { + __ prologue("zgc_load_barrier stub", false); + + __ push_call_clobbered_registers_except(RegSet::of(x10)); + + // Setup arguments + __ load_parameter(0, c_rarg0); + __ load_parameter(1, c_rarg1); + + __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); + + __ pop_call_clobbered_registers_except(RegSet::of(x10)); + + __ epilogue(); +} + +#undef __ +#endif // COMPILER1 Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP + +#include "code/vmreg.hpp" +#include "oops/accessDecorators.hpp" +#ifdef COMPILER2 +#include "opto/optoreg.hpp" +#endif // COMPILER2 + +#ifdef COMPILER1 +class LIR_Assembler; +class LIR_OprDesc; +typedef LIR_OprDesc* LIR_Opr; +class StubAssembler; +class ZLoadBarrierStubC1; +#endif // COMPILER1 + +#ifdef COMPILER2 +class Node; +class ZLoadBarrierStubC2; +#endif // COMPILER2 + +class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { +public: + virtual void load_at(MacroAssembler* masm, + DecoratorSet decorators, + BasicType type, + Register dst, + Address src, + Register tmp1, + Register tmp_thread); + +#ifdef ASSERT + virtual void store_at(MacroAssembler* masm, + DecoratorSet decorators, + BasicType type, + Address dst, + Register val, + Register tmp1, + Register tmp2); +#endif // ASSERT + + virtual void arraycopy_prologue(MacroAssembler* masm, + DecoratorSet decorators, + bool is_oop, + Register src, + Register dst, + Register count, + RegSet saved_regs); + + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, + Register jni_env, + Register robj, + Register tmp, + Label& slowpath); + +#ifdef COMPILER1 + void 
generate_c1_load_barrier_test(LIR_Assembler* ce, + LIR_Opr ref) const; + + void generate_c1_load_barrier_stub(LIR_Assembler* ce, + ZLoadBarrierStubC1* stub) const; + + void generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, + DecoratorSet decorators) const; +#endif // COMPILER1 + +#ifdef COMPILER2 + OptoReg::Name refine_register(const Node* node, + OptoReg::Name opto_reg); + + void generate_c2_load_barrier_stub(MacroAssembler* masm, + ZLoadBarrierStubC2* stub) const; +#endif // COMPILER2 +}; + +#endif // CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "gc/shared/gcLogPrecious.hpp" +#include "gc/shared/gc_globals.hpp" +#include "gc/z/zGlobals.hpp" +#include "runtime/globals.hpp" +#include "runtime/os.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/powerOfTwo.hpp" + +#ifdef LINUX +#include <sys/mman.h> +#endif // LINUX + +// +// The heap can have three different layouts, depending on the max heap size. +// +// Address Space & Pointer Layout 1 +// -------------------------------- +// +// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) +// . . +// . . +// . . +// +--------------------------------+ 0x0000014000000000 (20TB) +// | Remapped View | +// +--------------------------------+ 0x0000010000000000 (16TB) +// . . +// +--------------------------------+ 0x00000c0000000000 (12TB) +// | Marked1 View | +// +--------------------------------+ 0x0000080000000000 (8TB) +// | Marked0 View | +// +--------------------------------+ 0x0000040000000000 (4TB) +// . . +// +--------------------------------+ 0x0000000000000000 +// +// 6 4 4 4 4 +// 3 6 5 2 1 0 +// +--------------------+----+-----------------------------------------------+ +// |00000000 00000000 00|1111|11 11111111 11111111 11111111 11111111 11111111| +// +--------------------+----+-----------------------------------------------+ +// | | | +// | | * 41-0 Object Offset (42-bits, 4TB address space) +// | | +// | * 45-42 Metadata Bits (4-bits) 0001 = Marked0 (Address view 4-8TB) +// | 0010 = Marked1 (Address view 8-12TB) +// | 0100 = Remapped (Address view 16-20TB) +// | 1000 = Finalizable (Address view N/A) +// | +// * 63-46 Fixed (18-bits, always zero) +// +// +// Address Space & Pointer Layout 2 +// -------------------------------- +// +// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) +// . . +// . . +// . . +// +--------------------------------+ 0x0000280000000000 (40TB) +// | Remapped View | +// +--------------------------------+ 0x0000200000000000 (32TB) +// . . 
+// +--------------------------------+ 0x0000180000000000 (24TB) +// | Marked1 View | +// +--------------------------------+ 0x0000100000000000 (16TB) +// | Marked0 View | +// +--------------------------------+ 0x0000080000000000 (8TB) +// . . +// +--------------------------------+ 0x0000000000000000 +// +// 6 4 4 4 4 +// 3 7 6 3 2 0 +// +------------------+-----+------------------------------------------------+ +// |00000000 00000000 0|1111|111 11111111 11111111 11111111 11111111 11111111| +// +-------------------+----+------------------------------------------------+ +// | | | +// | | * 42-0 Object Offset (43-bits, 8TB address space) +// | | +// | * 46-43 Metadata Bits (4-bits) 0001 = Marked0 (Address view 8-16TB) +// | 0010 = Marked1 (Address view 16-24TB) +// | 0100 = Remapped (Address view 32-40TB) +// | 1000 = Finalizable (Address view N/A) +// | +// * 63-47 Fixed (17-bits, always zero) +// +// +// Address Space & Pointer Layout 3 +// -------------------------------- +// +// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) +// . . +// . . +// . . +// +--------------------------------+ 0x0000500000000000 (80TB) +// | Remapped View | +// +--------------------------------+ 0x0000400000000000 (64TB) +// . . +// +--------------------------------+ 0x0000300000000000 (48TB) +// | Marked1 View | +// +--------------------------------+ 0x0000200000000000 (32TB) +// | Marked0 View | +// +--------------------------------+ 0x0000100000000000 (16TB) +// . . 
+// +--------------------------------+ 0x0000000000000000 +// +// 6 4 4 4 4 +// 3 8 7 4 3 0 +// +------------------+----+-------------------------------------------------+ +// |00000000 00000000 |1111|1111 11111111 11111111 11111111 11111111 11111111| +// +------------------+----+-------------------------------------------------+ +// | | | +// | | * 43-0 Object Offset (44-bits, 16TB address space) +// | | +// | * 47-44 Metadata Bits (4-bits) 0001 = Marked0 (Address view 16-32TB) +// | 0010 = Marked1 (Address view 32-48TB) +// | 0100 = Remapped (Address view 64-80TB) +// | 1000 = Finalizable (Address view N/A) +// | +// * 63-48 Fixed (16-bits, always zero) +// + +// Default value if probing is not implemented for a certain platform: 128TB +static const size_t DEFAULT_MAX_ADDRESS_BIT = 47; +// Minimum value returned, if probing fails: 64GB +static const size_t MINIMUM_MAX_ADDRESS_BIT = 36; + +static size_t probe_valid_max_address_bit() { +#ifdef LINUX + size_t max_address_bit = 0; + const size_t page_size = os::vm_page_size(); + for (size_t i = DEFAULT_MAX_ADDRESS_BIT; i > MINIMUM_MAX_ADDRESS_BIT; --i) { + const uintptr_t base_addr = ((uintptr_t) 1U) << i; + if (msync((void*)base_addr, page_size, MS_ASYNC) == 0) { + // msync succeeded, the address is valid, and maybe even already mapped. + max_address_bit = i; + break; + } + if (errno != ENOMEM) { + // Some error occurred. This should never happen, but msync + // has some undefined behavior, hence ignore this bit. +#ifdef ASSERT + fatal("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); +#else // ASSERT + log_warning_p(gc)("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); +#endif // ASSERT + continue; + } + // Since msync failed with ENOMEM, the page might not be mapped. + // Try to map it, to see if the address is valid. 
+ void* const result_addr = mmap((void*) base_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); + if (result_addr != MAP_FAILED) { + munmap(result_addr, page_size); + } + if ((uintptr_t) result_addr == base_addr) { + // address is valid + max_address_bit = i; + break; + } + } + if (max_address_bit == 0) { + // probing failed, allocate a very high page and take that bit as the maximum + const uintptr_t high_addr = ((uintptr_t) 1U) << DEFAULT_MAX_ADDRESS_BIT; + void* const result_addr = mmap((void*) high_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); + if (result_addr != MAP_FAILED) { + max_address_bit = BitsPerSize_t - count_leading_zeros((size_t) result_addr) - 1; + munmap(result_addr, page_size); + } + } + log_info_p(gc, init)("Probing address space for the highest valid bit: " SIZE_FORMAT, max_address_bit); + return MAX2(max_address_bit, MINIMUM_MAX_ADDRESS_BIT); +#else // LINUX + return DEFAULT_MAX_ADDRESS_BIT; +#endif // LINUX +} + +size_t ZPlatformAddressOffsetBits() { + const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1; + const size_t max_address_offset_bits = valid_max_address_offset_bits - 3; + const size_t min_address_offset_bits = max_address_offset_bits - 2; + const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio); + const size_t address_offset_bits = log2i_exact(address_offset); + return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); +} + +size_t ZPlatformAddressMetadataShift() { + return ZPlatformAddressOffsetBits(); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP +#define CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP + +const size_t ZPlatformGranuleSizeShift = 21; // 2MB +const size_t ZPlatformHeapViews = 3; +const size_t ZPlatformCacheLineSize = 64; + +size_t ZPlatformAddressOffsetBits(); +size_t ZPlatformAddressMetadataShift(); + +#endif // CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad @@ -0,0 +1,233 @@ +// +// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// + +source_hpp %{ + +#include "gc/shared/gc_globals.hpp" +#include "gc/z/c2/zBarrierSetC2.hpp" +#include "gc/z/zThreadLocalData.hpp" + +%} + +source %{ + +static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, int barrier_data) { + if (barrier_data == ZLoadBarrierElided) { + return; + } + ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data); + __ ld(tmp, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); + __ andr(tmp, tmp, ref); + __ bnez(tmp, *stub->entry(), true /* far */); + __ bind(*stub->continuation()); +} + +static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { + ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, ZLoadBarrierStrong); + __ j(*stub->entry()); + __ bind(*stub->continuation()); +} + +%} + +// Load Pointer +instruct zLoadP(iRegPNoSp dst, memory mem) +%{ + match(Set dst (LoadP mem)); + predicate(UseZGC && 
(n->as_Load()->barrier_data() != 0)); + effect(TEMP dst); + + ins_cost(4 * DEFAULT_COST); + + format %{ "ld $dst, $mem, #@zLoadP" %} + + ins_encode %{ + const Address ref_addr (as_Register($mem$$base), $mem$$disp); + __ ld($dst$$Register, ref_addr); + z_load_barrier(_masm, this, ref_addr, $dst$$Register, t0 /* tmp */, barrier_data()); + %} + + ins_pipe(iload_reg_mem); +%} + +instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); + effect(KILL cr, TEMP_DEF res); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapP\n\t" + "mv $res, $res == $oldval" %} + + ins_encode %{ + Label failed; + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, + true /* result_as_bool */); + __ beqz($res$$Register, failed); + __ mv(t0, $oldval$$Register); + __ bind(failed); + if (barrier_data() != ZLoadBarrierElided) { + Label good; + __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); + __ andr(t1, t1, t0); + __ beqz(t1, good); + z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, + true /* result_as_bool */); + __ bind(good); + } + %} + + ins_pipe(pipe_slow); +%} + +instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + 
match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); + effect(KILL cr, TEMP_DEF res); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapPAcq\n\t" + "mv $res, $res == $oldval" %} + + ins_encode %{ + Label failed; + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, + true /* result_as_bool */); + __ beqz($res$$Register, failed); + __ mv(t0, $oldval$$Register); + __ bind(failed); + if (barrier_data() != ZLoadBarrierElided) { + Label good; + __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); + __ andr(t1, t1, t0); + __ beqz(t1, good); + z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, + true /* result_as_bool */); + __ bind(good); + } + %} + + ins_pipe(pipe_slow); +%} + +instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); + effect(TEMP_DEF res); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangeP" %} + + ins_encode %{ + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); + if (barrier_data() != ZLoadBarrierElided) 
{ + Label good; + __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); + __ andr(t0, t0, $res$$Register); + __ beqz(t0, good); + z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); + __ bind(good); + } + %} + + ins_pipe(pipe_slow); +%} + +instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + predicate(UseZGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); + effect(TEMP_DEF res); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangePAcq" %} + + ins_encode %{ + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); + if (barrier_data() != ZLoadBarrierElided) { + Label good; + __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); + __ andr(t0, t0, $res$$Register); + __ beqz(t0, good); + z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); + __ bind(good); + } + %} + + ins_pipe(pipe_slow); +%} + +instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ + match(Set prev (GetAndSetP mem newv)); + predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); + effect(TEMP_DEF prev, KILL cr); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ "atomic_xchg $prev, 
$newv, [$mem], #@zGetAndSetP" %} + + ins_encode %{ + __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); + %} + + ins_pipe(pipe_serial); +%} + +instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ + match(Set prev (GetAndSetP mem newv)); + predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() != 0)); + effect(TEMP_DEF prev, KILL cr); + + ins_cost(VOLATILE_REF_COST); + + format %{ "atomic_xchg_acq $prev, $newv, [$mem], #@zGetAndSetPAcq" %} + + ins_encode %{ + __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); + z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); + %} + ins_pipe(pipe_serial); +%} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP +#define CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP + +const int StackAlignmentInBytes = 16; + +// Indicates whether the C calling conventions require that +// 32-bit integer argument values are extended to 64 bits. +const bool CCallingConventionRequiresIntsAsLongs = false; + +// RISCV has adopted a multicopy atomic model closely following +// that of ARMv8. +#define CPU_MULTI_COPY_ATOMIC + +// To be safe, we deoptimize when we come across an access that needs +// patching. This is similar to what is done on aarch64. +#define DEOPTIMIZE_WHEN_PATCHING + +#define SUPPORTS_NATIVE_CX8 + +#define SUPPORT_RESERVED_STACK_AREA + +#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false + +#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/globals_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/globals_riscv.hpp @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_GLOBALS_RISCV_HPP +#define CPU_RISCV_GLOBALS_RISCV_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks +define_pd_global(bool, TrapBasedNullChecks, false); +define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast + +define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. +define_pd_global(intx, CodeEntryAlignment, 64); +define_pd_global(intx, OptoLoopAlignment, 16); +define_pd_global(intx, InlineFrequencyCount, 100); + +#define DEFAULT_STACK_YELLOW_PAGES (2) +#define DEFAULT_STACK_RED_PAGES (1) +// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the +// stack if compiled for unix and LP64. To pass stack overflow tests we need +// 20 shadow pages.
+#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+5)) +#define DEFAULT_STACK_RESERVED_PAGES (1) + +#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES +#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES +#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES +#define MIN_STACK_RESERVED_PAGES (0) + +define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); +define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); +define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); +define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); + +define_pd_global(bool, RewriteBytecodes, true); +define_pd_global(bool, RewriteFrequentPairs, true); + +define_pd_global(bool, PreserveFramePointer, false); + +define_pd_global(uintx, TypeProfileLevel, 111); + +define_pd_global(bool, CompactStrings, true); + +// Clear short arrays bigger than one word in an arch-specific way +define_pd_global(intx, InitArrayShortSize, BytesPerLong); + +define_pd_global(intx, InlineSmallCode, 1000); + +#define ARCH_FLAGS(develop, \ + product, \ + notproduct, \ + range, \ + constraint) \ + \ + product(bool, NearCpool, true, \ + "constant pool is close to instructions") \ + product(intx, BlockZeroingLowLimit, 256, \ + "Minimum size in bytes when block zeroing will be used") \ + range(1, max_jint) \ + product(bool, TraceTraps, false, "Trace all traps the signal handler") \ + /* For now we're going to be safe and add the I/O bits to userspace fences. 
*/ \ + product(bool, UseConservativeFence, true, \ + "Extend i for r and o for w in the pred/succ flags of fence;" \ + "Extend fence.i to fence.i + fence.") \ + product(bool, AvoidUnalignedAccesses, true, \ + "Avoid generating unaligned memory accesses") \ + product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \ + product(bool, UseRVB, false, EXPERIMENTAL, "Use RVB instructions") \ + product(bool, UseRVC, false, EXPERIMENTAL, "Use RVC instructions") \ + product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \ + "Use RVV instructions for left/right shift of BigInteger") + +#endif // CPU_RISCV_GLOBALS_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/icBuffer_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/icBuffer_riscv.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/icBuffer.hpp" +#include "gc/shared/collectedHeap.inline.hpp" +#include "interpreter/bytecodes.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/oop.inline.hpp" + +int InlineCacheBuffer::ic_stub_code_size() { + // 6: auipc + ld + auipc + jalr + address(2 * instruction_size) + // 5: auipc + ld + j + address(2 * instruction_size) + return (MacroAssembler::far_branches() ? 6 : 5) * NativeInstruction::instruction_size; +} + +#define __ masm-> + +void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { + assert_cond(code_begin != NULL && entry_point != NULL); + ResourceMark rm; + CodeBuffer code(code_begin, ic_stub_code_size()); + MacroAssembler* masm = new MacroAssembler(&code); + // Note: even though the code contains an embedded value, we do not need reloc info + // because + // (1) the value is old (i.e., doesn't matter for scavenges) + // (2) these ICStubs are removed *before* a GC happens, so the roots disappear + + address start = __ pc(); + Label l; + __ ld(t1, l); + __ far_jump(ExternalAddress(entry_point)); + __ align(wordSize); + __ bind(l); + __ emit_int64((intptr_t)cached_value); + // Only need to invalidate the 1st two instructions - not the whole ic stub + ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size()); + assert(__ pc() - start == ic_stub_code_size(), "must be"); +} + +address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object + NativeJump* jump = nativeJump_at(move->next_instruction_address()); + return jump->jump_destination(); 
+} + + +void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { + // The word containing the cached value is at the end of this IC buffer + uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize); + void* o = (void*)*p; + return o; +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/icache_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/icache_riscv.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "runtime/icache.hpp" + +#define __ _masm-> + +static int icache_flush(address addr, int lines, int magic) { + os::icache_flush((long int) addr, (long int) (addr + (lines << ICache::log2_line_size))); + return magic; +} + +void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { + address start = (address)icache_flush; + *flush_icache_stub = (ICache::flush_icache_stub_t)start; + + // ICache::invalidate_range() contains explicit condition that the first + // call is invoked on the generated icache flush stub code range. + ICache::invalidate_range(start, 0); + + { + StubCodeMark mark(this, "ICache", "fake_stub_for_inlined_icache_flush"); + __ ret(); + } +} + +#undef __ Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/icache_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/icache_riscv.hpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_ICACHE_RISCV_HPP +#define CPU_RISCV_ICACHE_RISCV_HPP + +// Interface for updating the instruction cache. Whenever the VM +// modifies code, part of the processor instruction cache potentially +// has to be flushed. + +class ICache : public AbstractICache { +public: + enum { + stub_size = 16, // Size of the icache flush stub in bytes + line_size = BytesPerWord, // conservative + log2_line_size = LogBytesPerWord // log2(line_size) + }; +}; + +#endif // CPU_RISCV_ICACHE_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/interp_masm_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/interp_masm_riscv.cpp @@ -0,0 +1,1965 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interp_masm_riscv.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "logging/log.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markWord.hpp" +#include "oops/method.hpp" +#include "oops/methodData.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.inline.hpp" +#include "utilities/powerOfTwo.hpp" + +void InterpreterMacroAssembler::narrow(Register result) { + // Get method->_constMethod->_result_type + ld(t0, Address(fp, frame::interpreter_frame_method_offset * wordSize)); + ld(t0, Address(t0, Method::const_offset())); + lbu(t0, Address(t0, ConstMethod::result_type_offset())); + + Label done, notBool, notByte, notChar; + + // common case first + mv(t1, T_INT); + beq(t0, t1, done); + + // mask integer result to narrower return type. 
+ mv(t1, T_BOOLEAN); + bne(t0, t1, notBool); + + andi(result, result, 0x1); + j(done); + + bind(notBool); + mv(t1, T_BYTE); + bne(t0, t1, notByte); + sign_extend(result, result, 8); + j(done); + + bind(notByte); + mv(t1, T_CHAR); + bne(t0, t1, notChar); + zero_extend(result, result, 16); + j(done); + + bind(notChar); + sign_extend(result, result, 16); + + // Nothing to do for T_INT + bind(done); + addw(result, result, zr); +} + +void InterpreterMacroAssembler::jump_to_entry(address entry) { + assert(entry != NULL, "Entry must have been generated by now"); + j(entry); +} + +void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { + if (JvmtiExport::can_pop_frame()) { + Label L; + // Initiate popframe handling only if it is not already being + // processed. If the flag has the popframe_processing bit set, + // it means that this code is called *during* popframe handling - we + // don't want to reenter. + // This method is only called just after the call into the vm in + // call_VM_base, so the arg registers are available. + lwu(t1, Address(xthread, JavaThread::popframe_condition_offset())); + andi(t0, t1, JavaThread::popframe_pending_bit); + beqz(t0, L); + andi(t0, t1, JavaThread::popframe_processing_bit); + bnez(t0, L); + // Call Interpreter::remove_activation_preserving_args_entry() to get the + // address of the same-named entrypoint in the generated interpreter code. 
+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); + jr(x10); + bind(L); + } +} + + +void InterpreterMacroAssembler::load_earlyret_value(TosState state) { + ld(x12, Address(xthread, JavaThread::jvmti_thread_state_offset())); + const Address tos_addr(x12, JvmtiThreadState::earlyret_tos_offset()); + const Address oop_addr(x12, JvmtiThreadState::earlyret_oop_offset()); + const Address val_addr(x12, JvmtiThreadState::earlyret_value_offset()); + switch (state) { + case atos: + ld(x10, oop_addr); + sd(zr, oop_addr); + verify_oop(x10); + break; + case ltos: + ld(x10, val_addr); + break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: + lwu(x10, val_addr); + break; + case ftos: + flw(f10, val_addr); + break; + case dtos: + fld(f10, val_addr); + break; + case vtos: + /* nothing to do */ + break; + default: + ShouldNotReachHere(); + } + // Clean up tos value in the thread object + mvw(t0, (int) ilgl); + sw(t0, tos_addr); + sw(zr, val_addr); +} + + +void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { + if (JvmtiExport::can_force_early_return()) { + Label L; + ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); + beqz(t0, L); // if [thread->jvmti_thread_state() == NULL] then exit + + // Initiate earlyret handling only if it is not already being processed. + // If the flag has the earlyret_processing bit set, it means that this code + // is called *during* earlyret handling - we don't want to reenter. + lwu(t0, Address(t0, JvmtiThreadState::earlyret_state_offset())); + mv(t1, JvmtiThreadState::earlyret_pending); + bne(t0, t1, L); + + // Call Interpreter::remove_activation_early_entry() to get the address of the + // same-named entrypoint in the generated interpreter code. 
+ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); + lwu(t0, Address(t0, JvmtiThreadState::earlyret_tos_offset())); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), t0); + jr(x10); + bind(L); + } +} + +void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) { + assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); + lhu(reg, Address(xbcp, bcp_offset)); + revb_h(reg, reg); +} + +void InterpreterMacroAssembler::get_dispatch() { + int32_t offset = 0; + la_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset); + addi(xdispatch, xdispatch, offset); +} + +void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, + int bcp_offset, + size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + if (index_size == sizeof(u2)) { + load_unsigned_short(index, Address(xbcp, bcp_offset)); + } else if (index_size == sizeof(u4)) { + lwu(index, Address(xbcp, bcp_offset)); + // Check if the secondary index definition is still ~x, otherwise + // we have to change the following assembler code to calculate the + // plain index. + assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); + xori(index, index, -1); + addw(index, index, zr); + } else if (index_size == sizeof(u1)) { + load_unsigned_byte(index, Address(xbcp, bcp_offset)); + } else { + ShouldNotReachHere(); + } +} + +// Return +// Rindex: index into constant pool +// Rcache: address of cache entry - ConstantPoolCache::base_offset() +// +// A caller must add ConstantPoolCache::base_offset() to Rcache to get +// the true address of the cache entry. 
+// +void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, + Register index, + int bcp_offset, + size_t index_size) { + assert_different_registers(cache, index); + assert_different_registers(cache, xcpool); + get_cache_index_at_bcp(index, bcp_offset, index_size); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // Convert from field index to ConstantPoolCacheEntry + // riscv already has the cache in xcpool so there is no need to + // install it in cache. Instead we pre-add the indexed offset to + // xcpool and return it in cache. All clients of this method need to + // be modified accordingly. + shadd(cache, index, xcpool, cache, 5); +} + + +void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, + Register index, + Register bytecode, + int byte_no, + int bcp_offset, + size_t index_size) { + get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); + // We use a 32-bit load here since the layout of 64-bit words on + // little-endian machines allow us that. + // n.b. 
unlike x86 cache already includes the index offset + la(bytecode, Address(cache, + ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::indices_offset())); + membar(MacroAssembler::AnyAny); + lwu(bytecode, bytecode); + membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + const int shift_count = (1 + byte_no) * BitsPerByte; + slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte)); + srli(bytecode, bytecode, XLEN - BitsPerByte); +} + +void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, + Register tmp, + int bcp_offset, + size_t index_size) { + assert(cache != tmp, "must use different register"); + get_cache_index_at_bcp(tmp, bcp_offset, index_size); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // Convert from field index to ConstantPoolCacheEntry index + // and from word offset to byte offset + assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, + "else change next line"); + ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); + // skip past the header + add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); + // construct pointer to cache entry + shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord); +} + +// Load object from cpool->resolved_references(index) +void InterpreterMacroAssembler::load_resolved_reference_at_index( + Register result, Register index, Register tmp) { + assert_different_registers(result, index); + + get_constant_pool(result); + // Load pointer for resolved_references[] objArray + ld(result, Address(result, ConstantPool::cache_offset_in_bytes())); + ld(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes())); + resolve_oop_handle(result, tmp); + // Add in the index + addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + shadd(result, index, result, index, LogBytesPerHeapOop); + load_heap_oop(result, Address(result, 0)); +} + +void 
InterpreterMacroAssembler::load_resolved_klass_at_offset( + Register cpool, Register index, Register klass, Register temp) { + shadd(temp, index, cpool, temp, LogBytesPerWord); + lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index + ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses + shadd(klass, temp, klass, temp, LogBytesPerWord); + ld(klass, Address(klass, Array::base_offset_in_bytes())); +} + +void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, + Register method, + Register cache) { + const int method_offset = in_bytes( + ConstantPoolCache::base_offset() + + ((byte_no == TemplateTable::f2_byte) + ? ConstantPoolCacheEntry::f2_offset() + : ConstantPoolCacheEntry::f1_offset())); + + ld(method, Address(cache, method_offset)); // get f1 Method* +} + +// Generate a subtype check: branch to ok_is_subtype if sub_klass is a +// subtype of super_klass. +// +// Args: +// x10: superklass +// Rsub_klass: subklass +// +// Kills: +// x12, x15 +void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, + Label& ok_is_subtype) { + assert(Rsub_klass != x10, "x10 holds superklass"); + assert(Rsub_klass != x12, "x12 holds 2ndary super array length"); + assert(Rsub_klass != x15, "x15 holds 2ndary super array scan ptr"); + + // Profile the not-null value's klass. + profile_typecheck(x12, Rsub_klass, x15); // blows x12, reloads x15 + + // Do the check. + check_klass_subtype(Rsub_klass, x10, x12, ok_is_subtype); // blows x12 + + // Profile the failure of the check. 
+ profile_typecheck_failed(x12); // blows x12 +} + +// Java Expression Stack + +void InterpreterMacroAssembler::pop_ptr(Register r) { + ld(r, Address(esp, 0)); + addi(esp, esp, wordSize); +} + +void InterpreterMacroAssembler::pop_i(Register r) { + lw(r, Address(esp, 0)); // lw do signed extended + addi(esp, esp, wordSize); +} + +void InterpreterMacroAssembler::pop_l(Register r) { + ld(r, Address(esp, 0)); + addi(esp, esp, 2 * Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_ptr(Register r) { + addi(esp, esp, -wordSize); + sd(r, Address(esp, 0)); +} + +void InterpreterMacroAssembler::push_i(Register r) { + addi(esp, esp, -wordSize); + addw(r, r, zr); // signed extended + sd(r, Address(esp, 0)); +} + +void InterpreterMacroAssembler::push_l(Register r) { + addi(esp, esp, -2 * wordSize); + sd(zr, Address(esp, wordSize)); + sd(r, Address(esp)); +} + +void InterpreterMacroAssembler::pop_f(FloatRegister r) { + flw(r, esp, 0); + addi(esp, esp, wordSize); +} + +void InterpreterMacroAssembler::pop_d(FloatRegister r) { + fld(r, esp, 0); + addi(esp, esp, 2 * Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_f(FloatRegister r) { + addi(esp, esp, -wordSize); + fsw(r, Address(esp, 0)); +} + +void InterpreterMacroAssembler::push_d(FloatRegister r) { + addi(esp, esp, -2 * wordSize); + fsd(r, Address(esp, 0)); +} + +void InterpreterMacroAssembler::pop(TosState state) { + switch (state) { + case atos: + pop_ptr(); + verify_oop(x10); + break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: + pop_i(); + break; + case ltos: + pop_l(); + break; + case ftos: + pop_f(); + break; + case dtos: + pop_d(); + break; + case vtos: + /* nothing to do */ + break; + default: + ShouldNotReachHere(); + } +} + +void InterpreterMacroAssembler::push(TosState state) { + switch (state) { + case atos: + verify_oop(x10); + push_ptr(); + break; + case btos: // fall through + case 
ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: + push_i(); + break; + case ltos: + push_l(); + break; + case ftos: + push_f(); + break; + case dtos: + push_d(); + break; + case vtos: + /* nothing to do */ + break; + default: + ShouldNotReachHere(); + } +} + +// Helpers for swap and dup +void InterpreterMacroAssembler::load_ptr(int n, Register val) { + ld(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); +} + +void InterpreterMacroAssembler::store_ptr(int n, Register val) { + sd(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); +} + +void InterpreterMacroAssembler::load_float(Address src) { + flw(f10, src); +} + +void InterpreterMacroAssembler::load_double(Address src) { + fld(f10, src); +} + +void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { + // set sender sp + mv(x30, sp); + // record last_sp + sd(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); +} + +// Jump to from_interpreted entry of a call unless single stepping is possible +// in this thread in which case we must call the i2i entry +void InterpreterMacroAssembler::jump_from_interpreted(Register method) { + prepare_to_jump_from_interpreted(); + if (JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); + beqz(t0, run_compiled_code); + ld(t0, Address(method, Method::interpreter_entry_offset())); + jr(t0); + bind(run_compiled_code); + } + + ld(t0, Address(method, Method::from_interpreted_offset())); + jr(t0); +} + +// The following two routines provide a hook so that an implementation +// can schedule the dispatch in two parts. amd64 does not do this. 
+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { +} + +void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { + dispatch_next(state, step); +} + +void InterpreterMacroAssembler::dispatch_base(TosState state, + address* table, + bool verifyoop, + bool generate_poll, + Register Rs) { + // Pay attention to the argument Rs, which defaults to t0. + if (VerifyActivationFrameSize) { + Unimplemented(); + } + if (verifyoop && state == atos) { + verify_oop(x10); + } + + Label safepoint; + address* const safepoint_table = Interpreter::safept_table(state); + bool needs_thread_local_poll = generate_poll && table != safepoint_table; + + if (needs_thread_local_poll) { + NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); + ld(t1, Address(xthread, JavaThread::polling_word_offset())); + andi(t1, t1, SafepointMechanism::poll_bit()); + bnez(t1, safepoint); + } + if (table == Interpreter::dispatch_table(state)) { + li(t1, Interpreter::distance_from_dispatch_table(state)); + add(t1, Rs, t1); + shadd(t1, t1, xdispatch, t1, 3); + } else { + mv(t1, (address)table); + shadd(t1, Rs, t1, Rs, 3); + } + ld(t1, Address(t1)); + jr(t1); + + if (needs_thread_local_poll) { + bind(safepoint); + la(t1, ExternalAddress((address)safepoint_table)); + shadd(t1, Rs, t1, Rs, 3); + ld(t1, Address(t1)); + jr(t1); + } +} + +void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll, Register Rs) { + dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll, Rs); +} + +void InterpreterMacroAssembler::dispatch_only_normal(TosState state, Register Rs) { + dispatch_base(state, Interpreter::normal_table(state), true /* verifyoop */, false /* generate_poll */, Rs); +} + +void InterpreterMacroAssembler::dispatch_only_noverify(TosState state, Register Rs) { + dispatch_base(state, Interpreter::normal_table(state), false /* verifyoop */, false /* generate_poll */, Rs); +} + +void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { + // load next bytecode + 
load_unsigned_byte(t0, Address(xbcp, step)); + add(xbcp, xbcp, step); + dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); +} + +void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { + // load current bytecode + lbu(t0, Address(xbcp, 0)); + dispatch_base(state, table); +} + +// remove activation +// +// Apply stack watermark barrier. +// Unlock the receiver if this is a synchronized method. +// Unlock any Java monitors from synchronized blocks. +// Remove the activation from the stack. +// +// If there are locked Java monitors +// If throw_monitor_exception +// throws IllegalMonitorStateException +// Else if install_monitor_exception +// installs IllegalMonitorStateException +// Else +// no error processing +void InterpreterMacroAssembler::remove_activation( + TosState state, + bool throw_monitor_exception, + bool install_monitor_exception, + bool notify_jvmdi) { + // Note: Registers x13 may be in use for the + // result check if synchronized method + Label unlocked, unlock, no_unlock; + + // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, + // that would normally not be safe to use. Such bad returns into unsafe territory of + // the stack, will call InterpreterRuntime::at_unwind. 
+ Label slow_path; + Label fast_path; + safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); + j(fast_path); + + bind(slow_path); + push(state); + set_last_Java_frame(esp, fp, (address)pc(), t0); + super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), xthread); + reset_last_Java_frame(true); + pop(state); + + bind(fast_path); + + // get the value of _do_not_unlock_if_synchronized into x13 + const Address do_not_unlock_if_synchronized(xthread, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + lbu(x13, do_not_unlock_if_synchronized); + sb(zr, do_not_unlock_if_synchronized); // reset the flag + + // get method access flags + ld(x11, Address(fp, frame::interpreter_frame_method_offset * wordSize)); + ld(x12, Address(x11, Method::access_flags_offset())); + andi(t0, x12, JVM_ACC_SYNCHRONIZED); + beqz(t0, unlocked); + + // Don't unlock anything if the _do_not_unlock_if_synchronized flag + // is set. + bnez(x13, no_unlock); + + // unlock monitor + push(state); // save result + + // BasicObjectLock will be first in list, since this is a + // synchronized method. However, need to check that the object has + // not been unlocked by an explicit monitorexit bytecode. + const Address monitor(fp, frame::interpreter_frame_initial_sp_offset * + wordSize - (int) sizeof(BasicObjectLock)); + // We use c_rarg1 so that if we go slow path it will be the correct + // register for unlock_object to pass to VM directly + la(c_rarg1, monitor); // address of first monitor + + ld(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + bnez(x10, unlock); + + pop(state); + if (throw_monitor_exception) { + // Entry already unlocked, need to throw exception + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Monitor already unlocked during a stack unroll. 
If requested, + // install an illegal_monitor_state_exception. Continue with + // stack unrolling. + if (install_monitor_exception) { + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::new_illegal_monitor_state_exception)); + } + j(unlocked); + } + + bind(unlock); + unlock_object(c_rarg1); + pop(state); + + // Check that for block-structured locking (i.e., that all locked + // objects has been unlocked) + bind(unlocked); + + // x10: Might contain return value + + // Check that all monitors are unlocked + { + Label loop, exception, entry, restart; + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + const Address monitor_block_top( + fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); + const Address monitor_block_bot( + fp, frame::interpreter_frame_initial_sp_offset * wordSize); + + bind(restart); + // We use c_rarg1 so that if we go slow path it will be the correct + // register for unlock_object to pass to VM directly + ld(c_rarg1, monitor_block_top); // points to current entry, starting + // with top-most entry + la(x9, monitor_block_bot); // points to word before bottom of + // monitor block + + j(entry); + + // Entry already locked, need to throw exception + bind(exception); + + if (throw_monitor_exception) { + // Throw exception + MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime:: + throw_illegal_monitor_state_exception)); + + should_not_reach_here(); + } else { + // Stack unrolling. Unlock object and install illegal_monitor_exception. + // Unlock does not block, so don't have to worry about the frame. + // We don't have to preserve c_rarg1 since we are going to throw an exception. 
+ + push(state); + unlock_object(c_rarg1); + pop(state); + + if (install_monitor_exception) { + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + new_illegal_monitor_state_exception)); + } + + j(restart); + } + + bind(loop); + // check if current entry is used + add(t0, c_rarg1, BasicObjectLock::obj_offset_in_bytes()); + ld(t0, Address(t0, 0)); + bnez(t0, exception); + + add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry + bind(entry); + bne(c_rarg1, x9, loop); // check if bottom reached if not at bottom then check this entry + } + + bind(no_unlock); + + // jvmti support + if (notify_jvmdi) { + notify_method_exit(state, NotifyJVMTI); // preserve TOSCA + + } else { + notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA + } + + // remove activation + // get sender esp + ld(t1, + Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); + if (StackReservedPages > 0) { + // testing if reserved zone needs to be re-enabled + Label no_reserved_zone_enabling; + + ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); + ble(t1, t0, no_reserved_zone_enabling); + + call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), xthread); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_delayed_StackOverflowError)); + should_not_reach_here(); + + bind(no_reserved_zone_enabling); + } + + // restore sender esp + mv(esp, t1); + + // remove frame anchor + leave(); + // If we're returning to interpreted code we will shortly be + // adjusting SP to allow some space for ESP. If we're returning to + // compiled code the saved sender SP was saved in sender_sp, so this + // restores it. + andi(sp, esp, -16); +} + +// Lock object +// +// Args: +// c_rarg1: BasicObjectLock to be used for locking +// +// Kills: +// x10 +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. 
(param regs) +// t0, t1 (temp regs) +void InterpreterMacroAssembler::lock_object(Register lock_reg) +{ + assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1"); + if (UseHeavyMonitors) { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + lock_reg); + } else { + Label done; + + const Register swap_reg = x10; + const Register tmp = c_rarg2; + const Register obj_reg = c_rarg3; // Will contain the oop + + const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); + const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); + const int mark_offset = lock_offset + + BasicLock::displaced_header_offset_in_bytes(); + + Label slow_case; + + // Load object pointer into obj_reg c_rarg3 + ld(obj_reg, Address(lock_reg, obj_offset)); + + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(tmp, obj_reg); + lwu(tmp, Address(tmp, Klass::access_flags_offset())); + andi(tmp, tmp, JVM_ACC_IS_VALUE_BASED_CLASS); + bnez(tmp, slow_case); + } + + if (UseBiasedLocking) { + biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); + } + + // Load (object->mark() | 1) into swap_reg + ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + ori(swap_reg, t0, 1); + + // Save (object->mark() | 1) into BasicLock's displaced header + sd(swap_reg, Address(lock_reg, mark_offset)); + + assert(lock_offset == 0, + "displached header must be first word in BasicObjectLock"); + + if (PrintBiasedLockingStatistics) { + Label fast, fail; + cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail); + bind(fast); + atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), + t1, t0); + j(done); + bind(fail); + } else { + cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); + } + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 7) == 0, and + // 2) sp <= mark < sp + os::pagesize() + // + // These 3 tests can be done by evaluating the 
following + // expression: ((mark - sp) & (7 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 3 bits clear. + // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg + sub(swap_reg, swap_reg, sp); + li(t0, (int64_t)(7 - os::vm_page_size())); + andr(swap_reg, swap_reg, t0); + + // Save the test result, for recursive case, the result is zero + sd(swap_reg, Address(lock_reg, mark_offset)); + + if (PrintBiasedLockingStatistics) { + bnez(swap_reg, slow_case); + atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), + t1, t0); + } + beqz(swap_reg, done); + + bind(slow_case); + + // Call the runtime routine for slow case + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + lock_reg); + + bind(done); + } +} + + +// Unlocks an object. Used in monitorexit bytecode and +// remove_activation. Throws an IllegalMonitorException if object is +// not locked by current thread. +// +// Args: +// c_rarg1: BasicObjectLock for lock +// +// Kills: +// x10 +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) +// t0, t1 (temp regs) +void InterpreterMacroAssembler::unlock_object(Register lock_reg) +{ + assert(lock_reg == c_rarg1, "The argument is only for looks. 
It must be rarg1"); + + if (UseHeavyMonitors) { + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + } else { + Label done; + + const Register swap_reg = x10; + const Register header_reg = c_rarg2; // Will contain the old oopMark + const Register obj_reg = c_rarg3; // Will contain the oop + + save_bcp(); // Save in case of exception + + // Convert from BasicObjectLock structure to object and BasicLock + // structure Store the BasicLock address into x10 + la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + + // Load oop into obj_reg(c_rarg3) + ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + + // Free entry + sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + + if (UseBiasedLocking) { + biased_locking_exit(obj_reg, header_reg, done); + } + + // Load the old header from BasicLock structure + ld(header_reg, Address(swap_reg, + BasicLock::displaced_header_offset_in_bytes())); + + // Test for recursion + beqz(header_reg, done); + + // Atomic swap back the old header + cmpxchg_obj_header(swap_reg, header_reg, obj_reg, t0, done, /*fallthrough*/NULL); + + // Call the runtime routine for slow case. + sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + + bind(done); + + restore_bcp(); + } +} + + +void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, + Label& zero_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ld(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); + beqz(mdp, zero_continue); +} + +// Set the method data pointer for the current bcp. +void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label set_mdp; + push_reg(0xc00, sp); // save x10, x11 + + // Test MDO to avoid the call if it is NULL. 
+ ld(x10, Address(xmethod, in_bytes(Method::method_data_offset()))); + beqz(x10, set_mdp); + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), xmethod, xbcp); + // x10: mdi + // mdo is guaranteed to be non-zero here, we checked for it before the call. + ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); + la(x11, Address(x11, in_bytes(MethodData::data_offset()))); + add(x10, x11, x10); + sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); + bind(set_mdp); + pop_reg(0xc00, sp); +} + +void InterpreterMacroAssembler::verify_method_data_pointer() { + assert(ProfileInterpreter, "must be profiling interpreter"); +#ifdef ASSERT + Label verify_continue; + add(sp, sp, -4 * wordSize); + sd(x10, Address(sp, 0)); + sd(x11, Address(sp, wordSize)); + sd(x12, Address(sp, 2 * wordSize)); + sd(x13, Address(sp, 3 * wordSize)); + test_method_data_pointer(x13, verify_continue); // If mdp is zero, continue + get_method(x11); + + // If the mdp is valid, it will point to a DataLayout header which is + // consistent with the bcp. The converse is highly probable also. 
+ lh(x12, Address(x13, in_bytes(DataLayout::bci_offset()))); + ld(t0, Address(x11, Method::const_offset())); + add(x12, x12, t0); + la(x12, Address(x12, ConstMethod::codes_offset())); + beq(x12, xbcp, verify_continue); + // x10: method + // xbcp: bcp // xbcp == 22 + // x13: mdp + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), + x11, xbcp, x13); + bind(verify_continue); + ld(x10, Address(sp, 0)); + ld(x11, Address(sp, wordSize)); + ld(x12, Address(sp, 2 * wordSize)); + ld(x13, Address(sp, 3 * wordSize)); + add(sp, sp, 4 * wordSize); +#endif // ASSERT +} + + +void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, + int constant, + Register value) { + assert(ProfileInterpreter, "must be profiling interpreter"); + Address data(mdp_in, constant); + sd(value, data); +} + + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + int constant, + bool decrement) { + increment_mdp_data_at(mdp_in, noreg, constant, decrement); +} + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + Register reg, + int constant, + bool decrement) { + assert(ProfileInterpreter, "must be profiling interpreter"); + // %%% this does 64bit counters at best it is wasting space + // at worst it is a rare bug when counters overflow + + assert_different_registers(t1, t0, mdp_in, reg); + + Address addr1(mdp_in, constant); + Address addr2(t1, 0); + Address &addr = addr1; + if (reg != noreg) { + la(t1, addr1); + add(t1, t1, reg); + addr = addr2; + } + + if (decrement) { + ld(t0, addr); + addi(t0, t0, -DataLayout::counter_increment); + Label L; + bltz(t0, L); // skip store if counter underflow + sd(t0, addr); + bind(L); + } else { + assert(DataLayout::counter_increment == 1, + "flow-free idiom only works with 1"); + ld(t0, addr); + addi(t0, t0, DataLayout::counter_increment); + Label L; + blez(t0, L); // skip store if counter overflow + sd(t0, addr); + bind(L); + } +} + +void InterpreterMacroAssembler::set_mdp_flag_at(Register 
mdp_in, + int flag_byte_constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + int flags_offset = in_bytes(DataLayout::flags_offset()); + // Set the flag + lbu(t1, Address(mdp_in, flags_offset)); + ori(t1, t1, flag_byte_constant); + sb(t1, Address(mdp_in, flags_offset)); +} + + +void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, + int offset, + Register value, + Register test_value_out, + Label& not_equal_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + if (test_value_out == noreg) { + ld(t1, Address(mdp_in, offset)); + bne(value, t1, not_equal_continue); + } else { + // Put the test value into a register, so caller can use it: + ld(test_value_out, Address(mdp_in, offset)); + bne(value, test_value_out, not_equal_continue); + } +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ld(t1, Address(mdp_in, offset_of_disp)); + add(mdp_in, mdp_in, t1); + sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); +} + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + Register reg, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + add(t1, mdp_in, reg); + ld(t1, Address(t1, offset_of_disp)); + add(mdp_in, mdp_in, t1); + sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, + int constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + addi(mdp_in, mdp_in, (unsigned)constant); + sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { + assert(ProfileInterpreter, "must be profiling interpreter"); + + // save/restore across call_VM + addi(sp, sp, -2 * wordSize); + sd(zr, Address(sp, 0)); + sd(return_bci, 
Address(sp, wordSize)); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), + return_bci); + ld(zr, Address(sp, 0)); + ld(return_bci, Address(sp, wordSize)); + addi(sp, sp, 2 * wordSize); +} + +void InterpreterMacroAssembler::profile_taken_branch(Register mdp, + Register bumped_count) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + // Otherwise, assign to mdp + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the taken count. + Address data(mdp, in_bytes(JumpData::taken_offset())); + ld(bumped_count, data); + assert(DataLayout::counter_increment == 1, + "flow-free idiom only works with 1"); + addi(bumped_count, bumped_count, DataLayout::counter_increment); + Label L; + // e.g. bumped_count = 0x7fffffffffffffff + 1 wraps to a negative value, so we test <= 0 + blez(bumped_count, L); // skip store if counter overflow + sd(bumped_count, data); + bind(L); + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are not taking the branch. Increment the not taken count. + increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); + + // The method data pointer needs to be updated to correspond to + // the next bytecode + update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. 
Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_final_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_virtual_call(Register receiver, + Register mdp, + Register reg2, + bool receiver_can_be_null) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + Label skip_receiver_profile; + if (receiver_can_be_null) { + Label not_null; + bnez(receiver, not_null); // a non-null receiver gets its klass recorded below + // We are making a call. Increment the count for null receiver. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + j(skip_receiver_profile); + bind(not_null); + } + + // Record the receiver type. + record_klass_in_profile(receiver, mdp, reg2, true); + bind(skip_receiver_profile); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + +// This routine creates a state machine for updating the multi-row +// type profile at a virtual call site (or other type-sensitive bytecode). +// The machine visits each row (of receiver/count) until the receiver type +// is found, or until it runs out of rows. 
At the same time, it remembers +// the location of the first empty row. (An empty row records null for its +// receiver, and can be allocated for a newly-observed receiver type.) +// Because there are two degrees of freedom in the state, a simple linear +// search will not work; it must be a decision tree. Hence this helper +// function is recursive, to generate the required tree structured code. +// It's the interpreter, so we are trading off code space for speed. +// See below for example code. +void InterpreterMacroAssembler::record_klass_in_profile_helper( + Register receiver, Register mdp, + Register reg2, + Label& done, bool is_virtual_call) { + if (TypeProfileWidth == 0) { + if (is_virtual_call) { + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + } + + } else { + int non_profiled_offset = -1; // negative means: no overall counter to bump when no row is free + if (is_virtual_call) { + non_profiled_offset = in_bytes(CounterData::count_offset()); + } + + record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, + &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); + } +} + +void InterpreterMacroAssembler::record_item_in_profile_helper( + Register item, Register mdp, Register reg2, int start_row, Label& done, int total_rows, + OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, int non_profiled_offset) { + int last_row = total_rows - 1; + assert(start_row <= last_row, "must be work left to do"); + // Test this row for both the item and for null. + // Take any of three different outcomes: + // 1. found item => increment count and goto done + // 2. found null => keep looking for case 1, maybe allocate this cell + // 3. found something else => keep looking for cases 1 and 2 + // Case 3 is handled by a recursive call. + for (int row = start_row; row <= last_row; row++) { + Label next_test; + bool test_for_null_also = (row == start_row); + + // See if the item is item[n].
+ int item_offset = in_bytes(item_offset_fn(row)); + test_mdp_data_at(mdp, item_offset, item, + (test_for_null_also ? reg2 : noreg), + next_test); + // (Reg2 now contains the item from the CallData.) + + // The item is item[n]. Increment count[n]. + int count_offset = in_bytes(item_count_offset_fn(row)); + increment_mdp_data_at(mdp, count_offset); + j(done); + bind(next_test); + + if (test_for_null_also) { + Label found_null; + // Failed the equality check on item[n]... Test for null. + if (start_row == last_row) { + // The only thing left to do is handle the null case. + if (non_profiled_offset >= 0) { + beqz(reg2, found_null); + // Item did not match any saved item and there is no empty row for it. + // Increment total counter to indicate polymorphic case. + increment_mdp_data_at(mdp, non_profiled_offset); + j(done); + bind(found_null); + } else { + bnez(reg2, done); + } + break; // last row: the C++ loop ends here and the fill-in code after it is emitted next + } + // Since null is rare, make it be the branch-taken case. + beqz(reg2, found_null); + + // Put all the "Case 3" tests here. + record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, + item_offset_fn, item_count_offset_fn, non_profiled_offset); + + // Found a null. Keep searching for a matching item, + // but remember that this is an empty (unused) slot. + bind(found_null); + } + } + + // In the fall-through case, we found no matching item, but we + // observed the item[start_row] is NULL. + // Fill in the item field and increment the count.
+ int item_offset = in_bytes(item_offset_fn(start_row)); + set_mdp_data_at(mdp, item_offset, item); + int count_offset = in_bytes(item_count_offset_fn(start_row)); + mv(reg2, DataLayout::counter_increment); + set_mdp_data_at(mdp, count_offset, reg2); + if (start_row > 0) { + j(done); + } +} + +// Example state machine code for three profile rows: +// # main copy of decision tree, rooted at row[0] +// if (row[0].rec == rec) then [ +// row[0].incr() +// goto done +// ] +// if (row[0].rec != NULL) then [ +// # inner copy of decision tree, rooted at row[1] +// if (row[1].rec == rec) then [ +// row[1].incr() +// goto done +// ] +// if (row[1].rec != NULL) then [ +// # degenerate decision tree, rooted at row[2] +// if (row[2].rec == rec) then [ +// row[2].incr() +// goto done +// ] +// if (row[2].rec != NULL) then [ +// count.incr() +// goto done +// ] # overflow +// row[2].init(rec) +// goto done +// ] else [ +// # remember row[1] is empty +// if (row[2].rec == rec) then [ +// row[2].incr() +// goto done +// ] +// row[1].init(rec) +// goto done +// ] +// ] else [ +// # remember row[0] is empty +// if (row[1].rec == rec) then [ +// row[1].incr() +// goto done +// ] +// if (row[2].rec == rec) then [ +// row[2].incr() +// goto done +// ] +// row[0].init(rec) +// goto done +// ] +// done: + +void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, + Register mdp, Register reg2, + bool is_virtual_call) { + assert(ProfileInterpreter, "must be profiling"); + Label done; + + record_klass_in_profile_helper(receiver, mdp, reg2, done, is_virtual_call); + + bind(done); +} + +void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the total ret count.
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + for (uint row = 0; row < RetData::row_limit(); row++) { + Label next_test; + + // See if return_bci is equal to bci[n]: + test_mdp_data_at(mdp, + in_bytes(RetData::bci_offset(row)), + return_bci, noreg, + next_test); + + // return_bci is equal to bci[n]. Increment the count. + increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, + in_bytes(RetData::bci_displacement_offset(row))); + j(profile_continue); + bind(next_test); + } + + update_mdp_for_ret(return_bci); // reached only when no row matched return_bci (every match jumps to profile_continue) + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_null_seen(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); // same skip size profile_typecheck uses under TypeProfileCasts + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { + if (ProfileInterpreter && TypeProfileCasts) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + int count_offset = in_bytes(CounterData::count_offset()); + // Back up the address, since we have already bumped the mdp. + count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + + // *Decrement* the counter. We expect to see zero or small negatives.
+ increment_mdp_data_at(mdp, count_offset, true); + + bind (profile_continue); + } +} + +void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + + // Record the object type. + record_klass_in_profile(klass, mdp, reg2, false); // false: is_virtual_call, so no overall call counter is touched + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_switch_default(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the default case count + increment_mdp_data_at(mdp, + in_bytes(MultiBranchData::default_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + in_bytes(MultiBranchData:: + default_displacement_offset())); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_switch_case(Register index, + Register mdp, + Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Build the base (index * per_case_size_in_bytes()) + + // case_array_offset_in_bytes() + mvw(reg2, in_bytes(MultiBranchData::per_case_size())); + mvw(t0, in_bytes(MultiBranchData::case_array_offset())); + Assembler::mul(index, index, reg2); + Assembler::add(index, index, t0); // index is overwritten with that base; reg2 and t0 are clobbered + + // Update the case count + increment_mdp_data_at(mdp, + index, + in_bytes(MultiBranchData::relative_count_offset())); + + // The method data pointer needs to be updated.
+ update_mdp_by_offset(mdp, + index, + in_bytes(MultiBranchData:: + relative_displacement_offset())); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } + +void InterpreterMacroAssembler::notify_method_entry() { + // Whenever JVMTI is in interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + if (JvmtiExport::can_post_interpreter_events()) { + Label L; + lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); + beqz(x13, L); // interp_only_mode is zero: skip the post_method_entry upcall + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_method_entry)); + bind(L); + } + + { + SkipIfEqual skip(this, &DTraceMethodProbes, false); + get_method(c_rarg1); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + xthread, c_rarg1); + } + + // RedefineClasses() tracing support for obsolete method entry + if (log_is_enabled(Trace, redefine, class, obsolete)) { + get_method(c_rarg1); + call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), + xthread, c_rarg1); + } +} + + +void InterpreterMacroAssembler::notify_method_exit( + TosState state, NotifyMethodExitMode mode) { + // Whenever JVMTI is in interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { + Label L; + // Note: frame::interpreter_frame_result has a dependency on how the + // method result is saved across the call to post_method_exit. If this + // is changed then the interpreter_frame_result implementation will + // need to be updated too. + + // template interpreter will leave the result on the top of the stack.
+ push(state); + lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); + beqz(x13, L); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); + bind(L); + pop(state); + } + + { + SkipIfEqual skip(this, &DTraceMethodProbes, false); + push(state); + get_method(c_rarg1); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + xthread, c_rarg1); + pop(state); + } +} + + +// Jump to *where if ((*counter_addr += increment) & mask) == 0. With preloaded set, tmp1 must already hold *counter_addr. +void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, + int increment, Address mask, + Register tmp1, Register tmp2, + bool preloaded, Label* where) { + Label done; + if (!preloaded) { + lwu(tmp1, counter_addr); + } + add(tmp1, tmp1, increment); + sw(tmp1, counter_addr); + lwu(tmp2, mask); + andr(tmp1, tmp1, tmp2); + bnez(tmp1, done); + j(*where); // offset is too large so we have to use j instead of beqz here + bind(done); +} + +void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, + int number_of_arguments) { + // interpreter specific + // + // Note: No need to save/restore xbcp & xlocals pointer since these + // are callee saved registers and no blocking/ GC can happen + // in leaf calls. +#ifdef ASSERT + { + Label L; + ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + beqz(t0, L); + stop("InterpreterMacroAssembler::call_VM_leaf_base:" + " last_sp != NULL"); + bind(L); + } +#endif /* ASSERT */ + // super call + MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); +} + +void InterpreterMacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + // interpreter specific + // + // Note: Could avoid restoring locals ptr (callee saved) - however doesn't + // really make a difference for these runtime calls, since they are + // slow anyway.
Btw., bcp must be saved/restored since it may change + // due to GC. + save_bcp(); +#ifdef ASSERT + { + Label L; + ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + beqz(t0, L); + stop("InterpreterMacroAssembler::call_VM_base:" + " last_sp != NULL"); + bind(L); + } +#endif /* ASSERT */ + // super call (java_thread is not forwarded; noreg is passed in its place) + MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp, + entry_point, number_of_arguments, + check_exceptions); +// interpreter specific + restore_bcp(); + restore_locals(); +} + +void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) { + assert_different_registers(obj, tmp, t0, mdo_addr.base()); + Label update, next, none; + + verify_oop(obj); + + bnez(obj, update); + orptr(mdo_addr, TypeEntries::null_seen, t0, tmp); + j(next); + + bind(update); + load_klass(obj, obj); // obj is clobbered: from here on it holds the klass, later xor-ed with the profile cell + + ld(t0, mdo_addr); + xorr(obj, obj, t0); + andi(t0, obj, TypeEntries::type_klass_mask); + beqz(t0, next); // klass seen before, nothing to + // do. The unknown bit may have been + // set already but no need to check. + + andi(t0, obj, TypeEntries::type_unknown); + bnez(t0, next); + // already unknown. Nothing to do anymore. + + ld(t0, mdo_addr); + beqz(t0, none); + li(tmp, (u1)TypeEntries::null_seen); + beq(t0, tmp, none); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the + // profiling to this obj's klass + ld(t0, mdo_addr); + xorr(obj, obj, t0); + andi(t0, obj, TypeEntries::type_klass_mask); + beqz(t0, next); + + // different than before. Cannot keep accurate profile. + orptr(mdo_addr, TypeEntries::type_unknown, t0, tmp); + j(next); + + bind(none); + // first time here. Set profile type.
+ sd(obj, mdo_addr); + + bind(next); +} + +void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { + if (!ProfileInterpreter) { + return; + } + + if (MethodData::profile_arguments() || MethodData::profile_return()) { + Label profile_continue; + + test_method_data_pointer(mdp, profile_continue); + + int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + + lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); + if (is_virtual) { + li(tmp, (u1)DataLayout::virtual_call_type_data_tag); + bne(t0, tmp, profile_continue); + } else { + li(tmp, (u1)DataLayout::call_type_data_tag); + bne(t0, tmp, profile_continue); + } + + // calculate slot step + static int stack_slot_offset0 = in_bytes(TypeEntriesAtCall::stack_slot_offset(0)); + static int slot_step = in_bytes(TypeEntriesAtCall::stack_slot_offset(1)) - stack_slot_offset0; + + // calculate type step + static int argument_type_offset0 = in_bytes(TypeEntriesAtCall::argument_type_offset(0)); + static int type_step = in_bytes(TypeEntriesAtCall::argument_type_offset(1)) - argument_type_offset0; + + if (MethodData::profile_arguments()) { + Label done, loop, loopEnd, profileArgument, profileReturnType; + RegSet pushed_registers; + pushed_registers += x15; + pushed_registers += x16; + pushed_registers += x17; + Register mdo_addr = x15; + Register index = x16; + Register off_to_args = x17; + push_reg(pushed_registers, sp); // spill x15-x17, which serve as mdo_addr/index/off_to_args below + + mv(off_to_args, in_bytes(TypeEntriesAtCall::args_data_offset())); + mv(t0, TypeProfileArgsLimit); + beqz(t0, loopEnd); + + mv(index, zr); // index < TypeProfileArgsLimit + bind(loop); + bgtz(index, profileReturnType); + li(t0, (int)MethodData::profile_return()); + beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false + bind(profileReturnType); + // If return value type is profiled we may have no argument to profile + ld(tmp,
Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); + mv(t1, - TypeStackSlotEntries::per_arg_count()); + mul(t1, index, t1); + add(tmp, tmp, t1); + li(t1, TypeStackSlotEntries::per_arg_count()); + add(t0, mdp, off_to_args); + blt(tmp, t1, done); + + bind(profileArgument); + + ld(tmp, Address(callee, Method::const_offset())); + load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); + // stack offset o (zero based) from the start of the argument + // list, for n arguments translates into offset n - o - 1 from + // the end of the argument list + li(t0, stack_slot_offset0); + li(t1, slot_step); + mul(t1, index, t1); + add(t0, t0, t1); + add(t0, mdp, t0); + ld(t0, Address(t0)); + sub(tmp, tmp, t0); + addi(tmp, tmp, -1); + Address arg_addr = argument_address(tmp); + ld(tmp, arg_addr); // tmp now holds the argument value to profile + + li(t0, argument_type_offset0); + li(t1, type_step); + mul(t1, index, t1); + add(t0, t0, t1); + add(mdo_addr, mdp, t0); + Address mdo_arg_addr(mdo_addr, 0); + profile_obj_type(tmp, mdo_arg_addr, t1); // t1 is handed over as profile_obj_type's tmp register + + int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); + addi(off_to_args, off_to_args, to_add); + + // increment index by 1 + addi(index, index, 1); + li(t1, TypeProfileArgsLimit); + blt(index, t1, loop); // keep looping while index < TypeProfileArgsLimit + bind(loopEnd); + + if (MethodData::profile_return()) { + ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); + addi(tmp, tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); + } + + add(t0, mdp, off_to_args); + bind(done); + mv(mdp, t0); + + // unspill the clobbered registers + pop_reg(pushed_registers, sp); + + if (MethodData::profile_return()) { + // We're right after the type profile for the last + // argument. tmp is the number of cells left in the + // CallTypeData/VirtualCallTypeData to reach its end. Non null + // if there's a return to profile.
+ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); + shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size)); + } + sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); + } else { + assert(MethodData::profile_return(), "either profile call args or call ret"); + update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); + } + + // mdp points right after the end of the + // CallTypeData/VirtualCallTypeData, right after the cells for the + // return value type if there's one + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { + assert_different_registers(mdp, ret, tmp, xbcp, t0, t1); + if (ProfileInterpreter && MethodData::profile_return()) { + Label profile_continue; + + test_method_data_pointer(mdp, profile_continue); + + if (MethodData::profile_return_jsr292_only()) { + assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + + // If we don't profile all invoke bytecodes we must make sure + // it's a bytecode we indeed profile.
We can't go back to the + // beginning of the ProfileData we intend to update to check its + // type because we're right after it and we don't know its + // length + Label do_profile; + lbu(t0, Address(xbcp, 0)); + li(tmp, (u1)Bytecodes::_invokedynamic); + beq(t0, tmp, do_profile); + li(tmp, (u1)Bytecodes::_invokehandle); + beq(t0, tmp, do_profile); + get_method(tmp); + lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); + li(t1, static_cast<int>(vmIntrinsics::_compiledLambdaForm)); + bne(t0, t1, profile_continue); + bind(do_profile); + } + + Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); // the return-type cell sits just before mdp + mv(tmp, ret); // profile_obj_type clobbers its obj register, so work on a copy of ret + profile_obj_type(tmp, mdo_ret_addr, t1); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3) { + assert_different_registers(t0, t1, mdp, tmp1, tmp2, tmp3); + if (ProfileInterpreter && MethodData::profile_parameters()) { + Label profile_continue; + + test_method_data_pointer(mdp, profile_continue); + + // Load the offset of the area within the MDO used for + // parameters. If it's negative we're not profiling any parameters + lwu(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()))); + srli(tmp2, tmp1, 31); + bnez(tmp2, profile_continue); // i.e. sign bit set + + // Compute a pointer to the area for parameters from the offset + // and move the pointer to the slot for the last + // parameters. Collect profiling from last parameter down.
+ // mdo start + parameters offset + array length - 1 + add(mdp, mdp, tmp1); + ld(tmp1, Address(mdp, ArrayData::array_len_offset())); + add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); + + Label loop; + bind(loop); + + int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); + int type_base = in_bytes(ParametersTypeData::type_offset(0)); + int per_arg_scale = exact_log2(DataLayout::cell_size); + add(t0, mdp, off_base); + add(t1, mdp, type_base); + + shadd(tmp2, tmp1, t0, tmp2, per_arg_scale); + // load offset on the stack from the slot for this parameter + ld(tmp2, Address(tmp2, 0)); + neg(tmp2, tmp2); + + // read the parameter from the local area + shadd(tmp2, tmp2, xlocals, tmp2, Interpreter::logStackElementSize); + ld(tmp2, Address(tmp2, 0)); + + // profile the parameter + shadd(t1, tmp1, t1, t0, per_arg_scale); + Address arg_type(t1, 0); + profile_obj_type(tmp2, arg_type, tmp3); + + // go to next parameter + add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); + bgez(tmp1, loop); // tmp1 counts down by per_arg_count(); stop once it goes negative + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::get_method_counters(Register method, + Register mcs, Label& skip) { + Label has_counters; + ld(mcs, Address(method, Method::method_counters_offset())); + bnez(mcs, has_counters); // counters already present: no VM call needed + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::build_method_counters), method); + ld(mcs, Address(method, Method::method_counters_offset())); + beqz(mcs, skip); // No MethodCounters allocated, OutOfMemory + bind(has_counters); +} + +#ifdef ASSERT +void InterpreterMacroAssembler::verify_access_flags(Register access_flags, uint32_t flag_bits, + const char* msg, bool stop_by_hit) { + Label L; // L bypasses the stop(): taken on a miss when stop_by_hit, on a hit otherwise + andi(t0, access_flags, flag_bits); + if (stop_by_hit) { + beqz(t0, L); + } else { + bnez(t0, L); + } + stop(msg); + bind(L); +} + +void InterpreterMacroAssembler::verify_frame_setup() { + Label L; + const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); + ld(t0,
monitor_block_top); + beq(esp, t0, L); + stop("broken stack frame setup in interpreter"); + bind(L); +} +#endif Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/interp_masm_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/interp_masm_riscv.hpp @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_RISCV_INTERP_MASM_RISCV_HPP +#define CPU_RISCV_INTERP_MASM_RISCV_HPP + +#include "asm/macroAssembler.hpp" +#include "interpreter/invocationCounter.hpp" +#include "runtime/frame.hpp" + +// This file specializes the assembler with interpreter-specific macros + +typedef ByteSize (*OffsetFunction)(uint); + +class InterpreterMacroAssembler: public MacroAssembler { + protected: + // Interpreter specific version of call_VM_base + using MacroAssembler::call_VM_leaf_base; + + virtual void call_VM_leaf_base(address entry_point, + int number_of_arguments); + + virtual void call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions); + + // base routine for all dispatches + void dispatch_base(TosState state, address* table, bool verifyoop = true, + bool generate_poll = false, Register Rs = t0); + + public: + InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {} + virtual ~InterpreterMacroAssembler() {} + + void load_earlyret_value(TosState state); + + void jump_to_entry(address entry); + + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + // Interpreter-specific registers + void save_bcp() { + sd(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); + } + + void restore_bcp() { + ld(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); + } + + void restore_locals() { + ld(xlocals, Address(fp, frame::interpreter_frame_locals_offset * wordSize)); + } + + void restore_constant_pool_cache() { + ld(xcpool, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); + } + + void get_dispatch(); + + // Helpers for runtime call arguments/results + void get_method(Register reg) { + ld(reg, Address(fp, frame::interpreter_frame_method_offset * wordSize)); + } + + void get_const(Register reg) { + get_method(reg); + ld(reg, Address(reg,
in_bytes(Method::const_offset()))); + } + + void get_constant_pool(Register reg) { + get_const(reg); + ld(reg, Address(reg, in_bytes(ConstMethod::constants_offset()))); + } + + void get_constant_pool_cache(Register reg) { + get_constant_pool(reg); + ld(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); + } + + void get_cpool_and_tags(Register cpool, Register tags) { + get_constant_pool(cpool); + ld(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); + } + + void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); + void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); + void get_method_counters(Register method, Register mcs, Label& skip); + + // Load cpool->resolved_references(index). + void load_resolved_reference_at_index(Register result, Register index, Register tmp = x15); + + // Load cpool->resolved_klass_at(index).
+ void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); + + void load_resolved_method_at_index(int byte_no, Register method, Register cache); + + void pop_ptr(Register r = x10); + void pop_i(Register r = x10); + void pop_l(Register r = x10); + void pop_f(FloatRegister r = f10); + void pop_d(FloatRegister r = f10); + void push_ptr(Register r = x10); + void push_i(Register r = x10); + void push_l(Register r = x10); + void push_f(FloatRegister r = f10); + void push_d(FloatRegister r = f10); + + void pop(TosState state); // transition vtos -> state + void push(TosState state); // transition state -> vtos + + void empty_expression_stack() { + ld(esp, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); + // NULL last_sp until next java call + sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + } + + // Helpers for swap and dup + void load_ptr(int n, Register val); + void store_ptr(int n, Register val); + + // Load float value from 'src'. NOTE(review): this comment used to name FPU register v0 as the destination, which is not a RISC-V register name - confirm the real target. + void load_float(Address src); + void load_double(Address src); + + // Generate a subtype check: branch to ok_is_subtype if sub_klass is + // a subtype of super_klass.
+ void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); + + // Dispatching + void dispatch_prolog(TosState state, int step = 0); + void dispatch_epilog(TosState state, int step = 0); + // dispatch via t0 + void dispatch_only(TosState state, bool generate_poll = false, Register Rs = t0); + // dispatch normal table via t0 (assume t0 is loaded already) + void dispatch_only_normal(TosState state, Register Rs = t0); + void dispatch_only_noverify(TosState state, Register Rs = t0); + // load t0 from [xbcp + step] and dispatch via t0 + void dispatch_next(TosState state, int step = 0, bool generate_poll = false); + // load t0 from [xbcp] and dispatch via t0 and table + void dispatch_via (TosState state, address* table); + + // jump to an invoked target + void prepare_to_jump_from_interpreted(); + void jump_from_interpreted(Register method); + + + // Returning from interpreted functions + // + // Removes the current activation (incl. unlocking of monitors) + // and sets up the return address. This code is also used for + // exception unwinding. In that case, we do not want to throw + // IllegalMonitorStateExceptions, since that might get us into an + // infinite rethrow exception loop. + // Additionally this code is used for popFrame and earlyReturn. + // In popFrame case we want to skip throwing an exception, + // installing an exception, and notifying jvmdi. + // In earlyReturn case we only want to skip throwing an exception + // and installing an exception. + void remove_activation(TosState state, + bool throw_monitor_exception = true, + bool install_monitor_exception = true, + bool notify_jvmdi = true); + + // FIXME: Give us a valid frame at a null check.
+ virtual void null_check(Register reg, int offset = -1) { + MacroAssembler::null_check(reg, offset); + } + + // Object locking + void lock_object (Register lock_reg); + void unlock_object(Register lock_reg); + + // Interpreter profiling operations + void set_method_data_pointer_for_bcp(); + void test_method_data_pointer(Register mdp, Label& zero_continue); + void verify_method_data_pointer(); + + void set_mdp_data_at(Register mdp_in, int constant, Register value); + void increment_mdp_data_at(Address data, bool decrement = false); + void increment_mdp_data_at(Register mdp_in, int constant, + bool decrement = false); + void increment_mdp_data_at(Register mdp_in, Register reg, int constant, + bool decrement = false); + void increment_mask_and_jump(Address counter_addr, + int increment, Address mask, + Register tmp1, Register tmp2, + bool preloaded, Label* where); + + void set_mdp_flag_at(Register mdp_in, int flag_constant); + void test_mdp_data_at(Register mdp_in, int offset, Register value, + Register test_value_out, + Label& not_equal_continue); + + void record_klass_in_profile(Register receiver, Register mdp, + Register reg2, bool is_virtual_call); + void record_klass_in_profile_helper(Register receiver, Register mdp, + Register reg2, + Label& done, bool is_virtual_call); + void record_item_in_profile_helper(Register item, Register mdp, + Register reg2, int start_row, Label& done, int total_rows, + OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, + int non_profiled_offset); + + void update_mdp_by_offset(Register mdp_in, int offset_of_offset); + void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); + void update_mdp_by_constant(Register mdp_in, int constant); + void update_mdp_for_ret(Register return_bci); + + // narrow int return value + void narrow(Register result); + + void profile_taken_branch(Register mdp, Register bumped_count); + void profile_not_taken_branch(Register mdp); + void profile_call(Register mdp); + void
profile_final_call(Register mdp); + void profile_virtual_call(Register receiver, Register mdp, + Register reg2, + bool receiver_can_be_null = false); + void profile_ret(Register return_bci, Register mdp); + void profile_null_seen(Register mdp); + void profile_typecheck(Register mdp, Register klass, Register temp); + void profile_typecheck_failed(Register mdp); + void profile_switch_default(Register mdp); + void profile_switch_case(Register index_in_scratch, Register mdp, + Register temp); + + void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp); + void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); + void profile_return_type(Register mdp, Register ret, Register tmp); + void profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3); + + // Debugging + // only if +VerifyFPU && (state == ftos || state == dtos) + void verify_FPU(int stack_depth, TosState state = ftos); + + typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; + + // support for jvmti/dtrace + void notify_method_entry(); + void notify_method_exit(TosState state, NotifyMethodExitMode mode); + + virtual void _call_Unimplemented(address call_site) { + save_bcp(); + set_last_Java_frame(esp, fp, (address) pc(), t0); + MacroAssembler::_call_Unimplemented(call_site); + } + +#ifdef ASSERT + void verify_access_flags(Register access_flags, uint32_t flag_bits, + const char* msg, bool stop_by_hit = true); + void verify_frame_setup(); +#endif +}; + +#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "memory/universe.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/icache.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/signature.hpp" + +#define __ _masm-> + +// Implementation of SignatureHandlerGenerator +Register InterpreterRuntime::SignatureHandlerGenerator::from() { return xlocals; } // Java arguments are read relative to this register +Register InterpreterRuntime::SignatureHandlerGenerator::to() { return sp; } // overflow arguments are stored relative to this register +Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return t0; } // scratch register used while testing oops for null + +Register InterpreterRuntime::SignatureHandlerGenerator::next_gpr() { + if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { + return g_INTArgReg[++_num_reg_int_args]; // pre-increment: the index equal to the counter's starting value is never handed out + } + return noreg; // integer argument registers exhausted +} + +FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() { + if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { + return g_FPArgReg[_num_reg_fp_args++]; + } else { + return fnoreg; // floating-point argument registers exhausted + } +} + +int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() { + int ret = _stack_offset; + _stack_offset += wordSize; // every stack argument occupies one full word + return ret; +} + +InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( + const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { + _masm = new MacroAssembler(buffer); // allocated on the resource area by default + _num_reg_int_args = (method->is_static() ? 
1 : 0); + _num_reg_fp_args = 0; + _stack_offset = 0; +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { + const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + + Register reg = next_gpr(); + if (reg != noreg) { + __ lw(reg, src); // 32-bit load straight into the argument register + } else { + __ lw(x10, src); // no register left: bounce through x10 onto the outgoing stack + __ sw(x10, Address(to(), next_stack_offset())); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { + const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); // the value is read from the slot at offset() + 1 + + Register reg = next_gpr(); + if (reg != noreg) { + __ ld(reg, src); + } else { + __ ld(x10, src); + __ sd(x10, Address(to(), next_stack_offset())); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { + const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + + FloatRegister reg = next_fpr(); + if (reg != fnoreg) { + __ flw(reg, src); + } else { + // a floating-point argument is passed according to the integer calling + // convention if no floating-point argument register is available + pass_int(); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { + const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); + + FloatRegister reg = next_fpr(); + if (reg != fnoreg) { + __ fld(reg, src); + } else { + // a floating-point argument is passed according to the integer calling + // convention if no floating-point argument register is available + pass_long(); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { + Register reg = next_gpr(); + if (reg == c_rarg1) { + assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); + __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); // pass the address of the local slot, not the oop itself + } else if (reg != noreg) { + // c_rarg2-c_rarg7 + __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); + __ mv(reg, zr); // _num_reg_int_args:c_rarg -> 1:c_rarg2, 2:c_rarg3... 
+ __ ld(temp(), x10); + Label L; + __ beqz(temp(), L); // null oop: leave reg as zero + __ mv(reg, x10); // non-null oop: pass the address of its slot + __ bind(L); + } else { + // to stack + __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); + __ ld(temp(), x10); + Label L; + __ bnez(temp(), L); // non-null oop: keep the slot address in x10 + __ mv(x10, zr); // null oop: store zero instead of the slot address + __ bind(L); + assert(sizeof(jobject) == wordSize, ""); + __ sd(x10, Address(to(), next_stack_offset())); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { + // generate code to handle arguments + iterate(fingerprint); + + // return result handler + __ la(x10, ExternalAddress(Interpreter::result_handler(method()->result_type()))); + __ ret(); + + __ flush(); +} + + +// Implementation of SignatureHandlerLibrary + +void SignatureHandlerLibrary::pd_set_handler(address handler) {} // intentionally empty on this platform + + +class SlowSignatureHandler + : public NativeSignatureIterator { + private: + address _from; + intptr_t* _to; + intptr_t* _int_args; + intptr_t* _fp_args; + intptr_t* _fp_identifiers; + unsigned int _num_reg_int_args; + unsigned int _num_reg_fp_args; + + intptr_t* single_slot_addr() { + intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; // step to the next one-slot argument + return from_addr; + } + + intptr_t* double_slot_addr() { + intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); + _from -= 2 * Interpreter::stackElementSize; // two-slot values consume two stack elements + return from_addr; + } + + int pass_gpr(intptr_t value) { + if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { + *_int_args++ = value; + return _num_reg_int_args++; + } + return -1; // no integer register slot left + } + + int pass_fpr(intptr_t value) { + if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { + *_fp_args++ = value; + return _num_reg_fp_args++; + } + return -1; // no floating-point register slot left + } + + void pass_stack(intptr_t value) { + *_to++ = value; + } + + virtual void pass_int() { + jint value = *(jint*)single_slot_addr(); + if (pass_gpr(value) < 0) { + pass_stack(value); + } + } + + virtual void pass_long() { + 
intptr_t value = *double_slot_addr(); + if (pass_gpr(value) < 0) { + pass_stack(value); + } + } + + virtual void pass_object() { + intptr_t* addr = single_slot_addr(); + intptr_t value = *addr == 0 ? NULL : (intptr_t)addr; // null oop -> 0, otherwise the address of the slot + if (pass_gpr(value) < 0) { + pass_stack(value); + } + } + + virtual void pass_float() { + jint value = *(jint*) single_slot_addr(); + // a floating-point argument is passed according to the integer calling + // convention if no floating-point argument register is available + if (pass_fpr(value) < 0 && pass_gpr(value) < 0) { + pass_stack(value); + } + } + + virtual void pass_double() { + intptr_t value = *double_slot_addr(); + int arg = pass_fpr(value); + if (0 <= arg) { + *_fp_identifiers |= (1ull << arg); // mark as double + } else if (pass_gpr(value) < 0) { // no need to mark if passing by integer registers or stack + pass_stack(value); + } + } + + public: + SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) + : NativeSignatureIterator(method) + { + _from = from; + _to = to; + + _int_args = to - (method->is_static() ? 16 : 17); + _fp_args = to - 8; + _fp_identifiers = to - 9; + *(int*) _fp_identifiers = 0; // NOTE(review): clears only 32 bits of a word that pass_double() ORs into as intptr_t - confirm the reader consumes only the low bits + _num_reg_int_args = (method->is_static() ? 
1 : 0); + _num_reg_fp_args = 0; + } + + ~SlowSignatureHandler() + { + _from = NULL; + _to = NULL; + _int_args = NULL; + _fp_args = NULL; + _fp_identifiers = NULL; + } +}; + + +JRT_ENTRY(address, + InterpreterRuntime::slow_signature_handler(JavaThread* current, + Method* method, + intptr_t* from, + intptr_t* to)) + methodHandle m(current, (Method*)method); + assert(m->is_native(), "sanity check"); + + // handle arguments + SlowSignatureHandler ssh(m, (address)from, to); + ssh.iterate(UCONST64(-1)); + + // return result handler + return Interpreter::result_handler(m->result_type()); +JRT_END Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_INTERPRETERRT_RISCV_HPP +#define CPU_RISCV_INTERPRETERRT_RISCV_HPP + +// This is included in the middle of class InterpreterRuntime. +// Do not include files here. + +// native method calls + +class SignatureHandlerGenerator: public NativeSignatureIterator { + private: + MacroAssembler* _masm; // assembler that emits the handler code + unsigned int _num_reg_fp_args; // floating-point argument registers handed out so far + unsigned int _num_reg_int_args; // integer argument register counter (starts at 1 for static methods) + int _stack_offset; // byte offset of the next outgoing stack argument + + void pass_int(); + void pass_long(); + void pass_float(); + void pass_double(); + void pass_object(); + + Register next_gpr(); // noreg when the integer registers are exhausted + FloatRegister next_fpr(); // fnoreg when the floating-point registers are exhausted + int next_stack_offset(); + + public: + // Creation + SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); + virtual ~SignatureHandlerGenerator() { + _masm = NULL; + } + + // Code generation + void generate(uint64_t fingerprint); + + // Code generation support + static Register from(); + static Register to(); + static Register temp(); +}; + +#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP +#define CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP + +private: + + // FP value associated with _last_Java_sp: + intptr_t* volatile _last_Java_fp; // the pointer itself is volatile, not what it points to + +public: + // Each arch must define reset, save, restore + // These are used by objects that only care about: + // 1 - initializing a new state (thread creation, javaCalls) + // 2 - saving a current state (javaCalls) + // 3 - restoring an old state (javaCalls) + + void clear(void) { + // clearing _last_Java_sp must be first + _last_Java_sp = NULL; + OrderAccess::release(); // order the sp store ahead of the fp/pc stores below + _last_Java_fp = NULL; + _last_Java_pc = NULL; + } + + void copy(JavaFrameAnchor* src) { + // In order to make sure the transition state is valid for "this" + // We must clear _last_Java_sp before copying the rest of the new data + // + // Hack Alert: Temporary bugfix for 4717480/4721647 + // To act like previous version (pd_cache_state) don't NULL _last_Java_sp + // unless the value is changing + // + assert(src != NULL, "Src should not be NULL."); + if (_last_Java_sp != src->_last_Java_sp) { + _last_Java_sp = NULL; + OrderAccess::release(); + } + _last_Java_fp = src->_last_Java_fp; + _last_Java_pc = src->_last_Java_pc; + // Must be last so the 
profiler will always see a valid frame if has_last_frame() is true + _last_Java_sp = src->_last_Java_sp; + } + + bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; } // both sp and pc must be recorded + + void make_walkable(); + + intptr_t* last_Java_sp(void) const { return _last_Java_sp; } + + const address last_Java_pc(void) { return _last_Java_pc; } + +private: + + static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } + +public: + + void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } + + intptr_t* last_Java_fp(void) { return _last_Java_fp; } + +#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "memory/resourceArea.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "prims/jvmtiExport.hpp" +#include "runtime/safepoint.hpp" + +#define __ masm-> + +#define BUFFER_SIZE 30*wordSize + +// Instead of issuing a LoadLoad barrier we create an address +// dependency between loads; this might be more efficient. + +// Common register usage: +// x10/f10: result +// c_rarg0: jni env +// c_rarg1: obj +// c_rarg2: jfield id + +static const Register robj = x13; +static const Register rcounter = x14; +static const Register roffset = x15; +static const Register rcounter_addr = x16; +static const Register result = x17; + +address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + const char *name; + switch (type) { + case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; + case T_BYTE: name = "jni_fast_GetByteField"; break; + case T_CHAR: name = "jni_fast_GetCharField"; break; + case T_SHORT: name = "jni_fast_GetShortField"; break; + case T_INT: name = "jni_fast_GetIntField"; break; + case T_LONG: name = "jni_fast_GetLongField"; break; + case T_FLOAT: name = "jni_fast_GetFloatField"; break; + case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; + default: ShouldNotReachHere(); + name = NULL; // unreachable + } + ResourceMark rm; + BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); + CodeBuffer cbuf(blob); + MacroAssembler* masm = new MacroAssembler(&cbuf); + address fast_entry = __ pc(); // entry point returned to the caller + + Label slow; + int32_t offset = 0; + __ la_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset); + __ addi(rcounter_addr, rcounter_addr, offset); + + Address 
safepoint_counter_addr(rcounter_addr, 0); + __ lwu(rcounter, safepoint_counter_addr); // zero-extending 32-bit load of the counter + // An even value means there are no ongoing safepoint operations + __ andi(t0, rcounter, 1); + __ bnez(t0, slow); + + if (JvmtiExport::can_post_field_access()) { + // Using barrier to order wrt. JVMTI check and load of result. + __ membar(MacroAssembler::LoadLoad); + + // Check to see if a field access watch has been set before we + // take the fast path. + int32_t offset2; + __ la_patchable(result, + ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), + offset2); + __ lwu(result, Address(result, offset2)); + __ bnez(result, slow); + + __ mv(robj, c_rarg1); + } else { + // Using address dependency to order wrt. load of result. + __ xorr(robj, c_rarg1, rcounter); + __ xorr(robj, robj, rcounter); // obj, since + // robj ^ rcounter ^ rcounter == robj + // robj is address dependent on rcounter. + } + + // Both robj and t0 are clobbered by try_resolve_jobject_in_native. + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + assert_cond(bs != NULL); + bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow); + + __ srli(roffset, c_rarg2, 2); // offset + + assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); + speculative_load_pclist[count] = __ pc(); // Used by the segfault handler. NOTE(review): this records the pc of the following add, not of the field load below - confirm against the handler's pc match + __ add(roffset, robj, roffset); // roffset now holds the absolute field address + + switch (type) { + case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break; + case T_BYTE: __ lb(result, Address(roffset, 0)); break; + case T_CHAR: __ lhu(result, Address(roffset, 0)); break; + case T_SHORT: __ lh(result, Address(roffset, 0)); break; + case T_INT: __ lw(result, Address(roffset, 0)); break; + case T_LONG: __ ld(result, Address(roffset, 0)); break; + case T_FLOAT: { + __ flw(f28, Address(roffset, 0)); // f28 as temporaries + __ fmv_x_w(result, f28); // f{31--0}-->x + break; + } + case T_DOUBLE: { + __ fld(f28, Address(roffset, 0)); // f28 as temporaries + __ fmv_x_d(result, f28); // 
d{63--0}-->x + break; + } + default: ShouldNotReachHere(); + } + + // Using acquire: Order JVMTI check and load of result wrt. succeeding check + // (LoadStore for volatile field). + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + + __ lw(t0, safepoint_counter_addr); // NOTE(review): sign-extending reload vs. the lwu above; the 64-bit bne below would always take the slow path once bit 31 of the counter is set - confirm intended + __ bne(rcounter, t0, slow); // counter changed while reading: fall back to the regular JNI accessor + + switch (type) { + case T_FLOAT: __ fmv_w_x(f10, result); break; + case T_DOUBLE: __ fmv_d_x(f10, result); break; + default: __ mv(x10, result); break; + } + __ ret(); + + slowcase_entry_pclist[count++] = __ pc(); // slow-case entry paired with the speculative_load_pclist entry recorded above + __ bind(slow); + address slow_case_addr; + switch (type) { + case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; + case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; + case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; + case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; + case T_INT: slow_case_addr = jni_GetIntField_addr(); break; + case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; + case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; + case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; + default: ShouldNotReachHere(); + slow_case_addr = NULL; // unreachable + } + + { + __ enter(); + int32_t tmp_offset = 0; + __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset); + __ jalr(x1, t0, tmp_offset); // call the regular JNI accessor + __ leave(); + __ ret(); + } + __ flush(); + + return fast_entry; +} + + +address JNI_FastGetField::generate_fast_get_boolean_field() { + return generate_fast_get_int_field0(T_BOOLEAN); +} + +address JNI_FastGetField::generate_fast_get_byte_field() { + return generate_fast_get_int_field0(T_BYTE); +} + +address JNI_FastGetField::generate_fast_get_char_field() { + return generate_fast_get_int_field0(T_CHAR); +} + +address JNI_FastGetField::generate_fast_get_short_field() { + return generate_fast_get_int_field0(T_SHORT); +} + +address JNI_FastGetField::generate_fast_get_int_field() { + return generate_fast_get_int_field0(T_INT); +} + +address 
JNI_FastGetField::generate_fast_get_long_field() { + return generate_fast_get_int_field0(T_LONG); +} + +address JNI_FastGetField::generate_fast_get_float_field() { + return generate_fast_get_int_field0(T_FLOAT); +} + +address JNI_FastGetField::generate_fast_get_double_field() { + return generate_fast_get_int_field0(T_DOUBLE); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/jniTypes_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/jniTypes_riscv.hpp @@ -0,0 +1,106 @@ +/* + * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_RISCV_JNITYPES_RISCV_HPP +#define CPU_RISCV_JNITYPES_RISCV_HPP + +#include "jni.h" +#include "memory/allocation.hpp" +#include "oops/oop.hpp" + +// This file holds platform-dependent routines used to write primitive jni +// types to the array of arguments passed into JavaCalls::call + +class JNITypes : private AllStatic { + // These functions write a java primitive type (in native format) + // to a java stack slot array to be passed as an argument to JavaCalls::call. + // I.e., they are functionally 'push' operations if they have a 'pos' + // formal parameter. Note that jlong's and jdouble's are written + // _in reverse_ of the order in which they appear in the interpreter + // stack. This is because call stubs (see stubGenerator_sparc.cpp; NOTE(review): + // reference inherited from another port - confirm the RISC-V equivalent) + // reverse the argument list constructed by JavaCallArguments (see + // javaCalls.hpp). + +public: + // Ints are stored in native format in one JavaCallArgument slot at *to. + static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } + static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } + static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } + + // Longs are stored in native format in one JavaCallArgument slot at + // *(to+1). + static inline void put_long(jlong from, intptr_t *to) { + *(jlong*) (to + 1) = from; + } + + static inline void put_long(jlong from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = from; + pos += 2; // a long advances the position by two slots + } + + static inline void put_long(jlong *from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = *from; + pos += 2; // a long advances the position by two slots + } + + // Oops are stored in native format in one JavaCallArgument slot at *to. 
+ static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); } + static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. + static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } + static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } + static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } + +#undef _JNI_SLOT_OFFSET +#define _JNI_SLOT_OFFSET 1 + // Doubles are stored in native word format in one JavaCallArgument + // slot at *(to+1). + static inline void put_double(jdouble from, intptr_t *to) { + *(jdouble*) (to + 1) = from; + } + + static inline void put_double(jdouble from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = from; + pos += 2; // a double advances the position by two slots + } + + static inline void put_double(jdouble *from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = *from; + pos += 2; // a double advances the position by two slots + } + + // The get_xxx routines, on the other hand, actually _do_ fetch + // java primitive types from the interpreter stack. + // No need to worry about alignment on Intel. NOTE(review): wording inherited from another port - confirm for RISC-V. 
+ static inline jint get_int (intptr_t *from) { return *(jint *) from; } + static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } + static inline oop get_obj (intptr_t *from) { return *(oop *) from; } + static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } + static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } +#undef _JNI_SLOT_OFFSET +}; + +#endif // CPU_RISCV_JNITYPES_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp @@ -0,0 +1,4253 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "compiler/disassembler.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/accessDecorators.hpp" +#include "oops/compressedOops.inline.hpp" +#include "oops/klass.inline.hpp" +#include "oops/oop.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/jniHandles.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.hpp" +#include "utilities/powerOfTwo.hpp" +#ifdef COMPILER2 +#include "opto/compile.hpp" +#include "opto/node.hpp" +#include "opto/output.hpp" +#endif + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) block_comment(str) +#endif +#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") + +static void pass_arg0(MacroAssembler* masm, Register arg) { + if (c_rarg0 != arg) { // skip the move when the value is already in place + assert_cond(masm != NULL); + masm->mv(c_rarg0, arg); + } +} + +static void pass_arg1(MacroAssembler* masm, Register arg) { + if (c_rarg1 != arg) { // skip the move when the value is already in place + assert_cond(masm != NULL); + masm->mv(c_rarg1, arg); + } +} + +static void pass_arg2(MacroAssembler* masm, Register arg) { + if (c_rarg2 != arg) { // skip the move when the value is already in place + assert_cond(masm != NULL); + masm->mv(c_rarg2, arg); + } +} + +static void pass_arg3(MacroAssembler* masm, Register arg) { + if (c_rarg3 != arg) { // skip the move when the value is already in place + assert_cond(masm != NULL); + masm->mv(c_rarg3, arg); + } +} + +void MacroAssembler::align(int modulus, int extra_offset) { + CompressibleRegion cr(this); + while ((offset() + extra_offset) % modulus != 0) { nop(); } // pad with nops until offset() + extra_offset is a multiple of modulus +} + +void 
MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { + call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); +} + +// Implementation of call_VM versions + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + bool check_exceptions) { + call_VM_helper(oop_result, entry_point, 0, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions) { + pass_arg1(this, arg_1); + call_VM_helper(oop_result, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); // highest argument first, so arg_2 may still live in c_rarg1 + pass_arg1(this, arg_1); + call_VM_helper(oop_result, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + assert(arg_1 != c_rarg3, "smashed arg"); + assert(arg_2 != c_rarg3, "smashed arg"); + pass_arg3(this, arg_3); // highest argument first; the asserts guard the registers overwritten here + + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + + pass_arg1(this, arg_1); + call_VM_helper(oop_result, entry_point, 3, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + bool check_exceptions) { + pass_arg1(this, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register 
arg_1, + Register arg_2, + bool check_exceptions) { + + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); // highest argument first, so arg_2 may still live in c_rarg1 + pass_arg1(this, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + assert(arg_1 != c_rarg3, "smashed arg"); + assert(arg_2 != c_rarg3, "smashed arg"); + pass_arg3(this, arg_3); // highest argument first; the asserts guard the registers overwritten here + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + pass_arg1(this, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); +} + +// these are no-ops overridden by InterpreterMacroAssembler +void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} +void MacroAssembler::check_and_handle_popframe(Register java_thread) {} + +// Calls to C land +// +// When entering C land, the fp and esp of the last Java frame have to be recorded +// in the (thread-local) JavaThread object. When leaving C land, the last Java fp +// has to be reset to 0. This is required to allow proper stack traversal. 
+void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Register last_java_pc, + Register tmp) { + + if (last_java_pc->is_valid()) { + sd(last_java_pc, Address(xthread, + JavaThread::frame_anchor_offset() + + JavaFrameAnchor::last_Java_pc_offset())); + } + + // determine last_java_sp register + if (last_java_sp == sp) { + mv(tmp, sp); + last_java_sp = tmp; + } else if (!last_java_sp->is_valid()) { + last_java_sp = esp; + } + + sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); + + // last_java_fp is optional + if (last_java_fp->is_valid()) { + sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); + } +} + +void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + address last_java_pc, + Register tmp) { + assert(last_java_pc != NULL, "must provide a valid PC"); + + la(tmp, last_java_pc); + sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + + set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); +} + +void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Label &L, + Register tmp) { + if (L.is_bound()) { + set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); + } else { + InstructionMark im(this); + L.add_patch_at(code(), locator()); + set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); + } +} + +void MacroAssembler::reset_last_Java_frame(bool clear_fp) { + // we must set sp to zero to clear frame + sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); + + // must clear fp, so that compiled frames are not confused; it is + // possible that we need it only for debugging + if (clear_fp) { + sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); + } + + // Always clear the pc because it could have been set by make_walkable() + sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); +} + +void 
MacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + // determine java_thread register + if (!java_thread->is_valid()) { + java_thread = xthread; + } + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = esp; + } + + // debugging support + assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); + assert(java_thread == xthread, "unexpected register"); + + assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); + assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); + + // push java thread (becomes first argument of C function) + mv(c_rarg0, java_thread); + + // set last Java frame before call + assert(last_java_sp != fp, "can't use fp"); + + Label l; + set_last_Java_frame(last_java_sp, fp, l, t0); + + // do the call, remove parameters + MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); + + // reset last Java frame + // Only interpreter should have to clear fp + reset_last_Java_frame(true); + + // C++ interp handles this in the interpreter + check_and_handle_popframe(java_thread); + check_and_handle_earlyret(java_thread); + + if (check_exceptions) { + // check for pending exceptions (java_thread is set upon return) + ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); + Label ok; + beqz(t0, ok); + int32_t offset = 0; + la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset); + jalr(x0, t0, offset); + bind(ok); + } + + // get oop result if there is one and reset the value in the thread + if (oop_result->is_valid()) { + get_vm_result(oop_result, java_thread); + } +} + +void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { + ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); + sd(zr, 
Address(java_thread, JavaThread::vm_result_offset())); + verify_oop(oop_result, "broken oop in call_VM_base"); +} + +void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { + ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); + sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); +} + +void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { + assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); + assert_different_registers(klass, xthread, tmp); + + Label L_fallthrough, L_tmp; + if (L_fast_path == NULL) { + L_fast_path = &L_fallthrough; + } else if (L_slow_path == NULL) { + L_slow_path = &L_fallthrough; + } + + // Fast path check: class is fully initialized + lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); + sub(tmp, tmp, InstanceKlass::fully_initialized); + beqz(tmp, *L_fast_path); + + // Fast path check: current thread is initializer thread + ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); + + if (L_slow_path == &L_fallthrough) { + beq(xthread, tmp, *L_fast_path); + bind(*L_slow_path); + } else if (L_fast_path == &L_fallthrough) { + bne(xthread, tmp, *L_slow_path); + bind(*L_fast_path); + } else { + Unimplemented(); + } +} + +void MacroAssembler::verify_oop(Register reg, const char* s) { + if (!VerifyOops) { return; } + + // Pass register number to verify_oop_subroutine + const char* b = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("verify_oop: %s: %s", reg->name(), s); + b = code_string(ss.as_string()); + } + BLOCK_COMMENT("verify_oop {"); + + push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + + mv(c_rarg0, reg); // c_rarg0 : x10 + li(t0, (uintptr_t)(address)b); + + // call indirectly to solve generation ordering problem + int32_t offset = 0; + la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); + ld(t1, Address(t1, offset)); + 
jalr(t1); + + pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + + BLOCK_COMMENT("} verify_oop"); +} + +void MacroAssembler::verify_oop_addr(Address addr, const char* s) { + if (!VerifyOops) { + return; + } + + const char* b = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("verify_oop_addr: %s", s); + b = code_string(ss.as_string()); + } + BLOCK_COMMENT("verify_oop_addr {"); + + push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + + if (addr.uses(sp)) { + la(x10, addr); + ld(x10, Address(x10, 4 * wordSize)); + } else { + ld(x10, addr); + } + + li(t0, (uintptr_t)(address)b); + + // call indirectly to solve generation ordering problem + int32_t offset = 0; + la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); + ld(t1, Address(t1, offset)); + jalr(t1); + + pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + + BLOCK_COMMENT("} verify_oop_addr"); +} + +Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, + int extra_slot_offset) { + // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
+  int stackElementSize = Interpreter::stackElementSize;
+  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
+#ifdef ASSERT
+  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
+  assert(offset1 - offset == stackElementSize, "correct arithmetic");
+#endif
+  if (arg_slot.is_constant()) {
+    return Address(esp, arg_slot.as_constant() * stackElementSize + offset);
+  } else {
+    assert_different_registers(t0, arg_slot.as_register());
+    shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize));
+    return Address(t0, offset);
+  }
+}
+
+#ifndef PRODUCT
+extern "C" void findpc(intptr_t x);
+#endif
+
+// Reports a fatal "DEBUG MESSAGE" on behalf of code emitted by
+// MacroAssembler::stop().
+//   msg  - text to report
+//   pc   - value printed as the pc
+//   regs - saved integer register values, printed as x0..x31
+// If ShowMessageBoxOnError is set, a message box is shown first and, when it
+// is confirmed, pc and regs[0..31] are dumped before BREAKPOINT. The function
+// always ends in fatal().
+// NOTE(review): stop() hands over sp right after pusha(), and pusha() does
+// not push x0/x2/x3/x4 - confirm that regs[i] really holds register xi.
+void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
+{
+  // In order to get locks to work, we need to fake a in_VM state
+  if (ShowMessageBoxOnError) {
+    JavaThread* thread = JavaThread::current();
+    JavaThreadState saved_state = thread->thread_state();
+    thread->set_thread_state(_thread_in_vm);
+#ifndef PRODUCT
+    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
+      ttyLocker ttyl;
+      BytecodeCounter::print();
+    }
+#endif
+    if (os::message_box(msg, "Execution stopped, print registers?")) {
+      ttyLocker ttyl;
+      tty->print_cr(" pc = 0x%016lx", pc);
+#ifndef PRODUCT
+      tty->cr();
+      findpc(pc);
+      tty->cr();
+#endif
+      tty->print_cr(" x0 = 0x%016lx", regs[0]);
+      tty->print_cr(" x1 = 0x%016lx", regs[1]);
+      tty->print_cr(" x2 = 0x%016lx", regs[2]);
+      tty->print_cr(" x3 = 0x%016lx", regs[3]);
+      tty->print_cr(" x4 = 0x%016lx", regs[4]);
+      tty->print_cr(" x5 = 0x%016lx", regs[5]);
+      tty->print_cr(" x6 = 0x%016lx", regs[6]);
+      tty->print_cr(" x7 = 0x%016lx", regs[7]);
+      tty->print_cr(" x8 = 0x%016lx", regs[8]);
+      tty->print_cr(" x9 = 0x%016lx", regs[9]);
+      tty->print_cr("x10 = 0x%016lx", regs[10]);
+      tty->print_cr("x11 = 0x%016lx", regs[11]);
+      tty->print_cr("x12 = 0x%016lx", regs[12]);
+      tty->print_cr("x13 = 0x%016lx", regs[13]);
+      tty->print_cr("x14 = 0x%016lx", regs[14]);
+      tty->print_cr("x15 = 0x%016lx", regs[15]);
+      tty->print_cr("x16 = 0x%016lx", regs[16]);
+      tty->print_cr("x17 = 0x%016lx", regs[17]);
+      tty->print_cr("x18 = 0x%016lx", regs[18]);
+      tty->print_cr("x19 = 0x%016lx", regs[19]);
+      tty->print_cr("x20 = 0x%016lx", regs[20]);
+      tty->print_cr("x21 = 0x%016lx", regs[21]);
+      tty->print_cr("x22 = 0x%016lx", regs[22]);
+      tty->print_cr("x23 = 0x%016lx", regs[23]);
+      tty->print_cr("x24 = 0x%016lx", regs[24]);
+      tty->print_cr("x25 = 0x%016lx", regs[25]);
+      tty->print_cr("x26 = 0x%016lx", regs[26]);
+      tty->print_cr("x27 = 0x%016lx", regs[27]);
+      tty->print_cr("x28 = 0x%016lx", regs[28]);
+      // x29 was missing from the dump; every other slot 0..31 is printed.
+      tty->print_cr("x29 = 0x%016lx", regs[29]);
+      tty->print_cr("x30 = 0x%016lx", regs[30]);
+      tty->print_cr("x31 = 0x%016lx", regs[31]);
+      BREAKPOINT;
+    }
+  }
+  fatal("DEBUG MESSAGE: %s", msg);
+}
+
+void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) {
+  Label done, not_weak;
+  beqz(value, done);           // Use NULL as-is.
+
+  // Test for jweak tag.
+  andi(t0, value, JNIHandles::weak_tag_mask);
+  beqz(t0, not_weak);
+
+  // Resolve jweak.
+  access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value,
+                 Address(value, -JNIHandles::weak_tag_value), tmp, thread);
+  verify_oop(value);
+  j(done);
+
+  bind(not_weak);
+  // Resolve (untagged) jobject.
+  access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread);
+  verify_oop(value);
+  bind(done);
+}
+
+void MacroAssembler::stop(const char* msg) {
+  address ip = pc();
+  pusha();
+  // The length of the instruction sequence emitted should be independent
+  // of the values of msg and ip so that the size of mach nodes for scratch
+  // emit and normal emit matches.
+  mv(c_rarg0, (address)msg);
+  mv(c_rarg1, (address)ip);
+  mv(c_rarg2, sp);
+  mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64));
+  jalr(c_rarg3);
+  ebreak();
+}
+
+void MacroAssembler::unimplemented(const char* what) {
+  const char* buf = NULL;
+  {
+    ResourceMark rm;
+    stringStream ss;
+    ss.print("unimplemented: %s", what);
+    buf = code_string(ss.as_string());
+  }
+  stop(buf);
+}
+
+void MacroAssembler::emit_static_call_stub() {
+  // CompiledDirectStaticCall::set_to_interpreted knows the
+  // exact layout of this stub.
+
+  ifence();
+  mov_metadata(xmethod, (Metadata*)NULL);
+
+  // Jump to the entry point of the i2c stub.
+  int32_t offset = 0;
+  movptr_with_offset(t0, 0, offset);
+  jalr(x0, t0, offset);
+}
+
+void MacroAssembler::call_VM_leaf_base(address entry_point,
+                                       int number_of_arguments,
+                                       Label *retaddr) {
+  call_native_base(entry_point, retaddr);
+}
+
+void MacroAssembler::call_native(address entry_point, Register arg_0) {
+  pass_arg0(this, arg_0);
+  call_native_base(entry_point);
+}
+
+// Emit a call to entry_point through t0, linking in x1 (ra).
+// t0 is overwritten with the target address, so t0 and xmethod are saved on
+// the stack around the call and restored afterwards.
+// If retaddr is non-NULL it is bound right after the jalr, i.e. at the
+// return address of the call.
+void MacroAssembler::call_native_base(address entry_point, Label *retaddr) {
+  int32_t offset = 0;
+  // Spell the saved set with RegSet so it cannot drift from the comment:
+  // the former literal mask 0x80000040 selected x6 (t1) and x31, not t0 (x5).
+  push_reg(RegSet::of(t0, xmethod), sp);   // push << t0 & xmethod >> to sp
+  movptr_with_offset(t0, entry_point, offset);
+  jalr(x1, t0, offset);
+  if (retaddr != NULL) {
+    bind(*retaddr);
+  }
+  pop_reg(RegSet::of(t0, xmethod), sp);   // pop << t0 & xmethod >> from sp
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
+  call_VM_leaf_base(entry_point, number_of_arguments);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
+  pass_arg0(this, arg_0);
+  call_VM_leaf_base(entry_point, 1);
+}
+
+// NOTE(review): arg_0 is passed before arg_1, so arg_1 presumably must not
+// live in c_rarg0; unlike super_call_VM_leaf there is no "smashed arg" assert.
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
+  pass_arg0(this, arg_0);
+  pass_arg1(this, arg_1);
+  call_VM_leaf_base(entry_point, 2);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0,
+                                  Register arg_1, Register arg_2) {
+  pass_arg0(this, arg_0);
+
pass_arg1(this, arg_1); + pass_arg2(this, arg_2); + call_VM_leaf_base(entry_point, 3); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + + assert(arg_0 != c_rarg1, "smashed arg"); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 2); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { + assert(arg_0 != c_rarg2, "smashed arg"); + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + assert(arg_0 != c_rarg1, "smashed arg"); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 3); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { + assert(arg_0 != c_rarg3, "smashed arg"); + assert(arg_1 != c_rarg3, "smashed arg"); + assert(arg_2 != c_rarg3, "smashed arg"); + pass_arg3(this, arg_3); + assert(arg_0 != c_rarg2, "smashed arg"); + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + assert(arg_0 != c_rarg1, "smashed arg"); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 4); +} + +void MacroAssembler::nop() { + addi(x0, x0, 0); +} + +void MacroAssembler::mv(Register Rd, Register Rs) { + if (Rd != Rs) { + addi(Rd, Rs, 0); + } +} + +void MacroAssembler::notr(Register Rd, Register Rs) { + xori(Rd, Rs, -1); +} + +void MacroAssembler::neg(Register Rd, Register Rs) { + sub(Rd, x0, Rs); +} + +void MacroAssembler::negw(Register Rd, Register Rs) { + subw(Rd, x0, Rs); +} + +void MacroAssembler::sext_w(Register Rd, Register Rs) { + addiw(Rd, Rs, 0); +} + +void MacroAssembler::zext_b(Register Rd, Register Rs) { + andi(Rd, Rs, 0xFF); +} + +void 
MacroAssembler::seqz(Register Rd, Register Rs) { + sltiu(Rd, Rs, 1); +} + +void MacroAssembler::snez(Register Rd, Register Rs) { + sltu(Rd, x0, Rs); +} + +void MacroAssembler::sltz(Register Rd, Register Rs) { + slt(Rd, Rs, x0); +} + +void MacroAssembler::sgtz(Register Rd, Register Rs) { + slt(Rd, x0, Rs); +} + +void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) { + if (Rd != Rs) { + fsgnj_s(Rd, Rs, Rs); + } +} + +void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) { + fsgnjx_s(Rd, Rs, Rs); +} + +void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) { + fsgnjn_s(Rd, Rs, Rs); +} + +void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) { + if (Rd != Rs) { + fsgnj_d(Rd, Rs, Rs); + } +} + +void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) { + fsgnjx_d(Rd, Rs, Rs); +} + +void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) { + fsgnjn_d(Rd, Rs, Rs); +} + +void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) { + vmnand_mm(vd, vs, vs); +} + +void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) { + vnsrl_wx(vd, vs, x0, vm); +} + +void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { + vfsgnjn_vv(vd, vs, vs); +} + +void MacroAssembler::la(Register Rd, const address &dest) { + int64_t offset = dest - pc(); + if (is_offset_in_range(offset, 32)) { + auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit + addi(Rd, Rd, ((int64_t)offset << 52) >> 52); + } else { + movptr(Rd, dest); + } +} + +void MacroAssembler::la(Register Rd, const Address &adr) { + InstructionMark im(this); + code_section()->relocate(inst_mark(), adr.rspec()); + relocInfo::relocType rtype = adr.rspec().reloc()->type(); + + switch (adr.getMode()) { + case Address::literal: { + if (rtype == relocInfo::none) { + li(Rd, (intptr_t)(adr.target())); + } else { + movptr(Rd, adr.target()); + } + break; + } + case Address::base_plus_offset: { + int32_t offset = 0; + baseOffset(Rd, 
adr, offset); + addi(Rd, Rd, offset); + break; + } + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::la(Register Rd, Label &label) { + la(Rd, target(label)); +} + +#define INSN(NAME) \ + void MacroAssembler::NAME##z(Register Rs, const address &dest) { \ + NAME(Rs, zr, dest); \ + } \ + void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ + NAME(Rs, zr, l, is_far); \ + } \ + + INSN(beq); + INSN(bne); + INSN(blt); + INSN(ble); + INSN(bge); + INSN(bgt); + +#undef INSN + +// Float compare branch instructions + +#define INSN(NAME, FLOATCMP, BRANCH) \ + void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ + FLOATCMP##_s(t0, Rs1, Rs2); \ + BRANCH(t0, l, is_far); \ + } \ + void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ + FLOATCMP##_d(t0, Rs1, Rs2); \ + BRANCH(t0, l, is_far); \ + } + + INSN(beq, feq, bnez); + INSN(bne, feq, beqz); + +#undef INSN + + +#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ + void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ + if (is_unordered) { \ + /* jump if either source is NaN or condition is expected */ \ + FLOATCMP2##_s(t0, Rs2, Rs1); \ + beqz(t0, l, is_far); \ + } else { \ + /* jump if no NaN in source and condition is expected */ \ + FLOATCMP1##_s(t0, Rs1, Rs2); \ + bnez(t0, l, is_far); \ + } \ + } \ + void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ + if (is_unordered) { \ + /* jump if either source is NaN or condition is expected */ \ + FLOATCMP2##_d(t0, Rs2, Rs1); \ + beqz(t0, l, is_far); \ + } else { \ + /* jump if no NaN in source and condition is expected */ \ + FLOATCMP1##_d(t0, Rs1, Rs2); \ + bnez(t0, l, is_far); \ + } \ + } + + INSN(ble, fle, flt); + INSN(blt, flt, fle); + +#undef INSN + +#define INSN(NAME, CMP) \ + void 
MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ + float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ + } \ + void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ + double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ + } + + INSN(bgt, blt); + INSN(bge, ble); + +#undef INSN + + +#define INSN(NAME, CSR) \ + void MacroAssembler::NAME(Register Rd) { \ + csrr(Rd, CSR); \ + } + + INSN(rdinstret, CSR_INSTERT); + INSN(rdcycle, CSR_CYCLE); + INSN(rdtime, CSR_TIME); + INSN(frcsr, CSR_FCSR); + INSN(frrm, CSR_FRM); + INSN(frflags, CSR_FFLAGS); + +#undef INSN + +void MacroAssembler::csrr(Register Rd, unsigned csr) { + csrrs(Rd, csr, x0); +} + +#define INSN(NAME, OPFUN) \ + void MacroAssembler::NAME(unsigned csr, Register Rs) { \ + OPFUN(x0, csr, Rs); \ + } + + INSN(csrw, csrrw); + INSN(csrs, csrrs); + INSN(csrc, csrrc); + +#undef INSN + +#define INSN(NAME, OPFUN) \ + void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ + OPFUN(x0, csr, imm); \ + } + + INSN(csrwi, csrrwi); + INSN(csrsi, csrrsi); + INSN(csrci, csrrci); + +#undef INSN + +#define INSN(NAME, CSR) \ + void MacroAssembler::NAME(Register Rd, Register Rs) { \ + csrrw(Rd, CSR, Rs); \ + } + + INSN(fscsr, CSR_FCSR); + INSN(fsrm, CSR_FRM); + INSN(fsflags, CSR_FFLAGS); + +#undef INSN + +#define INSN(NAME) \ + void MacroAssembler::NAME(Register Rs) { \ + NAME(x0, Rs); \ + } + + INSN(fscsr); + INSN(fsrm); + INSN(fsflags); + +#undef INSN + +void MacroAssembler::fsrmi(Register Rd, unsigned imm) { + guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register"); + csrrwi(Rd, CSR_FRM, imm); +} + +void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { + csrrwi(Rd, CSR_FFLAGS, imm); +} + +#define INSN(NAME) \ + void MacroAssembler::NAME(unsigned imm) { \ + NAME(x0, imm); \ + } + + INSN(fsrmi); + INSN(fsflagsi); + +#undef INSN + +void MacroAssembler::push_reg(Register Rs) +{ + 
addi(esp, esp, 0 - wordSize); + sd(Rs, Address(esp, 0)); +} + +void MacroAssembler::pop_reg(Register Rd) +{ + ld(Rd, esp, 0); + addi(esp, esp, wordSize); +} + +int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { + int count = 0; + // Scan bitset to accumulate register pairs + for (int reg = 31; reg >= 0; reg--) { + if ((1U << 31) & bitset) { + regs[count++] = reg; + } + bitset <<= 1; + } + return count; +} + +// Push lots of registers in the bit set supplied. Don't push sp. +// Return the number of words pushed +int MacroAssembler::push_reg(unsigned int bitset, Register stack) { + DEBUG_ONLY(int words_pushed = 0;) + CompressibleRegion cr(this); + + unsigned char regs[32]; + int count = bitset_to_regs(bitset, regs); + // reserve one slot to align for odd count + int offset = is_even(count) ? 0 : wordSize; + + if (count) { + addi(stack, stack, - count * wordSize - offset); + } + for (int i = count - 1; i >= 0; i--) { + sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); + DEBUG_ONLY(words_pushed ++;) + } + + assert(words_pushed == count, "oops, pushed != count"); + + return count; +} + +int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { + DEBUG_ONLY(int words_popped = 0;) + CompressibleRegion cr(this); + + unsigned char regs[32]; + int count = bitset_to_regs(bitset, regs); + // reserve one slot to align for odd count + int offset = is_even(count) ? 0 : wordSize; + + for (int i = count - 1; i >= 0; i--) { + ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); + DEBUG_ONLY(words_popped ++;) + } + + if (count) { + addi(stack, stack, count * wordSize + offset); + } + assert(words_popped == count, "oops, popped != count"); + + return count; +} + +// Push float registers in the bitset, except sp. +// Return the number of heapwords pushed. 
+int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
+  CompressibleRegion cr(this);
+  int words_pushed = 0;
+  unsigned char regs[32];
+  int count = bitset_to_regs(bitset, regs);
+  // Round the slot count up to an even number; for an odd count the lowest
+  // slot is left unused.
+  int push_slots = count + (count & 1);
+
+  if (count) {
+    addi(stack, stack, -push_slots * wordSize);
+  }
+
+  for (int i = count - 1; i >= 0; i--) {
+    fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize));
+    words_pushed++;
+  }
+
+  assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
+  return count;
+}
+
+// Pop the float registers selected by bitset from the area addressed by
+// stack, using the same slot layout as push_fp.
+// Return the number of registers popped.
+int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
+  CompressibleRegion cr(this);
+  int words_popped = 0;
+  unsigned char regs[32];
+  int count = bitset_to_regs(bitset, regs);
+  int pop_slots = count + (count & 1);
+
+  for (int i = count - 1; i >= 0; i--) {
+    fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize));
+    words_popped++;
+  }
+
+  if (count) {
+    addi(stack, stack, pop_slots * wordSize);
+  }
+
+  assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count);
+  return count;
+}
+
+#ifdef COMPILER2
+// Push the vector registers selected by bitset (bit i selects vector
+// register i), highest-numbered register first. Each register is stored
+// whole with vs1r_v after moving stack down by one vector register size.
+// Return the number of words pushed.
+int MacroAssembler::push_vp(unsigned int bitset, Register stack) {
+  CompressibleRegion cr(this);
+  int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+
+  // Collect the selected register numbers, highest first
+  unsigned char regs[32];
+  int count = bitset_to_regs(bitset, regs);
+
+  for (int i = 0; i < count; i++) {
+    sub(stack, stack, vector_size_in_bytes);
+    vs1r_v(as_VectorRegister(regs[i]), stack);
+  }
+
+  return count * vector_size_in_bytes / wordSize;
+}
+
+// Pop the vector registers selected by bitset in the reverse order of
+// push_vp, moving stack up by one vector register size per register.
+// Return the number of words popped.
+int MacroAssembler::pop_vp(unsigned int bitset, Register stack) {
+  CompressibleRegion cr(this);
+  int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+
+  // Collect the selected register numbers, highest first
+  unsigned char regs[32];
+  int count = bitset_to_regs(bitset, regs);
+
+  for (int i = count - 1; i >= 0; i--) {
+    vl1r_v(as_VectorRegister(regs[i]), stack);
+    add(stack, stack, vector_size_in_bytes);
+  }
+
+  return count * vector_size_in_bytes / wordSize;
+}
+#endif // COMPILER2
+
+void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) {
+  CompressibleRegion cr(this);
+  // Push integer registers x7, x10-x17, x28-x31.
+  push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
+
+  // Push float registers f0-f7, f10-f17, f28-f31.
+  addi(sp, sp, - wordSize * 20);
+  int offset = 0;
+  for (int i = 0; i < 32; i++) {
+    if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
+      fsd(as_FloatRegister(i), Address(sp, wordSize * (offset ++)));
+    }
+  }
+}
+
+void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) {
+  CompressibleRegion cr(this);
+  int offset = 0;
+  for (int i = 0; i < 32; i++) {
+    if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
+      fld(as_FloatRegister(i), Address(sp, wordSize * (offset ++)));
+    }
+  }
+  addi(sp, sp, wordSize * 20);
+
+  pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
+}
+
+// Push all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4).
+void MacroAssembler::pusha() {
+  CompressibleRegion cr(this);
+  push_reg(0xffffffe2, sp);
+}
+
+// Pop all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4).
+void MacroAssembler::popa() {
+  CompressibleRegion cr(this);
+  pop_reg(0xffffffe2, sp);
+}
+
+// Save the integer registers selected by mask 0xffffffe0, all 32 float
+// registers and, if save_vectors is set, the vector registers on the stack,
+// in that order. vector_size_in_bytes is the size of one vector register.
+void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) {
+  CompressibleRegion cr(this);
+  // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
+  push_reg(0xffffffe0, sp);
+
+  // float registers
+  addi(sp, sp, - 32 * wordSize);
+  for (int i = 0; i < 32; i++) {
+    fsd(as_FloatRegister(i), Address(sp, i * wordSize));
+  }
+
+  // vector registers
+  if (save_vectors) {
+    sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers);
+    vsetvli(t0, x0, Assembler::e64, Assembler::m8);
+    // stored in groups of 8 registers (m8)
+    for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) {
+      add(t0, sp, vector_size_in_bytes * i);
+      vse64_v(as_VectorRegister(i), t0);
+    }
+  }
+}
+
+// Restore what push_CPU_state saved, in the opposite order: vector registers
+// (if restore_vectors is set), then float registers, then integer registers.
+void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) {
+  CompressibleRegion cr(this);
+  // vector registers
+  if (restore_vectors) {
+    vsetvli(t0, x0, Assembler::e64, Assembler::m8);
+    for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) {
+      vle64_v(as_VectorRegister(i), sp);
+      add(sp, sp, vector_size_in_bytes * 8);
+    }
+  }
+
+  // float registers
+  for (int i = 0; i < 32; i++) {
+    fld(as_FloatRegister(i), Address(sp, i * wordSize));
+  }
+  addi(sp, sp, 32 * wordSize);
+
+  // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
+  pop_reg(0xffffffe0, sp);
+}
+
+// Write offset into the immediate fields of the jal at branch.
+// Returns the number of bytes patched.
+static int patch_offset_in_jal(address branch, int64_t offset) {
+  assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal instruction!\n");
+  Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1);                       // offset[20]    ==> branch[31]
+  Assembler::patch(branch, 30, 21, (offset >> 1)  & 0x3ff);                     // offset[10:1]  ==> branch[30:21]
+  Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1);                       // offset[11]    ==> branch[20]
+  Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff);                      // offset[19:12] ==> branch[19:12]
+  return NativeInstruction::instruction_size;                                   // only one instruction
+}
+
+// Write offset into the immediate fields of the conditional branch at branch.
+// Returns the number of bytes patched.
+static int patch_offset_in_conditional_branch(address branch, int64_t offset) {
+  assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n");
+  Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1);                       // offset[12]    ==> branch[31]
+  Assembler::patch(branch, 30, 25, (offset >> 5)  & 0x3f);                      // offset[10:5]  ==> branch[30:25]
+  Assembler::patch(branch, 7,  7,  (offset >> 11) & 0x1);                       // offset[11]    ==> branch[7]
+  Assembler::patch(branch, 11, 8,  (offset >> 1)  & 0xf);                       // offset[4:1]   ==> branch[11:8]
+  return NativeInstruction::instruction_size;                                   // only one instruction
+}
+
+// Write offset into a two-instruction pc-relative sequence starting at branch.
+// Returns the number of bytes patched.
+static int patch_offset_in_pc_relative(address branch, int64_t offset) {
+  const int PC_RELATIVE_INSTRUCTION_NUM = 2;                                    // auipc, addi/jalr/load
+  Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff);         // Auipc.          offset[31:12]  ==> branch[31:12]
+  Assembler::patch(branch + 4, 31, 20, offset & 0xfff);                         // Addi/Jalr/Load. offset[11:0]   ==> branch[31:20]
+  return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size;
+}
+
+// Write target into the immediates of a six-instruction movptr sequence
+// starting at branch. Returns the number of bytes in the sequence.
+static int patch_addr_in_movptr(address branch, address target) {
+  const int MOVPTR_INSTRUCTIONS_NUM = 6;                                        // lui + addi + slli + addi + slli + addi/jalr/load
+  int32_t lower = ((intptr_t)target << 36) >> 36;
+  int64_t upper = ((intptr_t)target - lower) >> 28;
+  Assembler::patch(branch + 0,  31, 12, upper & 0xfffff);                       // Lui.             target[47:28] + target[27] ==> branch[31:12]
+  Assembler::patch(branch + 4,  31, 20, (lower >> 16) & 0xfff);                 // Addi.            target[27:16] ==> branch[31:20]
+  Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff);                  // Addi.            target[15: 5] ==> branch[31:20]
+  Assembler::patch(branch + 20, 31, 20, lower & 0x1f);                          // Addi/Jalr/Load.  target[ 4: 0] ==> branch[31:20]
+  return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
+}
+
+// Write target into the immediates of an eight-instruction li64 sequence
+// starting at branch. Returns the number of bytes in the sequence.
+static int patch_imm_in_li64(address branch, address target) {
+  const int LI64_INSTRUCTIONS_NUM = 8;                                          // lui + addi + slli + addi + slli + addi + slli + addi
+  int64_t lower = (intptr_t)target & 0xffffffff;
+  lower = lower - ((lower << 44) >> 44);
+  int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower;
+  int32_t upper =  (tmp_imm - (int32_t)lower) >> 32;
+  int64_t tmp_upper = upper, tmp_lower = upper;
+  tmp_lower = (tmp_lower << 52) >> 52;
+  tmp_upper -= tmp_lower;
+  tmp_upper >>= 12;
+  // Load upper 32 bits. Upper = target[63:32], but if target[31] == 1 or (target[31:20] == 0x7ff && target[19] == 1),
+  // upper = target[63:32] + 1.
+  Assembler::patch(branch + 0,  31, 12, tmp_upper & 0xfffff);                       // Lui.
+  Assembler::patch(branch + 4,  31, 20, tmp_lower & 0xfff);                         // Addi.
+  // Load the rest 32 bits.
+  Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff);            // Addi.
+  Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff);  // Addi.
+  Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff);                   // Addi.
+  return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
+}
+
+// Write target into the immediates of a two-instruction li32 sequence
+// starting at branch. Returns the number of bytes in the sequence.
+static int patch_imm_in_li32(address branch, int32_t target) {
+  const int LI32_INSTRUCTIONS_NUM = 2;                                          // lui + addiw
+  int64_t upper = (intptr_t)target;
+  int32_t lower = (((int32_t)target) << 20) >> 20;
+  upper -= lower;
+  upper = (int32_t)upper;
+  Assembler::patch(branch + 0,  31, 12, (upper >> 12) & 0xfffff);               // Lui.
+  Assembler::patch(branch + 4,  31, 20, lower & 0xfff);                         // Addiw.
+  return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
+}
+
+// Reassemble the sign-extended branch offset encoded in the jal at insn_addr.
+static long get_offset_of_jal(address insn_addr) {
+  assert_cond(insn_addr != NULL);
+  long offset = 0;
+  unsigned insn = *(unsigned*)insn_addr;
+  long val = (long)Assembler::sextract(insn, 31, 12);
+  offset |= ((val >> 19) & 0x1) << 20;
+  offset |= (val & 0xff) << 12;
+  offset |= ((val >> 8) & 0x1) << 11;
+  offset |= ((val >> 9) & 0x3ff) << 1;
+  offset = (offset << 43) >> 43;
+  return offset;
+}
+
+// Reassemble the branch offset encoded in the conditional branch at insn_addr.
+static long get_offset_of_conditional_branch(address insn_addr) {
+  long offset = 0;
+  assert_cond(insn_addr != NULL);
+  unsigned insn = *(unsigned*)insn_addr;
+  offset = (long)Assembler::sextract(insn, 31, 31);
+  offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11);
+  offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5);
+  offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1);
+  offset = (offset << 41) >> 41;
+  return offset;
+}
+
+// Reassemble the offset encoded in the two-instruction pc-relative sequence
+// at insn_addr.
+static long get_offset_of_pc_relative(address insn_addr) {
+  long offset = 0;
+  assert_cond(insn_addr != NULL);
+  offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12;                                  // Auipc.
+  offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20));                                         // Addi/Jalr/Load.
+  offset = (offset << 32) >> 32;
+  return offset;
+}
+
+// Reassemble the address encoded in the movptr sequence at insn_addr.
+static address get_target_of_movptr(address insn_addr) {
+  assert_cond(insn_addr != NULL);
+  intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28;    // Lui.
+  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16;                        // Addi.
+  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5;                         // Addi.
+  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20));                              // Addi/Jalr/Load.
+  return (address) target_address;
+}
+
+// Reassemble the value encoded in the li64 sequence at insn_addr.
+static address get_target_of_li64(address insn_addr) {
+  assert_cond(insn_addr != NULL);
+  intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44;    // Lui.
+  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32;                        // Addi.
+  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20;                        // Addi.
+  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8;                         // Addi.
+  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20));                              // Addi.
+  return (address)target_address;
+}
+
+// Reassemble the value encoded in the li32 sequence at insn_addr.
+static address get_target_of_li32(address insn_addr) {
+  assert_cond(insn_addr != NULL);
+  intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12;    // Lui.
+  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20));                              // Addiw.
+  return (address)target_address;
+}
+
+// Patch any kind of instruction; there may be several instructions.
+// Return the total length (in bytes) of the instructions.
+int MacroAssembler::pd_patch_instruction_size(address branch, address target) { + assert_cond(branch != NULL); + int64_t offset = target - branch; + if (NativeInstruction::is_jal_at(branch)) { // jal + return patch_offset_in_jal(branch, offset); + } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne + return patch_offset_in_conditional_branch(branch, offset); + } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load + return patch_offset_in_pc_relative(branch, offset); + } else if (NativeInstruction::is_movptr_at(branch)) { // movptr + return patch_addr_in_movptr(branch, target); + } else if (NativeInstruction::is_li64_at(branch)) { // li64 + return patch_imm_in_li64(branch, target); + } else if (NativeInstruction::is_li32_at(branch)) { // li32 + int64_t imm = (intptr_t)target; + return patch_imm_in_li32(branch, (int32_t)imm); + } else { +#ifdef ASSERT + tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", + *(unsigned*)branch, p2i(branch)); + Disassembler::decode(branch - 16, branch + 16); +#endif + ShouldNotReachHere(); + return -1; + } +} + +address MacroAssembler::target_addr_for_insn(address insn_addr) { + long offset = 0; + assert_cond(insn_addr != NULL); + if (NativeInstruction::is_jal_at(insn_addr)) { // jal + offset = get_offset_of_jal(insn_addr); + } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne + offset = get_offset_of_conditional_branch(insn_addr); + } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load + offset = get_offset_of_pc_relative(insn_addr); + } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr + return get_target_of_movptr(insn_addr); + } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64 + return get_target_of_li64(insn_addr); + } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32 + return get_target_of_li32(insn_addr); + } 
else { + ShouldNotReachHere(); + } + return address(((uintptr_t)insn_addr + offset)); +} + +int MacroAssembler::patch_oop(address insn_addr, address o) { + // OOPs are either narrow (32 bits) or wide (48 bits). We encode + // narrow OOPs by setting the upper 16 bits in the first + // instruction. + if (NativeInstruction::is_li32_at(insn_addr)) { + // Move narrow OOP + uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); + return patch_imm_in_li32(insn_addr, (int32_t)n); + } else if (NativeInstruction::is_movptr_at(insn_addr)) { + // Move wide OOP + return patch_addr_in_movptr(insn_addr, o); + } + ShouldNotReachHere(); + return -1; +} + +void MacroAssembler::reinit_heapbase() { + if (UseCompressedOops) { + if (Universe::is_fully_initialized()) { + mv(xheapbase, CompressedOops::ptrs_base()); + } else { + int32_t offset = 0; + la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset); + ld(xheapbase, Address(xheapbase, offset)); + } + } +} + +void MacroAssembler::mv(Register Rd, Address dest) { + assert(dest.getMode() == Address::literal, "Address mode should be Address::literal"); + code_section()->relocate(pc(), dest.rspec()); + movptr(Rd, dest.target()); +} + +void MacroAssembler::mv(Register Rd, address addr) { + // Here in case of use with relocation, use fix length instruciton + // movptr instead of li + movptr(Rd, addr); +} + +void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { + if (src.is_register()) { + mv(Rd, src.as_register()); + } else { + mv(Rd, src.as_constant()); + } +} + +void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { + andr(Rd, Rs1, Rs2); + // addw: The result is clipped to 32 bits, then the sign bit is extended, + // and the result is stored in Rd + addw(Rd, Rd, zr); +} + +void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { + orr(Rd, Rs1, Rs2); + // addw: The result is clipped to 32 bits, then the sign bit is extended, + // and the result is stored in Rd + 
addw(Rd, Rd, zr); +} + +void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { + xorr(Rd, Rs1, Rs2); + // addw: The result is clipped to 32 bits, then the sign bit is extended, + // and the result is stored in Rd + addw(Rd, Rd, zr); +} + +// Note: load_unsigned_short used to be called load_unsigned_word. +int MacroAssembler::load_unsigned_short(Register dst, Address src) { + int off = offset(); + lhu(dst, src); + return off; +} + +int MacroAssembler::load_unsigned_byte(Register dst, Address src) { + int off = offset(); + lbu(dst, src); + return off; +} + +int MacroAssembler::load_signed_short(Register dst, Address src) { + int off = offset(); + lh(dst, src); + return off; +} + +int MacroAssembler::load_signed_byte(Register dst, Address src) { + int off = offset(); + lb(dst, src); + return off; +} + +void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { + switch (size_in_bytes) { + case 8: ld(dst, src); break; + case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; + case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; + case 1: is_signed ? 
load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; + default: ShouldNotReachHere(); + } +} + +void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { + switch (size_in_bytes) { + case 8: sd(src, dst); break; + case 4: sw(src, dst); break; + case 2: sh(src, dst); break; + case 1: sb(src, dst); break; + default: ShouldNotReachHere(); + } +} + +// reverse bytes in halfword in lower 16 bits and sign-extend +// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) +void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { + if (UseRVB) { + rev8(Rd, Rs); + srai(Rd, Rd, 48); + return; + } + assert_different_registers(Rs, tmp); + assert_different_registers(Rd, tmp); + srli(tmp, Rs, 8); + andi(tmp, tmp, 0xFF); + slli(Rd, Rs, 56); + srai(Rd, Rd, 48); // sign-extend + orr(Rd, Rd, tmp); +} + +// reverse bytes in lower word and sign-extend +// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) +void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + rev8(Rd, Rs); + srai(Rd, Rd, 32); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + revb_h_w_u(Rd, Rs, tmp1, tmp2); + slli(tmp2, Rd, 48); + srai(tmp2, tmp2, 32); // sign-extend + srli(Rd, Rd, 16); + orr(Rd, Rd, tmp2); +} + +// reverse bytes in halfword in lower 16 bits and zero-extend +// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) +void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { + if (UseRVB) { + rev8(Rd, Rs); + srli(Rd, Rd, 48); + return; + } + assert_different_registers(Rs, tmp); + assert_different_registers(Rd, tmp); + srli(tmp, Rs, 8); + andi(tmp, tmp, 0xFF); + andi(Rd, Rs, 0xFF); + slli(Rd, Rd, 8); + orr(Rd, Rd, tmp); +} + +// reverse bytes in halfwords in lower 32 bits and zero-extend +// Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) +void 
MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + rev8(Rd, Rs); + rori(Rd, Rd, 32); + roriw(Rd, Rd, 16); + zext_w(Rd, Rd); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + srli(tmp2, Rs, 16); + revb_h_h_u(tmp2, tmp2, tmp1); + revb_h_h_u(Rd, Rs, tmp1); + slli(tmp2, tmp2, 16); + orr(Rd, Rd, tmp2); +} + +// This method is only used for revb_h +// Rd = Rs[47:0] Rs[55:48] Rs[63:56] +void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1); + srli(tmp1, Rs, 48); + andi(tmp2, tmp1, 0xFF); + slli(tmp2, tmp2, 8); + srli(tmp1, tmp1, 8); + orr(tmp1, tmp1, tmp2); + slli(Rd, Rs, 16); + orr(Rd, Rd, tmp1); +} + +// reverse bytes in each halfword +// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] +void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + assert_different_registers(Rs, tmp1); + assert_different_registers(Rd, tmp1); + rev8(Rd, Rs); + zext_w(tmp1, Rd); + roriw(tmp1, tmp1, 16); + slli(tmp1, tmp1, 32); + srli(Rd, Rd, 32); + roriw(Rd, Rd, 16); + zext_w(Rd, Rd); + orr(Rd, Rd, tmp1); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + revb_h_helper(Rd, Rs, tmp1, tmp2); + for (int i = 0; i < 3; ++i) { + revb_h_helper(Rd, Rd, tmp1, tmp2); + } +} + +// reverse bytes in each word +// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] +void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + rev8(Rd, Rs); + rori(Rd, Rd, 32); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + revb(Rd, Rs, tmp1, tmp2); + ror_imm(Rd, Rd, 32); +} + +// reverse bytes in doubleword +// 
Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] +void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + rev8(Rd, Rs); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + andi(tmp1, Rs, 0xFF); + slli(tmp1, tmp1, 8); + for (int step = 8; step < 56; step += 8) { + srli(tmp2, Rs, step); + andi(tmp2, tmp2, 0xFF); + orr(tmp1, tmp1, tmp2); + slli(tmp1, tmp1, 8); + } + srli(Rd, Rs, 56); + andi(Rd, Rd, 0xFF); + orr(Rd, tmp1, Rd); +} + +// rotate right with shift bits +void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) +{ + if (UseRVB) { + rori(dst, src, shift); + return; + } + + assert_different_registers(dst, tmp); + assert_different_registers(src, tmp); + assert(shift < 64, "shift amount must be < 64"); + slli(tmp, src, 64 - shift); + srli(dst, src, shift); + orr(dst, dst, tmp); +} + +void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { + if (is_imm_in_range(imm, 12, 0)) { + and_imm12(Rd, Rn, imm); + } else { + assert_different_registers(Rn, tmp); + li(tmp, imm); + andr(Rd, Rn, tmp); + } +} + +void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { + ld(tmp1, adr); + if (src.is_register()) { + orr(tmp1, tmp1, src.as_register()); + } else { + if (is_imm_in_range(src.as_constant(), 12, 0)) { + ori(tmp1, tmp1, src.as_constant()); + } else { + assert_different_registers(tmp1, tmp2); + li(tmp2, src.as_constant()); + orr(tmp1, tmp1, tmp2); + } + } + sd(tmp1, adr); +} + +void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { + if (UseCompressedClassPointers) { + lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); + if (CompressedKlassPointers::base() == NULL) { + slli(tmp, tmp, CompressedKlassPointers::shift()); + beq(trial_klass, tmp, L); + return; + } + decode_klass_not_null(tmp); + } else { 
+ ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); + } + beq(trial_klass, tmp, L); +} + +// Move an oop into a register. immediate is true if we want +// immediate instructions and nmethod entry barriers are not enabled. +// i.e. we are not going to patch this instruction while the code is being +// executed by another thread. +void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { + int oop_index; + if (obj == NULL) { + oop_index = oop_recorder()->allocate_oop_index(obj); + } else { +#ifdef ASSERT + { + ThreadInVMfromUnknown tiv; + assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); + } +#endif + oop_index = oop_recorder()->find_index(obj); + } + RelocationHolder rspec = oop_Relocation::spec(oop_index); + + // nmethod entry barrier necessitate using the constant pool. They have to be + // ordered with respected to oop access. + // Using immediate literals would necessitate fence.i. + if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) { + address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address + ld_constant(dst, Address(dummy, rspec)); + } else + mv(dst, Address((address)obj, rspec)); +} + +// Move a metadata address into a register. +void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { + int oop_index; + if (obj == NULL) { + oop_index = oop_recorder()->allocate_metadata_index(obj); + } else { + oop_index = oop_recorder()->find_index(obj); + } + RelocationHolder rspec = metadata_Relocation::spec(oop_index); + mv(dst, Address((address)obj, rspec)); +} + +// Writes to stack successive pages until offset reached to check for +// stack overflow + shadow pages. This clobbers tmp. +void MacroAssembler::bang_stack_size(Register size, Register tmp) { + assert_different_registers(tmp, size, t0); + // Bang stack for total size given plus shadow page size. + // Bang one page at a time because large size can bang beyond yellow and + // red zones. 
+ mv(t0, os::vm_page_size()); + Label loop; + bind(loop); + sub(tmp, sp, t0); + subw(size, size, t0); + sd(size, Address(tmp)); + bgtz(size, loop); + + // Bang down shadow pages too. + // At this point, (tmp-0) is the last address touched, so don't + // touch it again. (It was touched as (tmp-pagesize) but then tmp + // was post-decremented.) Skip this address by starting at i=1, and + // touch a few more pages below. N.B. It is important to touch all + // the way down to and including i=StackShadowPages. + for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { + // this could be any sized move but this can be a debugging crumb + // so the bigger the better. + sub(tmp, tmp, os::vm_page_size()); + sd(size, Address(tmp, 0)); + } +} + +SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { // emits a branch to _label that is taken when *flag_addr == value + assert_cond(masm != NULL); + int32_t offset = 0; + _masm = masm; + _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset); + _masm->lbu(t0, Address(t0, offset)); // t0 = flag byte, zero-extended + if (value) { _masm->bnez(t0, _label); } else { _masm->beqz(t0, _label); } // honour 'value': skip on a set flag when value is true, on a clear flag otherwise +} + +SkipIfEqual::~SkipIfEqual() { + assert_cond(_masm != NULL); + _masm->bind(_label); // the skip branch emitted by the constructor lands here + _masm = NULL; +} + +void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) { + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + ld(dst, Address(method, Method::const_offset())); // start from the 'method' argument instead of hard-coding xmethod + ld(dst, Address(dst, ConstMethod::constants_offset())); + ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes())); + ld(dst, Address(dst, mirror_offset)); + resolve_oop_handle(dst, tmp); +} + +void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { + // OopHandle::resolve is an indirection.
+ assert_different_registers(result, tmp); + access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); +} + +// ((WeakHandle)result).resolve() +void MacroAssembler::resolve_weak_handle(Register result, Register tmp) { + assert_different_registers(result, tmp); + Label resolved; + + // A null weak handle resolves to null. + beqz(result, resolved); + + // Only 64 bit platforms support GCs that require a tmp register + // Only IN_HEAP loads require a thread_tmp register + // WeakHandle::resolve is an indirection like jweak. + access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, + result, Address(result), tmp, noreg /* tmp_thread */); + bind(resolved); +} + +void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, + Register dst, Address src, + Register tmp1, Register thread_tmp) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + decorators = AccessInternal::decorator_fixup(decorators); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { + bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } else { + bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } +} + +void MacroAssembler::null_check(Register reg, int offset) { + if (needs_explicit_null_check(offset)) { + // provoke OS NULL exception if reg = NULL by + // accessing M[reg] w/o changing any registers + // NOTE: this is plenty to provoke a segv + ld(zr, Address(reg, 0)); + } else { + // nothing to do, (later) access of M[reg + offset] + // will provoke OS NULL exception if reg = NULL + } +} + +void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, + Address dst, Register src, + Register tmp1, Register thread_tmp) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + decorators = AccessInternal::decorator_fixup(decorators); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { + bs->BarrierSetAssembler::store_at(this, 
decorators, type, dst, src, tmp1, thread_tmp); + } else { + bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } +} + +// Algorithm must match CompressedOops::encode. +void MacroAssembler::encode_heap_oop(Register d, Register s) { + verify_oop(s, "broken oop in encode_heap_oop"); + if (CompressedOops::base() == NULL) { + if (CompressedOops::shift() != 0) { + assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); + srli(d, s, LogMinObjAlignmentInBytes); + } else { + mv(d, s); + } + } else { + Label notNull; + sub(d, s, xheapbase); + bgez(d, notNull); + mv(d, zr); + bind(notNull); + if (CompressedOops::shift() != 0) { + assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); + srli(d, d, CompressedOops::shift()); + } + } +} + +void MacroAssembler::load_klass(Register dst, Register src) { + if (UseCompressedClassPointers) { + lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); + decode_klass_not_null(dst); + } else { + ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); + } +} + +void MacroAssembler::store_klass(Register dst, Register src) { + // FIXME: Should this be a store release? concurrent gcs assumes + // klass length is valid if klass field is not null. 
+ if (UseCompressedClassPointers) { + encode_klass_not_null(src); + sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); + } else { + sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); + } +} + +void MacroAssembler::store_klass_gap(Register dst, Register src) { + if (UseCompressedClassPointers) { + // Store to klass gap in destination + sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); + } +} + +void MacroAssembler::decode_klass_not_null(Register r) { + decode_klass_not_null(r, r); +} + +void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { + assert(UseCompressedClassPointers, "should only be used for compressed headers"); + + if (CompressedKlassPointers::base() == NULL) { + if (CompressedKlassPointers::shift() != 0) { + assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); + slli(dst, src, LogKlassAlignmentInBytes); + } else { + mv(dst, src); + } + return; + } + + Register xbase = dst; + if (dst == src) { + xbase = tmp; + } + + assert_different_registers(src, xbase); + li(xbase, (uintptr_t)CompressedKlassPointers::base()); + + if (CompressedKlassPointers::shift() != 0) { + assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); + assert_different_registers(t0, xbase); + shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); + } else { + add(dst, xbase, src); + } + + if (xbase == xheapbase) { reinit_heapbase(); } +} + +void MacroAssembler::encode_klass_not_null(Register r) { + encode_klass_not_null(r, r); +} + +void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { + assert(UseCompressedClassPointers, "should only be used for compressed headers"); + + if (CompressedKlassPointers::base() == NULL) { + if (CompressedKlassPointers::shift() != 0) { + assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); + srli(dst, src, LogKlassAlignmentInBytes); + } else { + mv(dst, src); + } + 
return; + } + + if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && + CompressedKlassPointers::shift() == 0) { + zero_extend(dst, src, 32); + return; + } + + Register xbase = dst; + if (dst == src) { + xbase = tmp; + } + + assert_different_registers(src, xbase); + li(xbase, (intptr_t)CompressedKlassPointers::base()); + sub(dst, src, xbase); + if (CompressedKlassPointers::shift() != 0) { + assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); + srli(dst, dst, LogKlassAlignmentInBytes); + } + if (xbase == xheapbase) { + reinit_heapbase(); + } +} + +void MacroAssembler::decode_heap_oop_not_null(Register r) { + decode_heap_oop_not_null(r, r); +} + +void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { + assert(UseCompressedOops, "should only be used for compressed headers"); + assert(Universe::heap() != NULL, "java heap should be initialized"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. 
+ if (CompressedOops::shift() != 0) { + assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); + slli(dst, src, LogMinObjAlignmentInBytes); + if (CompressedOops::base() != NULL) { + add(dst, xheapbase, dst); + } + } else { + assert(CompressedOops::base() == NULL, "sanity"); + mv(dst, src); + } +} + +void MacroAssembler::decode_heap_oop(Register d, Register s) { + if (CompressedOops::base() == NULL) { + if (CompressedOops::shift() != 0 || d != s) { + slli(d, s, CompressedOops::shift()); + } + } else { + Label done; + mv(d, s); + beqz(s, done); // a zero narrow oop is left as zero in d; the heap base is not added + shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); + bind(done); + } + verify_oop(d, "broken oop in decode_heap_oop"); +} + +void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, + Register thread_tmp, DecoratorSet decorators) { + access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); +} + +void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, + Register thread_tmp, DecoratorSet decorators) { + access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); +} + +void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, + Register thread_tmp, DecoratorSet decorators) { + access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); // forward the caller's decorators, as load_heap_oop and store_heap_oop do +} + +// Used for storing NULLs. +void MacroAssembler::store_heap_oop_null(Address dst) { + access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); +} + +int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, + bool want_remainder) +{ + // Full implementation of Java idiv and irem. The function + // returns the (pc) offset of the div instruction - may be needed + // for implicit exceptions.
+ // + // input : rs1: dividend + // rs2: divisor + // + // result: either + // quotient (= rs1 idiv rs2) + // remainder (= rs1 irem rs2) + + + int idivl_offset = offset(); + if (!want_remainder) { + divw(result, rs1, rs2); + } else { + remw(result, rs1, rs2); // result = rs1 % rs2; + } + return idivl_offset; +} + +int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, + bool want_remainder) +{ + // Full implementation of Java ldiv and lrem. The function + // returns the (pc) offset of the div instruction - may be needed + // for implicit exceptions. + // + // input : rs1: dividend + // rs2: divisor + // + // result: either + // quotient (= rs1 idiv rs2) + // remainder (= rs1 irem rs2) + + int idivq_offset = offset(); + if (!want_remainder) { + div(result, rs1, rs2); + } else { + rem(result, rs1, rs2); // result = rs1 % rs2; + } + return idivq_offset; +} + +// Look up the method for a megamorphic invokeinterface call. +// The target method is determined by the (intf_klass, itable_index) pair. +// The receiver klass is in recv_klass. +// On success, the result will be in method_result, and execution falls through. +// On failure, execution transfers to the given label. +void MacroAssembler::lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_tmp, + Label& L_no_such_interface, + bool return_method) { + assert_different_registers(recv_klass, intf_klass, scan_tmp); + assert_different_registers(method_result, intf_klass, scan_tmp); + assert(recv_klass != method_result || !return_method, + "recv_klass can be destroyed when mehtid isn't needed"); + assert(itable_index.is_constant() || itable_index.as_register() == method_result, + "caller must be same register for non-constant itable index as for method"); + + // Compute start of first itableOffsetEntry (which is at the end of the vtable).
+ int vtable_base = in_bytes(Klass::vtable_start_offset()); + int itentry_off = itableMethodEntry::method_offset_in_bytes(); + int scan_step = itableOffsetEntry::size() * wordSize; + int vte_size = vtableEntry::size_in_bytes(); + assert(vte_size == wordSize, "else adjust times_vte_scale"); + + lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset())); + + // %%% Could store the aligned, prescaled offset in the klassoop. + shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3); + add(scan_tmp, scan_tmp, vtable_base); + + if (return_method) { + // Adjust recv_klass by scaled itable_index, so we can free itable_index. + assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); + if (itable_index.is_register()) { + slli(t0, itable_index.as_register(), 3); + } else { + li(t0, itable_index.as_constant() << 3); + } + add(recv_klass, recv_klass, t0); + if (itentry_off) { + add(recv_klass, recv_klass, itentry_off); + } + } + + Label search, found_method; + + ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); + beq(intf_klass, method_result, found_method); + bind(search); + // Check that the previous entry is non-null. A null entry means that + // the receiver class doesn't implement the interface, and wasn't the + // same as when the caller was compiled. + beqz(method_result, L_no_such_interface, /* is_far */ true); + addi(scan_tmp, scan_tmp, scan_step); + ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); + bne(intf_klass, method_result, search); + + bind(found_method); + + // Got a hit.
+ if (return_method) { + lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes())); + add(method_result, recv_klass, scan_tmp); + ld(method_result, Address(method_result)); + } +} + +// virtual method calling +void MacroAssembler::lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result) { + const int base = in_bytes(Klass::vtable_start_offset()); + assert(vtableEntry::size() * wordSize == 8, + "adjust the scaling in the code below"); + int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes(); + + if (vtable_index.is_register()) { + shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord); + ld(method_result, Address(method_result, vtable_offset_in_bytes)); + } else { + vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; + ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes)); + } +} + +void MacroAssembler::membar(uint32_t order_constraint) { + address prev = pc() - NativeMembar::instruction_size; + address last = code()->last_insn(); + + if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) { + NativeMembar *bar = NativeMembar_at(prev); + // We are merging two memory barrier instructions. On RISCV we + // can do this simply by ORing them together. + bar->set_kind(bar->get_kind() | order_constraint); + BLOCK_COMMENT("merged membar"); + } else { + code()->set_last_insn(pc()); + + uint32_t predecessor = 0; + uint32_t successor = 0; + + membar_mask_to_pred_succ(order_constraint, predecessor, successor); + fence(predecessor, successor); + } +} + +// Form an address from base + offset in Rd. Rd may or may not +// actually be used: you must use the Address that is returned. It +// is up to you to ensure that the shift provided matches the size +// of your data.
+// Returns Address(base, byte_offset) when is_offset_in_range(byte_offset, 12)
+// holds; otherwise emits mv + add to build base + byte_offset in Rd and
+// returns Address(Rd).
+Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) {
+  if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12
+    return Address(base, byte_offset);
+  }
+
+  // Do it the hard way
+  mv(Rd, byte_offset);
+  add(Rd, base, Rd);
+  return Address(Rd);
+}
+
+// Emits the fast-path subtype check followed by the slow-path check.
+// Both are given &L_success as the success target. The fast path is given the
+// local L_failure, which is bound at the end of this sequence; the slow path
+// is passed NULL for its failure label.
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+                                         Register super_klass,
+                                         Register tmp_reg,
+                                         Label& L_success) {
+  Label L_failure;
+  check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL);
+  check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL);
+  bind(L_failure);
+}
+
+// Loads the thread's polling word into t0 and branches to slow_path when the
+// poll fires. Overwrites t0.
+//   at_return:  compare (in_nmethod ? sp : fp) against the polling word with
+//               an unsigned greater-than branch.
+//   otherwise:  test SafepointMechanism::poll_bit() in the polling word.
+//   acquire:    emit a LoadLoad|LoadStore barrier after the load.
+void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) {
+  ld(t0, Address(xthread, JavaThread::polling_word_offset()));
+  if (acquire) {
+    membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+  }
+  if (at_return) {
+    bgtu(in_nmethod ? sp : fp, t0, slow_path, true /* is_far */);
+  } else {
+    andi(t0, t0, SafepointMechanism::poll_bit());
+    bnez(t0, slow_path, true /* is_far */);
+  }
+}
+
+// 64-bit compare-and-exchange built from an lr.d/sc.d retry loop.
+// On success control branches to 'succeed'. On a value mismatch a full
+// barrier is emitted, the observed value is copied into oldv, and control
+// jumps to *fail if fail is non-NULL (otherwise it falls through).
+// tmp is overwritten.
+void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
+                                Label &succeed, Label *fail) {
+  // oldv holds comparison value
+  // newv holds value to write in exchange
+  // addr identifies memory word to compare against/update
+  Label retry_load, nope;
+  bind(retry_load);
+  // Load reserved from the memory location
+  lr_d(tmp, addr, Assembler::aqrl);
+  // Fail and exit if it is not what we expect
+  bne(tmp, oldv, nope);
+  // If the store conditional succeeds, tmp will be zero
+  sc_d(tmp, newv, addr, Assembler::rl);
+  beqz(tmp, succeed);
+  // Retry only when the store conditional failed
+  j(retry_load);
+
+  bind(nope);
+  membar(AnyAny);
+  mv(oldv, tmp);
+  if (fail != NULL) {
+    j(*fail);
+  }
+}
+
+// Compare-and-exchange on an object's mark word. The assert checks that the
+// mark word is at offset 0, so 'obj' itself is passed to cmpxchgptr as the
+// address.
+void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp,
+                                        Label &succeed, Label *fail) {
+  assert(oopDesc::mark_offset_in_bytes() == 0, "assumption");
+  cmpxchgptr(oldv, newv, obj, tmp, succeed, fail);
+}
+
+// Emits a load-reserved of the given operand size from [addr] into t0.
+// int64 uses lr.d, int32 and uint32 use lr.w; for uint32 the loaded value is
+// additionally zero-extended from 32 bits. Other sizes are not supported.
+void MacroAssembler::load_reserved(Register addr,
+                                   enum operand_size size,
+                                   Assembler::Aqrl acquire) {
+  switch (size) {
+    case int64:
+      lr_d(t0, addr, acquire);
+      break;
+    case int32:
+      lr_w(t0, addr, acquire);
+      break;
+    case uint32:
+      lr_w(t0, addr, acquire);
+      zero_extend(t0, t0, 32);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+// Emits a store-conditional of new_val to [addr]; the sc status is written
+// to t0. int64 uses sc.d, int32 and uint32 use sc.w. Other sizes are not
+// supported.
+void MacroAssembler::store_conditional(Register addr,
+                                       Register new_val,
+                                       enum operand_size size,
+                                       Assembler::Aqrl release) {
+  switch (size) {
+    case int64:
+      sc_d(t0, new_val, addr, release);
+      break;
+    case int32:
+    case uint32:
+      sc_w(t0, new_val, addr, release);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+
+// Shared set-up for the int8/int16 compare-and-swap sequences:
+//   t1 (aligned_addr) = addr & ~3
+//   tmp1 (shift)      = (addr & 3) * 8, the bit offset inside the word
+//   tmp2 (mask)       = 0xff (int8) or the low 16 bits set (int16), << shift
+//   tmp3 (not_mask)   = ~mask
+// 'expected' and 'new_val' are shifted by 'shift' and masked in place, so
+// both registers are overwritten.
+void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected,
+                                                 Register new_val,
+                                                 enum operand_size size,
+                                                 Register tmp1, Register tmp2, Register tmp3) {
+  assert(size == int8 || size == int16, "unsupported operand size");
+
+  Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3;
+
+  andi(shift, addr, 3);
+  slli(shift, shift, 3);
+
+  andi(aligned_addr, addr, ~3);
+
+  if (size == int8) {
+    addi(mask, zr, 0xff);
+  } else {
+    // size == int16 case
+    addi(mask, zr, -1);
+    zero_extend(mask, mask, 16);
+  }
+  sll(mask, mask, shift);
+
+  xori(not_mask, mask, -1);
+
+  sll(expected, expected, shift);
+  andr(expected, expected, mask);
+
+  sll(new_val, new_val, shift);
+  andr(new_val, new_val, mask);
+}
+
+// cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps.
+// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w,
+// which are forced to work with 4-byte aligned address.
+// The sequence retries while the store-conditional fails. 'result' doubles as
+// the register holding the loaded word ('old').
+//   result_as_bool: result = 1 on success, 0 on compare failure.
+//   otherwise:      result = the narrow field taken from the loaded word,
+//                   shifted back down and sign-extended from 8 or 16 bits.
+void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected,
+                                          Register new_val,
+                                          enum operand_size size,
+                                          Assembler::Aqrl acquire, Assembler::Aqrl release,
+                                          Register result, bool result_as_bool,
+                                          Register tmp1, Register tmp2, Register tmp3) {
+  Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
+  assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
+  cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
+
+  Label retry, fail, done;
+
+  bind(retry);
+  lr_w(old, aligned_addr, acquire);
+  // Compare only the bits of the narrow field.
+  andr(tmp, old, mask);
+  bne(tmp, expected, fail);
+
+  // Merge new_val into the untouched bits of the loaded word and try to store.
+  andr(tmp, old, not_mask);
+  orr(tmp, tmp, new_val);
+  sc_w(tmp, tmp, aligned_addr, release);
+  bnez(tmp, retry);
+
+  if (result_as_bool) {
+    addi(result, zr, 1);
+    j(done);
+
+    bind(fail);
+    mv(result, zr);
+
+    bind(done);
+  } else {
+    // Success path: recompute the masked field (tmp was overwritten by sc_w).
+    andr(tmp, old, mask);
+
+    // On the failure path tmp already holds the masked field from the compare.
+    bind(fail);
+    srl(result, tmp, shift);
+
+    if (size == int8) {
+      sign_extend(result, result, 8);
+    } else {
+      // size == int16 case
+      sign_extend(result, result, 16);
+    }
+  }
+}
+
+// weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement
+// the weak CAS stuff. The major difference is that it just failed when store conditional
+// failed.
+// Single lr.w/sc.w attempt with no retry loop. As emitted here, 'result' is
+// set to 1 on the failure path (compare mismatch or failed sc) and to 0 when
+// the store-conditional succeeds.
+void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected,
+                                               Register new_val,
+                                               enum operand_size size,
+                                               Assembler::Aqrl acquire, Assembler::Aqrl release,
+                                               Register result,
+                                               Register tmp1, Register tmp2, Register tmp3) {
+  Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
+  assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
+  cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
+
+  Label succ, fail, done;
+
+  lr_w(old, aligned_addr, acquire);
+  andr(tmp, old, mask);
+  bne(tmp, expected, fail);
+
+  andr(tmp, old, not_mask);
+  orr(tmp, tmp, new_val);
+  sc_w(tmp, tmp, aligned_addr, release);
+  beqz(tmp, succ);
+
+  bind(fail);
+  addi(result, zr, 1);
+  j(done);
+
+  bind(succ);
+  mv(result, zr);
+
+  bind(done);
+}
+
+// Compare-and-exchange for the sizes handled by load_reserved() and
+// store_conditional() (int8/int16 are rejected by the assert). Retries while
+// the store-conditional fails. Uses t0 for the loaded value and sc status.
+//   result_as_bool: result = 1 on success, 0 on mismatch.
+//   otherwise:      result = expected on success, the loaded value on mismatch.
+void MacroAssembler::cmpxchg(Register addr, Register expected,
+                             Register new_val,
+                             enum operand_size size,
+                             Assembler::Aqrl acquire, Assembler::Aqrl release,
+                             Register result, bool result_as_bool) {
+  assert(size != int8 && size != int16, "unsupported operand size");
+
+  Label retry_load, done, ne_done;
+  bind(retry_load);
+  load_reserved(addr, size, acquire);
+  bne(t0, expected, ne_done);
+  store_conditional(addr, new_val, size, release);
+  bnez(t0, retry_load);
+
+  // equal, succeed
+  if (result_as_bool) {
+    li(result, 1);
+  } else {
+    mv(result, expected);
+  }
+  j(done);
+
+  // not equal, failed
+  bind(ne_done);
+  if (result_as_bool) {
+    mv(result, zr);
+  } else {
+    mv(result, t0);
+  }
+
+  bind(done);
+}
+
+// Single-attempt variant of cmpxchg: no retry when the store-conditional
+// fails. As emitted here, 'result' is 1 on the failure path and 0 on the
+// sc_done (success) path. Uses t0.
+void MacroAssembler::cmpxchg_weak(Register addr, Register expected,
+                                  Register new_val,
+                                  enum operand_size size,
+                                  Assembler::Aqrl acquire, Assembler::Aqrl release,
+                                  Register result) {
+  Label fail, done, sc_done;
+  load_reserved(addr, size, acquire);
+  bne(t0, expected, fail);
+  store_conditional(addr, new_val, size, release);
+  beqz(t0, sc_done);
+
+  // fail
+  bind(fail);
+  li(result, 1);
+  j(done);
+
+  // sc_done
+  bind(sc_done);
+  mv(result, 0);
+  bind(done);
+}
+
+// ATOMIC_OP defines MacroAssembler::atomic_<NAME>(prev, incr, addr): emits the
+// AMO instruction AOP on [addr] with ordering bits ACQUIRE | RELEASE. A
+// constant 'incr' is first moved into t0. If 'prev' is not a valid register,
+// zr is used as the destination instead.
+#define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \
+void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \
+  prev = prev->is_valid() ? prev : zr; \
+  if (incr.is_register()) { \
+    AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \
+  } else { \
+    mv(t0, incr.as_constant()); \
+    AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \
+  } \
+  return; \
+}
+
+ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed)
+ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed)
+ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl)
+ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl)
+
+#undef ATOMIC_OP
+
+// ATOMIC_XCHG defines MacroAssembler::atomic_<OP>(prev, newv, addr): emits the
+// AMO swap instruction AOP on [addr] with ordering bits ACQUIRE | RELEASE.
+// If 'prev' is not a valid register, zr is used as the destination instead.
+#define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \
+void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \
+  prev = prev->is_valid() ? prev : zr; \
+  AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \
+  return; \
+}
+
+ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed)
+ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed)
+ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl)
+ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl)
+
+#undef ATOMIC_XCHG
+
+// ATOMIC_XCHGU defines atomic_<OP1> as atomic_<OP2> followed by a zero-extend
+// of 'prev' from 32 bits.
+#define ATOMIC_XCHGU(OP1, OP2) \
+void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \
+  atomic_##OP2(prev, newv, addr); \
+  zero_extend(prev, prev, 32); \
+  return; \
+}
+
+ATOMIC_XCHGU(xchgwu, xchgw)
+ATOMIC_XCHGU(xchgalwu, xchgalw)
+
+#undef ATOMIC_XCHGU
+
+// Branches to 'done' when the biased-lock bits of obj_reg's mark word equal
+// markWord::biased_lock_pattern. temp_reg is overwritten with
+// (mark & biased_lock_mask_in_place) - biased_lock_pattern; if 'flag' is a
+// valid register it receives a copy of that value.
+void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done, Register flag) {
+  assert(UseBiasedLocking, "why call this otherwise?");
+
+  // Check for biased locking unlock case, which is a no-op
+  // Note: we do not have to check the thread ID for two reasons.
+  // First, the interpreter checks for IllegalMonitorStateException at
+  // a higher level. Second, if the bias was revoked while we held the
+  // lock, the object could not be rebiased toward another thread, so
+  // the bias bit would be clear.
+  ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+  andi(temp_reg, temp_reg, markWord::biased_lock_mask_in_place); // 1 << 3
+  sub(temp_reg, temp_reg, (u1)markWord::biased_lock_pattern);
+  if (flag->is_valid()) { mv(flag, temp_reg); }
+  beqz(temp_reg, done);
+}
+
+// Loads into dst the prototype header stored in the klass of the object in src.
+void MacroAssembler::load_prototype_header(Register dst, Register src) {
+  load_klass(dst, src);
+  ld(dst, Address(dst, Klass::prototype_header_offset()));
+}
+
+// Emits the biased-locking acquire sequence for the object in obj_reg.
+//   swap_reg               holds (or is loaded with) the object's mark word;
+//                          see swap_reg_contains_mark.
+//   tmp_reg, t0            scratch registers, overwritten.
+//   done                   target taken when the bias is held or acquired.
+//   slow_case              passed to cmpxchg_obj_header as the failure target
+//                          of the acquire/rebias CAS (may be NULL).
+//   counters               optional statistics counters; defaulted from
+//                          BiasedLocking::counters() when
+//                          PrintBiasedLockingStatistics is set.
+//   flag                   if valid, receives a status value on the paths
+//                          that reach 'done' (0 on the success paths below).
+// Control falls out at the end of the sequence (cas_label) when the object
+// is not biasable or after the bias has been revoked.
+void MacroAssembler::biased_locking_enter(Register lock_reg,
+                                          Register obj_reg,
+                                          Register swap_reg,
+                                          Register tmp_reg,
+                                          bool swap_reg_contains_mark,
+                                          Label& done,
+                                          Label* slow_case,
+                                          BiasedLockingCounters* counters,
+                                          Register flag) {
+  assert(UseBiasedLocking, "why call this otherwise?");
+  assert_different_registers(lock_reg, obj_reg, swap_reg);
+
+  if (PrintBiasedLockingStatistics && counters == NULL) {
+    counters = BiasedLocking::counters();
+  }
+
+  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0, flag);
+  assert(markWord::age_shift == markWord::lock_bits + markWord::biased_lock_bits, "biased locking makes assumptions about bit layout");
+  Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
+
+  // Biased locking
+  // See whether the lock is currently biased toward our thread and
+  // whether the epoch is still valid
+  // Note that the runtime guarantees sufficient alignment of JavaThread
+  // pointers to allow age to be placed into low bits
+  // First check to see whether biasing is even enabled for this object
+  Label cas_label;
+  if (!swap_reg_contains_mark) {
+    ld(swap_reg, mark_addr);
+  }
+  andi(tmp_reg, swap_reg, markWord::biased_lock_mask_in_place);
+  xori(t0, tmp_reg, (u1)markWord::biased_lock_pattern);
+  bnez(t0, cas_label); // don't care flag unless jumping to done
+  // The bias pattern is present in the object's header. Need to check
+  // whether the bias owner and the epoch are both still current.
+  load_prototype_header(tmp_reg, obj_reg);
+  orr(tmp_reg, tmp_reg, xthread);
+  xorr(tmp_reg, swap_reg, tmp_reg);
+  andi(tmp_reg, tmp_reg, ~((int) markWord::age_mask_in_place));
+  if (flag->is_valid()) {
+    mv(flag, tmp_reg);
+  }
+
+  if (counters != NULL) {
+    Label around;
+    bnez(tmp_reg, around);
+    atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0);
+    j(done);
+    bind(around);
+  } else {
+    beqz(tmp_reg, done);
+  }
+
+  Label try_revoke_bias;
+  Label try_rebias;
+
+  // At this point we know that the header has the bias pattern and
+  // that we are not the bias owner in the current epoch. We need to
+  // figure out more details about the state of the header in order to
+  // know what operations can be legally performed on the object's
+  // header.
+
+  // If the low three bits in the xor result aren't clear, that means
+  // the prototype header is no longer biased and we have to revoke
+  // the bias on this object.
+  andi(t0, tmp_reg, markWord::biased_lock_mask_in_place);
+  bnez(t0, try_revoke_bias);
+
+  // Biasing is still enabled for this data type. See whether the
+  // epoch of the current bias is still valid, meaning that the epoch
+  // bits of the mark word are equal to the epoch bits of the
+  // prototype header. (Note that the prototype header's epoch bits
+  // only change at a safepoint.) If not, attempt to rebias the object
+  // toward the current thread. Note that we must be absolutely sure
+  // that the current epoch is invalid in order to do this because
+  // otherwise the manipulations it performs on the mark word are
+  // illegal.
+  andi(t0, tmp_reg, markWord::epoch_mask_in_place);
+  bnez(t0, try_rebias);
+
+  // The epoch of the current bias is still valid but we know nothing
+  // about the owner; it might be set or it might be clear. Try to
+  // acquire the bias of the object using an atomic operation. If this
+  // fails we will go in to the runtime to revoke the object's bias.
+  // Note that we first construct the presumed unbiased header so we
+  // don't accidentally blow away another thread's valid bias.
+  {
+    Label cas_success;
+    Label counter;
+    li(t0, (int64_t)(markWord::biased_lock_mask_in_place | markWord::age_mask_in_place | markWord::epoch_mask_in_place));
+    andr(swap_reg, swap_reg, t0);
+    orr(tmp_reg, swap_reg, xthread);
+    cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case);
+    // cas failed here if slow_case == NULL
+    if (flag->is_valid()) {
+      li(flag, 1);
+      j(counter);
+    }
+
+    // If the biasing toward our thread failed, this means that
+    // another thread succeeded in biasing it toward itself and we
+    // need to revoke that bias. The revocation will occur in the
+    // interpreter runtime in the slow case.
+    bind(cas_success);
+    if (flag->is_valid()) {
+      li(flag, 0);
+      bind(counter);
+    }
+
+    if (counters != NULL) {
+      atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()),
+                  tmp_reg, t0);
+    }
+  }
+  j(done);
+
+  bind(try_rebias);
+  // At this point we know the epoch has expired, meaning that the
+  // current "bias owner", if any, is actually invalid. Under these
+  // circumstances _only_, we are allowed to use the current header's
+  // value as the comparison value when doing the cas to acquire the
+  // bias in the current epoch. In other words, we allow transfer of
+  // the bias from one thread to another directly in this situation.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  {
+    Label cas_success;
+    Label counter;
+    load_prototype_header(tmp_reg, obj_reg);
+    orr(tmp_reg, xthread, tmp_reg);
+    cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case);
+    // cas failed here if slow_case == NULL
+    if (flag->is_valid()) {
+      li(flag, 1);
+      j(counter);
+    }
+
+    // If the biasing toward our thread failed, then another thread
+    // succeeded in biasing it toward itself and we need to revoke that
+    // bias. The revocation will occur in the runtime in the slow case.
+    bind(cas_success);
+    if (flag->is_valid()) {
+      li(flag, 0);
+      bind(counter);
+    }
+
+    if (counters != NULL) {
+      atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()),
+                  tmp_reg, t0);
+    }
+  }
+  j(done);
+
+  // don't care flag unless jumping to done
+  bind(try_revoke_bias);
+  // The prototype mark in the klass doesn't have the bias bit set any
+  // more, indicating that objects of this data type are not supposed
+  // to be biased any more. We are going to try to reset the mark of
+  // this object to the prototype value and fall through to the
+  // CAS-based locking scheme. Note that if our CAS fails, it means
+  // that another thread raced us for the privilege of revoking the
+  // bias of this particular object, so it's okay to continue in the
+  // normal locking code.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  {
+    Label cas_success, nope;
+    load_prototype_header(tmp_reg, obj_reg);
+    cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope);
+    bind(cas_success);
+
+    // Fall through to the normal CAS-based lock, because no matter what
+    // the result of the above CAS, some thread must have succeeded in
+    // removing the bias bit from the object's header.
+    if (counters != NULL) {
+      atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg,
+                  t0);
+    }
+    bind(nope);
+  }
+
+  bind(cas_label);
+}
+
+// Atomically increments the 32-bit word at [counter_addr] with an lr.w/sc.w
+// retry loop. tmp is overwritten.
+void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) {
+  Label retry_load;
+  bind(retry_load);
+  // flush and load exclusive from the memory location
+  lr_w(tmp, counter_addr);
+  addw(tmp, tmp, 1);
+  // if we store+flush with no intervening write tmp will be zero
+  sc_w(tmp, tmp, counter_addr);
+  bnez(tmp, retry_load);
+}
+
+// Jump (no link) to 'entry', which must lie in the code cache. When
+// far_branches() is true the target is formed with la_patchable into tmp and
+// reached with jalr; otherwise a plain j is emitted. If cbuf is non-NULL its
+// instruction mark is set just before the jump instruction.
+void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
+  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
+  assert(CodeCache::find_blob(entry.target()) != NULL,
+         "destination of far call not found in code cache");
+  int32_t offset = 0;
+  if (far_branches()) {
+    // We can use auipc + jalr here because we know that the total size of
+    // the code cache cannot exceed 2Gb.
+    la_patchable(tmp, entry, offset);
+    if (cbuf != NULL) { cbuf->set_insts_mark(); }
+    jalr(x0, tmp, offset);
+  } else {
+    if (cbuf != NULL) { cbuf->set_insts_mark(); }
+    j(entry);
+  }
+}
+
+// Same as far_jump, but the emitted jalr/jal links through x1 (a call).
+void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
+  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
+  assert(CodeCache::find_blob(entry.target()) != NULL,
+         "destination of far call not found in code cache");
+  int32_t offset = 0;
+  if (far_branches()) {
+    // We can use auipc + jalr here because we know that the total size of
+    // the code cache cannot exceed 2Gb.
+    la_patchable(tmp, entry, offset);
+    if (cbuf != NULL) { cbuf->set_insts_mark(); }
+    jalr(x1, tmp, offset); // link
+  } else {
+    if (cbuf != NULL) { cbuf->set_insts_mark(); }
+    jal(entry); // link
+  }
+}
+
+// Fast-path subtype check. At most one of L_success / L_failure / L_slow_path
+// may be NULL; a NULL label means "fall through" for that outcome.
+// If super_check_offset is noreg, the offset is loaded from super_klass into
+// tmp_reg. Overwrites t0 and t1.
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register tmp_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   Label* L_slow_path,
+                                                   Register super_check_offset) {
+  assert_different_registers(sub_klass, super_klass, tmp_reg);
+  bool must_load_sco = (super_check_offset == noreg);
+  if (must_load_sco) {
+    assert(tmp_reg != noreg, "supply either a temp or a register offset");
+  } else {
+    assert_different_registers(sub_klass, super_klass, super_check_offset);
+  }
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in batch");
+
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  int sco_offset = in_bytes(Klass::super_check_offset_offset());
+  Address super_check_offset_addr(super_klass, sco_offset);
+
+  // Hacked jmp, which may only be used just before L_fallthrough.
+#define final_jmp(label) \
+  if (&(label) == &L_fallthrough) { /*do nothing*/ } \
+  else j(label) /*omit semi*/
+
+  // If the pointers are equal, we are done (e.g., String[] elements).
+  // This self-check enables sharing of secondary supertype arrays among
+  // non-primary types such as array-of-interface. Otherwise, each such
+  // type would need its own customized SSA.
+  // We move this check to the front of the fast path because many
+  // type checks are in fact trivially successful in this manner,
+  // so we get a nicely predicted branch right at the start of the check.
+  beq(sub_klass, super_klass, *L_success);
+
+  // Check the supertype display:
+  if (must_load_sco) {
+    lwu(tmp_reg, super_check_offset_addr);
+    super_check_offset = tmp_reg;
+  }
+  add(t0, sub_klass, super_check_offset);
+  Address super_check_addr(t0);
+  ld(t0, super_check_addr); // load displayed supertype
+
+  // This check has worked decisively for primary supers.
+  // Secondary supers are sought in the super_cache ('super_cache_addr').
+  // (Secondary supers are interfaces and very deeply nested subtypes.)
+  // This works in the same check above because of a tricky aliasing
+  // between the super_cache and the primary super display elements.
+  // (The 'super_check_addr' can address either, as the case requires.)
+  // Note that the cache is updated below if it does not help us find
+  // what we need immediately.
+  // So if it was a primary super, we can just fail immediately.
+  // Otherwise, it's the slow path for us (no success at this point).
+
+  beq(super_klass, t0, *L_success);
+  mv(t1, sc_offset);
+  if (L_failure == &L_fallthrough) {
+    beq(super_check_offset, t1, *L_slow_path);
+  } else {
+    bne(super_check_offset, t1, *L_failure, /* is_far */ true);
+    final_jmp(*L_slow_path);
+  }
+
+  bind(L_fallthrough);
+
+#undef final_jmp
+}
+
+// Scans count pointer sized words at [addr] for occurrence of value,
+// generic
+// The loop exits with tmp holding the matching word on a hit. addr and count
+// are advanced/decremented in place, so both are overwritten.
+void MacroAssembler::repne_scan(Register addr, Register value, Register count,
+                                Register tmp) {
+  Label Lloop, Lexit;
+  beqz(count, Lexit);
+  bind(Lloop);
+  ld(tmp, addr);
+  beq(value, tmp, Lexit);
+  add(addr, addr, wordSize);
+  sub(count, count, 1);
+  bnez(count, Lloop);
+  bind(Lexit);
+}
+
+// Slow-path subtype check: linear scan of sub_klass's secondary supers for
+// super_klass. On a hit super_klass is stored into sub_klass's secondary
+// super cache and control goes to L_success; otherwise to L_failure. At most
+// one of the two labels may be NULL (meaning "fall through").
+// Uses x10, x12 and x15 for the scan; those that are not tmp1_reg/tmp2_reg
+// are pushed and popped around it. Overwrites t0 and t1.
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register tmp1_reg,
+                                                   Register tmp2_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure) {
+  assert_different_registers(sub_klass, super_klass, tmp1_reg);
+  if (tmp2_reg != noreg) {
+    assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0);
+  }
+#define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg)
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  // A couple of useful fields in sub_klass:
+  int ss_offset = in_bytes(Klass::secondary_supers_offset());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  Address secondary_supers_addr(sub_klass, ss_offset);
+  Address super_cache_addr( sub_klass, sc_offset);
+
+  BLOCK_COMMENT("check_klass_subtype_slow_path");
+
+  // Do a linear scan of the secondary super-klass chain.
+  // This code is rarely used, so simplicity is a virtue here.
+  // The repne_scan instruction uses fixed registers, which we must spill.
+  // Don't worry too much about pre-existing connections with the input regs.
+
+  assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super)
+  assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter)
+
+  RegSet pushed_registers;
+  if (!IS_A_TEMP(x12)) {
+    pushed_registers += x12;
+  }
+  if (!IS_A_TEMP(x15)) {
+    pushed_registers += x15;
+  }
+
+  if (super_klass != x10 || UseCompressedOops) {
+    if (!IS_A_TEMP(x10)) {
+      pushed_registers += x10;
+    }
+  }
+
+  push_reg(pushed_registers, sp);
+
+  // Get super_klass value into x10 (even if it was in x15 or x12)
+  mv(x10, super_klass);
+
+#ifndef PRODUCT
+  mv(t1, (address)&SharedRuntime::_partial_subtype_ctr);
+  Address pst_counter_addr(t1);
+  ld(t0, pst_counter_addr);
+  add(t0, t0, 1);
+  sd(t0, pst_counter_addr);
+#endif // PRODUCT
+
+  // We will consult the secondary-super array.
+  ld(x15, secondary_supers_addr);
+  // Load the array length.
+  // NOTE(review): 'Array::' here and below looks like it has lost a template
+  // argument list somewhere in transit - confirm against the upstream source.
+  lwu(x12, Address(x15, Array::length_offset_in_bytes()));
+  // Skip to start of data.
+  add(x15, x15, Array::base_offset_in_bytes());
+
+  // Set t0 to an obvious invalid value, falling through by default
+  li(t0, -1);
+  // Scan X12 words at [X15] for an occurrence of X10.
+  repne_scan(x15, x10, x12, t0);
+
+  // pop will restore x10, so we should use a temp register to keep its value
+  mv(t1, x10);
+
+  // Unspill the temp registers:
+  pop_reg(pushed_registers, sp);
+
+  bne(t1, t0, *L_failure);
+
+  // Success. Cache the super we found and proceed in triumph.
+  sd(super_klass, super_cache_addr);
+
+  if (L_success != &L_fallthrough) {
+    j(*L_success);
+  }
+
+#undef IS_A_TEMP
+
+  bind(L_fallthrough);
+}
+
+// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
+// Forwards to the current barrier set's assembler.
+void MacroAssembler::tlab_allocate(Register obj,
+                                   Register var_size_in_bytes,
+                                   int con_size_in_bytes,
+                                   Register tmp1,
+                                   Register tmp2,
+                                   Label& slow_case,
+                                   bool is_far) {
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far);
+}
+
+// Defines obj, preserves var_size_in_bytes
+// Forwards to the current barrier set's assembler.
+void MacroAssembler::eden_allocate(Register obj,
+                                   Register var_size_in_bytes,
+                                   int con_size_in_bytes,
+                                   Register tmp,
+                                   Label& slow_case,
+                                   bool is_far) {
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp, slow_case, is_far);
+}
+
+
+// get_thread() can be called anywhere inside generated code so we
+// need to save whatever non-callee save context might get clobbered
+// by the call to Thread::current() or, indeed, the call setup code.
+// Emits a call to Thread::current() and moves its result (x10) into 'thread'.
+// x5-x7, x10-x17, x28-x31 and ra, minus 'thread' itself, are pushed before
+// the call and popped afterwards.
+void MacroAssembler::get_thread(Register thread) {
+  // save all call-clobbered regs except thread
+  RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) +
+                      RegSet::range(x28, x31) + ra - thread;
+  push_reg(saved_regs, sp);
+
+  int32_t offset = 0;
+  movptr_with_offset(ra, CAST_FROM_FN_PTR(address, Thread::current), offset);
+  jalr(ra, ra, offset);
+  if (thread != x10) {
+    mv(thread, x10);
+  }
+
+  // restore pushed registers
+  pop_reg(saved_regs, sp);
+}
+
+// Loads the card table's byte_map_base into reg as a 64-bit immediate.
+// The barrier set is cast to CardTableBarrierSet without a check here.
+void MacroAssembler::load_byte_map_base(Register reg) {
+  CardTable::CardValue* byte_map_base =
+    ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base();
+  li(reg, (uint64_t)byte_map_base);
+}
+
+// Forms the literal address 'dest' as reg1 + offset and records dest's
+// relocation at the current instruction mark.
+// If dest is within the checked distance of both code cache bounds, a single
+// auipc is emitted and 'offset' receives the sign-extended low 12 bits of
+// the pc-relative distance. Otherwise movptr_with_offset is used.
+// The caller is expected to consume 'offset' in the following instruction
+// (e.g. jalr or a load), as the callers visible in this file do.
+void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) {
+  // NOTE(review): rtype is computed but not referenced again in this function.
+  relocInfo::relocType rtype = dest.rspec().reloc()->type();
+  unsigned long low_address = (uintptr_t)CodeCache::low_bound();
+  unsigned long high_address = (uintptr_t)CodeCache::high_bound();
+  unsigned long dest_address = (uintptr_t)dest.target();
+  long offset_low = dest_address - low_address;
+  long offset_high = dest_address - high_address;
+
+  assert(is_valid_riscv64_address(dest.target()), "bad address");
+  assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address");
+
+  InstructionMark im(this);
+  code_section()->relocate(inst_mark(), dest.rspec());
+  // RISC-V doesn't compute a page-aligned address, in order to partially
+  // compensate for the use of *signed* offsets in its base+disp12
+  // addressing mode (RISC-V's PC-relative reach remains asymmetric
+  // [-(2G + 2K), 2G - 2k).
+  if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) {
+    int64_t distance = dest.target() - pc();
+    // + 0x800 rounds the upper part so that the signed low 12 bits below
+    // add back to the exact distance.
+    auipc(reg1, (int32_t)distance + 0x800);
+    // Sign-extend the low 12 bits of the distance.
+    offset = ((int32_t)distance << 20) >> 20;
+  } else {
+    movptr_with_offset(reg1, dest.target(), offset);
+  }
+}
+
+// Allocates a frame of 'framesize' bytes: decrements sp, stores fp and ra in
+// the top two words of the new frame, and, when PreserveFramePointer is set,
+// points fp at the old sp (sp + framesize).
+void MacroAssembler::build_frame(int framesize) {
+  assert(framesize >= 2, "framesize must include space for FP/RA");
+  assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
+  sub(sp, sp, framesize);
+  sd(fp, Address(sp, framesize - 2 * wordSize));
+  sd(ra, Address(sp, framesize - wordSize));
+  if (PreserveFramePointer) { add(fp, sp, framesize); }
+  verify_cross_modify_fence_not_required();
+}
+
+// Inverse of build_frame: reloads fp and ra from the top two words of the
+// frame and releases 'framesize' bytes of stack.
+void MacroAssembler::remove_frame(int framesize) {
+  assert(framesize >= 2, "framesize must include space for FP/RA");
+  assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
+  ld(fp, Address(sp, framesize - 2 * wordSize));
+  ld(ra, Address(sp, framesize - wordSize));
+  add(sp, sp, framesize);
+}
+
+// Skips to the end of the sequence when sp is below (unsigned) the thread's
+// reserved-stack-activation value. Otherwise emits a call to
+// SharedRuntime::enable_stack_reserved_zone(xthread) followed by a jump to
+// the throw_delayed_StackOverflowError stub. Overwrites t0 and c_rarg0.
+void MacroAssembler::reserved_stack_check() {
+  // testing if reserved zone needs to be enabled
+  Label no_reserved_zone_enabling;
+
+  ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset()));
+  bltu(sp, t0, no_reserved_zone_enabling);
+
+  enter(); // RA and FP are live.
+  mv(c_rarg0, xthread);
+  int32_t offset = 0;
+  la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset);
+  jalr(x1, t0, offset);
+  leave();
+
+  // We have already removed our own frame.
+  // throw_delayed_StackOverflowError will think that it's been
+  // called by our caller.
+  offset = 0;
+  la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset);
+  jalr(x0, t0, offset);
+  should_not_reach_here();
+
+  bind(no_reserved_zone_enabling);
+}
+
+// Move the address of the polling page into dest.
+// The address is loaded from the current thread (xthread); the rtype
+// parameter is not used by this implementation.
+void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) {
+  ld(dest, Address(xthread, JavaThread::polling_page_offset()));
+}
+
+// Read the polling page. The address of the polling page must
+// already be in r.
+// A relocation of type rtype is recorded at the load instruction, and the
+// address of that instruction is returned. The loaded value is discarded
+// (the destination is zr).
+address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) {
+  address mark;
+  {
+    InstructionMark im(this);
+    code_section()->relocate(inst_mark(), rtype);
+    lwu(zr, Address(r, offset));
+    mark = inst_mark();
+  }
+  verify_cross_modify_fence_not_required();
+  return mark;
+}
+
+// Emits a 32-bit immediate load into dst, tagged with an oop relocation for
+// 'obj', followed by a zero-extend from 32 bits. The immediate emitted here is
+// the placeholder 0xDEADBEEF.
+// NOTE(review): presumably the placeholder is patched later through the
+// recorded relocation - confirm against the relocation code.
+void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
+#ifdef ASSERT
+  {
+    ThreadInVMfromUnknown tiv;
+    assert (UseCompressedOops, "should only be used for compressed oops");
+    assert (Universe::heap() != NULL, "java heap should be initialized");
+    assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+    assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop");
+  }
+#endif
+  int oop_index = oop_recorder()->find_index(obj);
+  InstructionMark im(this);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+  code_section()->relocate(inst_mark(), rspec);
+  li32(dst, 0xDEADBEEF);
+  zero_extend(dst, dst, 32);
+}
+
+// Emits a 32-bit immediate load of the encoded (narrow) klass pointer for k
+// into dst, tagged with a metadata relocation, followed by a zero-extend
+// from 32 bits.
+void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
+  assert (UseCompressedClassPointers, "should only be used for compressed headers");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int index = oop_recorder()->find_index(k);
+  assert(!Universe::heap()->is_in(k), "should not be an oop");
+
+  InstructionMark im(this);
+  RelocationHolder rspec = metadata_Relocation::spec(index);
+  code_section()->relocate(inst_mark(), rspec);
+  narrowKlass nk = CompressedKlassPointers::encode(k);
+  li32(dst, nk);
+  zero_extend(dst, dst, 32);
+}
+
+// Maybe emit a call via a trampoline. If the code cache is small
+// trampolines won't be emitted.
+address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { + assert(JavaThread::current()->is_Compiler_thread(), "just checking"); + assert(entry.rspec().type() == relocInfo::runtime_call_type || + entry.rspec().type() == relocInfo::opt_virtual_call_type || + entry.rspec().type() == relocInfo::static_call_type || + entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); + + bool need_trampoline = far_branches(); + if (!need_trampoline && entry.rspec().type() == relocInfo::runtime_call_type && !CodeCache::contains(entry.target())) { + // If it is a runtime call to an address outside the (small) CodeCache, + // we need to check whether it is in range. + address target = entry.target(); + assert(target < CodeCache::low_bound() || target >= CodeCache::high_bound(), "target is inside CodeCache"); + // Case 1: -------T-------L====CodeCache====H------- + // ^-------longest branch---| + // Case 2: -------L====CodeCache====H-------T------- + // |-------longest branch ---^ + address longest_branch_start = (target < CodeCache::low_bound()) ? CodeCache::high_bound() - NativeInstruction::instruction_size + : CodeCache::low_bound(); + need_trampoline = !reachable_from_branch_at(longest_branch_start, target); + } + + // We need a trampoline if branches are far. + if (need_trampoline) { + bool in_scratch_emit_size = false; +#ifdef COMPILER2 + // We don't want to emit a trampoline if C2 is generating dummy + // code during its branch shortening phase.
+ CompileTask* task = ciEnv::current()->task(); + in_scratch_emit_size = + (task != NULL && is_c2_compile(task->comp_level()) && + Compile::current()->output()->in_scratch_emit_size()); +#endif + if (!in_scratch_emit_size) { + address stub = emit_trampoline_stub(offset(), entry.target()); + if (stub == NULL) { + postcond(pc() == badAddress); + return NULL; // CodeCache is full + } + } + } + + if (cbuf != NULL) { cbuf->set_insts_mark(); } + relocate(entry.rspec()); + if (!need_trampoline) { + jal(entry.target()); + } else { + jal(pc()); + } + // just need to return a non-null address + postcond(pc() != badAddress); + return pc(); +} + +address MacroAssembler::ic_call(address entry, jint method_index) { + RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); + movptr(t1, (address)Universe::non_oop_word()); + assert_cond(entry != NULL); + return trampoline_call(Address(entry, rh)); +} + +// Emit a trampoline stub for a call to a target which is too far away. +// +// code sequences: +// +// call-site: +// branch-and-link to the destination or to the trampoline stub +// +// Related trampoline stub for this call site in the stub section: +// load the call target from the constant pool +// branch (RA still points to the call site above) + +address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, + address dest) { + address stub = start_a_stub(NativeInstruction::instruction_size + + NativeCallTrampolineStub::instruction_size); + if (stub == NULL) { + return NULL; // CodeBuffer::expand failed + } + + // Create a trampoline stub relocation which relates this trampoline stub + // with the call instruction at insts_call_instruction_offset in the + // instructions code-section.
+ + // make sure 4 byte aligned here, so that the destination address would be + // 8 byte aligned after 3 instructions + // when we reach here we may get a 2-byte alignment so need to align it + align(wordSize, NativeCallTrampolineStub::data_offset); + + relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + + insts_call_instruction_offset)); + const int stub_start_offset = offset(); + + // Now, create the trampoline stub's code: + // - load the call target + // - jump to it + Label target; + ld(t0, target); // auipc + ld + jr(t0); // jalr + bind(target); + assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, + "should be"); + assert(offset() % wordSize == 0, "bad alignment"); + emit_int64((intptr_t)dest); + + const address stub_start_addr = addr_at(stub_start_offset); + + assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); + + end_a_stub(); + return stub_start_addr; +} + +Address MacroAssembler::add_memory_helper(const Address dst) { + switch (dst.getMode()) { + case Address::base_plus_offset: + // This is the expected mode, although we allow all the other + // forms below.
+ return form_address(t1, dst.base(), dst.offset()); + default: + la(t1, dst); + return Address(t1); + } +} + +void MacroAssembler::add_memory_int64(const Address dst, int64_t imm) { + Address adr = add_memory_helper(dst); + assert_different_registers(adr.base(), t0); + ld(t0, adr); + addi(t0, t0, imm); + sd(t0, adr); +} + +void MacroAssembler::add_memory_int32(const Address dst, int32_t imm) { + Address adr = add_memory_helper(dst); + assert_different_registers(adr.base(), t0); + lwu(t0, adr); + addiw(t0, t0, imm); + sw(t0, adr); +} + +void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { + assert_different_registers(src1, t0); + int32_t offset; + la_patchable(t0, src2, offset); + ld(t0, Address(t0, offset)); + beq(src1, t0, equal); +} + +void MacroAssembler::load_method_holder_cld(Register result, Register method) { + load_method_holder(result, method); + ld(result, Address(result, InstanceKlass::class_loader_data_offset())); +} + +void MacroAssembler::load_method_holder(Register holder, Register method) { + ld(holder, Address(method, Method::const_offset())); // ConstMethod* + ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* + ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* +} + +// string indexof +// compute index by trailing zeros +void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, + Register match_mask, Register result, + Register ch2, Register tmp, + bool haystack_isL) +{ + int haystack_chr_shift = haystack_isL ?
0 : 1; + srl(match_mask, match_mask, trailing_zeros); + srli(match_mask, match_mask, 1); + srli(tmp, trailing_zeros, LogBitsPerByte); + if (!haystack_isL) andi(tmp, tmp, 0xE); + add(haystack, haystack, tmp); + ld(ch2, Address(haystack)); + if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); + add(result, result, tmp); +} + +// string indexof +// Find pattern element in src, compute match mask, +// only the first occurrence of 0x80/0x8000 at low bits is the valid match index +// match mask patterns and corresponding indices would be like: +// - 0x8080808080808080 (Latin1) +// - 7 6 5 4 3 2 1 0 (match index) +// - 0x8000800080008000 (UTF16) +// - 3 2 1 0 (match index) +void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, + Register mask1, Register mask2) +{ + xorr(src, pattern, src); + sub(match_mask, src, mask1); + orr(src, src, mask2); + notr(src, src); + andr(match_mask, match_mask, src); +} + +#ifdef COMPILER2 +// Code for BigInteger::mulAdd intrinsic +// out = x10 +// in = x11 +// offset = x12 (already out.length-offset) +// len = x13 +// k = x14 +// tmp = x28 +// +// pseudo code from java implementation: +// long kLong = k & LONG_MASK; +// carry = 0; +// offset = out.length-offset - 1; +// for (int j = len - 1; j >= 0; j--) { +// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; +// out[offset--] = (int)product; +// carry = product >>> 32; +// } +// return (int)carry; +void MacroAssembler::mul_add(Register out, Register in, Register offset, + Register len, Register k, Register tmp) { + Label L_tail_loop, L_unroll, L_end; + mv(tmp, out); + mv(out, zr); + blez(len, L_end); + zero_extend(k, k, 32); + slliw(t0, offset, LogBytesPerInt); + add(offset, tmp, t0); + slliw(t0, len, LogBytesPerInt); + add(in, in, t0); + + const int unroll = 8; + li(tmp, unroll); + blt(len, tmp, L_tail_loop); + bind(L_unroll); + for (int i = 0; i < unroll; i++) { + sub(in, in, BytesPerInt); + lwu(t0, Address(in, 0)); +
mul(t1, t0, k); + add(t0, t1, out); + sub(offset, offset, BytesPerInt); + lwu(t1, Address(offset, 0)); + add(t0, t0, t1); + sw(t0, Address(offset, 0)); + srli(out, t0, 32); + } + subw(len, len, tmp); + bge(len, tmp, L_unroll); + + bind(L_tail_loop); + blez(len, L_end); + sub(in, in, BytesPerInt); + lwu(t0, Address(in, 0)); + mul(t1, t0, k); + add(t0, t1, out); + sub(offset, offset, BytesPerInt); + lwu(t1, Address(offset, 0)); + add(t0, t0, t1); + sw(t0, Address(offset, 0)); + srli(out, t0, 32); + subw(len, len, 1); + j(L_tail_loop); + + bind(L_end); +} + +// dst = src1 + src2; carry = 1 if the unsigned addition wrapped around, else 0 +void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) +{ + assert_different_registers(dst, carry); + assert_different_registers(dst, src2); + add(dst, src1, src2); + sltu(carry, dst, src2); +} + +// dst = src1 + src2 + carry; no carry-out is produced +void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) +{ + assert_different_registers(dst, carry); + add(dst, src1, src2); + add(dst, dst, carry); +} + +// dst = src1 + src2 + carry; carry = (dst < src2) as an unsigned compare +void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) +{ + assert_different_registers(dst, src2); + adc(dst, src1, src2, carry); + sltu(carry, dst, src2); +} + +void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, + Register src1, Register src2, Register carry) +{ + cad(dest_lo, dest_lo, src1, carry); // dest_lo += src1 + add(dest_hi, dest_hi, carry); // propagate the carry into dest_hi + cad(dest_lo, dest_lo, src2, carry); // dest_lo += src2 + add(final_dest_hi, dest_hi, carry); // final_dest_hi = dest_hi + carry +} + +/** + * Multiply 32 bit by 32 bit first loop.
+ */ +void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx) +{ + // jlong carry, x[], y[], z[]; + // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { + // long product = y[idx] * x[xstart] + carry; + // z[kdx] = (int)product; + // carry = product >>> 32; + // } + // z[xstart] = (int)carry; (this final store is not emitted by this function) + + Label L_first_loop, L_first_loop_exit; + blez(idx, L_first_loop_exit); + + shadd(t0, xstart, x, t0, LogBytesPerInt); + lwu(x_xstart, Address(t0, 0)); // x_xstart = x[xstart] + + bind(L_first_loop); + subw(idx, idx, 1); + shadd(t0, idx, y, t0, LogBytesPerInt); + lwu(y_idx, Address(t0, 0)); // y_idx = y[idx] + mul(product, x_xstart, y_idx); + add(product, product, carry); // product = y[idx] * x[xstart] + carry + srli(carry, product, 32); // carry = product >>> 32 + subw(kdx, kdx, 1); + shadd(t0, kdx, z, t0, LogBytesPerInt); + sw(product, Address(t0, 0)); // z[kdx] = (int)product + bgtz(idx, L_first_loop); + + bind(L_first_loop_exit); +} + +/** + * Multiply 64 bit by 64 bit first loop.
+ */ +void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx) +{ + // + // jlong carry, x[], y[], z[]; + // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { + // huge_128 product = y[idx] * x[xstart] + carry; + // z[kdx] = (jlong)product; + // carry = (jlong)(product >>> 64); + // } + // z[xstart] = carry; + // + + Label L_first_loop, L_first_loop_exit; + Label L_one_x, L_one_y, L_multiply; + + subw(xstart, xstart, 1); + bltz(xstart, L_one_x); // xstart went negative: take the single 32-bit load at L_one_x + + shadd(t0, xstart, x, t0, LogBytesPerInt); + ld(x_xstart, Address(t0, 0)); + ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian + + bind(L_first_loop); + subw(idx, idx, 1); + bltz(idx, L_first_loop_exit); + subw(idx, idx, 1); + bltz(idx, L_one_y); // only one int of y is left: take the 32-bit load at L_one_y + + shadd(t0, idx, y, t0, LogBytesPerInt); + ld(y_idx, Address(t0, 0)); + ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian + bind(L_multiply); + + mulhu(t0, x_xstart, y_idx); // t0 = upper 64 bits of the unsigned product + mul(product, x_xstart, y_idx); // product = lower 64 bits of the product + cad(product, product, carry, t1); // product += carry, t1 = carry-out + adc(carry, t0, zr, t1); // carry = t0 + t1 + + subw(kdx, kdx, 2); + ror_imm(product, product, 32); // back to big-endian + shadd(t0, kdx, z, t0, LogBytesPerInt); + sd(product, Address(t0, 0)); + + j(L_first_loop); + + bind(L_one_y); + lwu(y_idx, Address(y, 0)); // y_idx = y[0] (32-bit load) + j(L_multiply); + + bind(L_one_x); + lwu(x_xstart, Address(x, 0)); // x_xstart = x[0] (32-bit load) + j(L_first_loop); + + bind(L_first_loop_exit); +} + +/** + * Multiply 128 bit by 128 bit. Unrolled inner loop.
+ * + */ +void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, + Register carry, Register carry2, + Register idx, Register jdx, + Register yz_idx1, Register yz_idx2, + Register tmp, Register tmp3, Register tmp4, + Register tmp6, Register product_hi) +{ + // jlong carry, x[], y[], z[]; + // int kdx = xstart+1; + // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop + // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; + // jlong carry2 = (jlong)(tmp3 >>> 64); + // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; + // carry = (jlong)(tmp4 >>> 64); + // z[kdx+idx+1] = (jlong)tmp3; + // z[kdx+idx] = (jlong)tmp4; + // } + // idx += 2; + // if (idx > 0) { + // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; + // z[kdx+idx] = (jlong)yz_idx1; + // carry = (jlong)(yz_idx1 >>> 64); + // } + // + + Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; + + srliw(jdx, idx, 2); // jdx = idx / 4: number of iterations of the unrolled loop + + bind(L_third_loop); + + subw(jdx, jdx, 1); + bltz(jdx, L_third_loop_exit); + subw(idx, idx, 4); // one iteration handles 4 ints (two 64-bit words) of y and z + + shadd(t0, idx, y, t0, LogBytesPerInt); + ld(yz_idx2, Address(t0, 0)); + ld(yz_idx1, Address(t0, wordSize)); + + shadd(tmp6, idx, z, t0, LogBytesPerInt); + + ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian + ror_imm(yz_idx2, yz_idx2, 32); + + ld(t1, Address(tmp6, 0)); + ld(t0, Address(tmp6, wordSize)); + + mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 + mulhu(tmp4, product_hi, yz_idx1); + + ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian + ror_imm(t1, t1, 32, tmp); + + mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp + mulhu(carry2, product_hi, yz_idx2); + + cad(tmp3, tmp3, carry, carry); + adc(tmp4, tmp4, zr, carry); + cad(tmp3, tmp3, t0, t0); + cadc(tmp4, tmp4, tmp, t0); + adc(carry, carry2, zr, t0); + cad(tmp4, tmp4, t1, carry2); + adc(carry, carry, zr, carry2); + + ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian + ror_imm(tmp4, tmp4, 32);
+ sd(tmp4, Address(tmp6, 0)); + sd(tmp3, Address(tmp6, wordSize)); + + j(L_third_loop); + + bind(L_third_loop_exit); + + andi(idx, idx, 0x3); + beqz(idx, L_post_third_loop_done); + + Label L_check_1; + subw(idx, idx, 2); + bltz(idx, L_check_1); + + shadd(t0, idx, y, t0, LogBytesPerInt); + ld(yz_idx1, Address(t0, 0)); + ror_imm(yz_idx1, yz_idx1, 32); + + mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 + mulhu(tmp4, product_hi, yz_idx1); + + shadd(t0, idx, z, t0, LogBytesPerInt); + ld(yz_idx2, Address(t0, 0)); + ror_imm(yz_idx2, yz_idx2, 32, tmp); // convert big-endian to little-endian + + add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); + + ror_imm(tmp3, tmp3, 32, tmp); // convert little-endian to big-endian + sd(tmp3, Address(t0, 0)); + + bind(L_check_1); + + andi(idx, idx, 0x1); + subw(idx, idx, 1); + bltz(idx, L_post_third_loop_done); + shadd(t0, idx, y, t0, LogBytesPerInt); + lwu(tmp4, Address(t0, 0)); + mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 + mulhu(carry2, tmp4, product_hi); + + shadd(t0, idx, z, t0, LogBytesPerInt); + lwu(tmp4, Address(t0, 0)); + + add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); + + shadd(t0, idx, z, t0, LogBytesPerInt); + sw(tmp3, Address(t0, 0)); + + slli(t0, carry2, 32); + srli(carry, tmp3, 32); + orr(carry, carry, t0); // carry = (carry2 << 32) | (tmp3 >>> 32) + + bind(L_post_third_loop_done); +} + +/** + * Code for BigInteger::multiplyToLen() intrinsic.
+ * + * x10: x + * x11: xlen + * x12: y + * x13: ylen + * x14: z + * x15: zlen + * x16: tmp1 + * x17: tmp2 + * x7: tmp3 + * x28: tmp4 + * x29: tmp5 + * x30: tmp6 + * x31: tmp7 + */ +void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, + Register z, Register zlen, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, + Register tmp5, Register tmp6, Register product_hi) +{ + assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); + + const Register idx = tmp1; + const Register kdx = tmp2; + const Register xstart = tmp3; + + const Register y_idx = tmp4; + const Register carry = tmp5; + const Register product = xlen; + const Register x_xstart = zlen; // reuse register + + mv(idx, ylen); // idx = ylen; + mv(kdx, zlen); // kdx = xlen+ylen; + mv(carry, zr); // carry = 0; + + Label L_multiply_64_x_64_loop, L_done; + + subw(xstart, xlen, 1); // xstart = xlen - 1 + bltz(xstart, L_done); // xlen <= 0: nothing to do + + const Register jdx = tmp1; + + if (AvoidUnalignedAccesses) { + // Check if x and y are both 8-byte aligned (tested as: xlen and ylen are both even).
+ orr(t0, xlen, ylen); + andi(t0, t0, 0x1); + beqz(t0, L_multiply_64_x_64_loop); + + multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + shadd(t0, xstart, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + + Label L_second_loop_unaligned; + bind(L_second_loop_unaligned); + mv(carry, zr); + mv(jdx, ylen); + subw(xstart, xstart, 1); + bltz(xstart, L_done); + sub(sp, sp, 2 * wordSize); + sd(z, Address(sp, 0)); + sd(zr, Address(sp, wordSize)); + shadd(t0, xstart, z, t0, LogBytesPerInt); + addi(z, t0, 4); + shadd(t0, xstart, x, t0, LogBytesPerInt); + lwu(product, Address(t0, 0)); + Label L_third_loop, L_third_loop_exit; + + blez(jdx, L_third_loop_exit); + + bind(L_third_loop); + subw(jdx, jdx, 1); + shadd(t0, jdx, y, t0, LogBytesPerInt); + lwu(t0, Address(t0, 0)); + mul(t1, t0, product); + add(t0, t1, carry); + shadd(tmp6, jdx, z, t1, LogBytesPerInt); + lwu(t1, Address(tmp6, 0)); + add(t0, t0, t1); + sw(t0, Address(tmp6, 0)); + srli(carry, t0, 32); + bgtz(jdx, L_third_loop); + + bind(L_third_loop_exit); + ld(z, Address(sp, 0)); + addi(sp, sp, 2 * wordSize); + shadd(t0, xstart, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + + j(L_second_loop_unaligned); + } + + bind(L_multiply_64_x_64_loop); + multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + + Label L_second_loop_aligned; + beqz(kdx, L_second_loop_aligned); + + Label L_carry; + subw(kdx, kdx, 1); + beqz(kdx, L_carry); + + shadd(t0, kdx, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + srli(carry, carry, 32); + subw(kdx, kdx, 1); + + bind(L_carry); + shadd(t0, kdx, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + + // Second and third (nested) loops.
+ // + // for (int i = xstart-1; i >= 0; i--) { // Second loop + // carry = 0; + // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop + // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + + // (z[k] & LONG_MASK) + carry; + // z[k] = (int)product; + // carry = product >>> 32; + // } + // z[i] = (int)carry; + // } + // + // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi + + bind(L_second_loop_aligned); + mv(carry, zr); // carry = 0; + mv(jdx, ylen); // j = ystart+1 + + subw(xstart, xstart, 1); // i = xstart-1; + bltz(xstart, L_done); + + sub(sp, sp, 4 * wordSize); + sd(z, Address(sp, 0)); // save z: it is repointed below and reloaded after the inner loop + + Label L_last_x; + shadd(t0, xstart, z, t0, LogBytesPerInt); + addi(z, t0, 4); // z = address of z[xstart + 1] + subw(xstart, xstart, 1); // i = xstart-1; + bltz(xstart, L_last_x); + + shadd(t0, xstart, x, t0, LogBytesPerInt); + ld(product_hi, Address(t0, 0)); + ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian + + Label L_third_loop_prologue; + bind(L_third_loop_prologue); + + sd(ylen, Address(sp, wordSize)); + sd(x, Address(sp, 2 * wordSize)); + sd(xstart, Address(sp, 3 * wordSize)); + multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, + tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); + ld(z, Address(sp, 0)); + ld(ylen, Address(sp, wordSize)); + ld(x, Address(sp, 2 * wordSize)); + ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen + addi(sp, sp, 4 * wordSize); + + addiw(tmp3, xlen, 1); + shadd(t0, tmp3, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + + subw(tmp3, tmp3, 1); + bltz(tmp3, L_done); + + srli(carry, carry, 32); + shadd(t0, tmp3, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + j(L_second_loop_aligned); + + // Next infrequent code is moved outside loops. + bind(L_last_x); + lwu(product_hi, Address(x, 0)); // only one int of x is left: product_hi = x[0] + j(L_third_loop_prologue); + + bind(L_done); +} +#endif + +// Count bits of trailing zero chars from lsb to msb until first non-zero element.
+// For LL case, one byte for one element, so shift 8 bits once, and for other case, +// shift 16 bits once. +void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) +{ + if (UseRVB) { + assert_different_registers(Rd, Rs, tmp1); + int step = isLL ? 8 : 16; + ctz(Rd, Rs); + andi(tmp1, Rd, step - 1); + sub(Rd, Rd, tmp1); // round the bit count down to a multiple of step + return; + } + assert_different_registers(Rd, Rs, tmp1, tmp2); + Label Loop; + int step = isLL ? 8 : 16; + li(Rd, -step); + mv(tmp2, Rs); + + bind(Loop); + addi(Rd, Rd, step); + andi(tmp1, tmp2, ((1 << step) - 1)); + srli(tmp2, tmp2, step); + beqz(tmp1, Loop); // NOTE(review): this loop never terminates if Rs == 0; callers must pass a non-zero Rs +} + +// Reads the 4 adjacent bytes in the lower half of the source register and +// inflates each byte to a 16-bit element of Rd, for example: +// Rs: A7A6A5A4A3A2A1A0 +// Rd: 00A300A200A100A0 +void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) +{ + assert_different_registers(Rd, Rs, tmp1, tmp2); + li(tmp1, 0xFF); + mv(Rd, zr); + for (int i = 0; i <= 3; i++) + { + andr(tmp2, Rs, tmp1); + if (i) { + slli(tmp2, tmp2, i * 8); + } + orr(Rd, Rd, tmp2); + if (i != 3) { + slli(tmp1, tmp1, 8); + } + } +} + +// Reads the 4 adjacent bytes in the upper half of the source register and +// inflates each byte to a 16-bit element of Rd, for example: +// Rs: A7A6A5A4A3A2A1A0 +// Rd: 00A700A600A500A4 +void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) +{ + assert_different_registers(Rd, Rs, tmp1, tmp2); + li(tmp1, 0xFF00000000); + mv(Rd, zr); + for (int i = 0; i <= 3; i++) + { + andr(tmp2, Rs, tmp1); + orr(Rd, Rd, tmp2); + srli(Rd, Rd, 8); + if (i != 3) { + slli(tmp1, tmp1, 8); + } + } +} + +// The size of the blocks erased by the zero_blocks stub. We must +// handle anything smaller than this ourselves in zero_words(). +const int MacroAssembler::zero_words_block_size = 8; + +// zero_words() is used by C2 ClearArray patterns.
It is as small as +// possible, handling small word counts locally and delegating +// anything larger to the zero_blocks stub. It is expanded many times +// in compiled code, so it is important to keep it short. + +// ptr: Address of a buffer to be zeroed. +// cnt: Count in HeapWords. +// +// ptr, cnt, and t0 are clobbered. Returns NULL if the trampoline call could not be emitted, otherwise the current pc. +address MacroAssembler::zero_words(Register ptr, Register cnt) +{ + assert(is_power_of_2(zero_words_block_size), "adjust this"); + assert(ptr == x28 && cnt == x29, "mismatch in register usage"); + assert_different_registers(cnt, t0); + + BLOCK_COMMENT("zero_words {"); + mv(t0, zero_words_block_size); + Label around, done, done16; + bltu(cnt, t0, around); // fewer than zero_words_block_size words: skip the stub call + { + RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); + assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); + if (StubRoutines::riscv::complete()) { + address tpc = trampoline_call(zero_blocks); + if (tpc == NULL) { + DEBUG_ONLY(reset_labels(around)); + postcond(pc() == badAddress); + return NULL; + } + } else { + jal(zero_blocks); + } + } + bind(around); + for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { + Label l; + andi(t0, cnt, i); // test the bit of cnt that stands for a chunk of i words + beqz(t0, l); + for (int j = 0; j < i; j++) { + sd(zr, Address(ptr, 0)); + addi(ptr, ptr, 8); + } + bind(l); + } + { + Label l; + andi(t0, cnt, 1); // one last odd word? + beqz(t0, l); + sd(zr, Address(ptr, 0)); + bind(l); + } + BLOCK_COMMENT("} zero_words"); + postcond(pc() != badAddress); + return pc(); +} + +#define SmallArraySize (18 * BytesPerLong) + +// base: Address of a buffer to be zeroed, 8 bytes aligned. +// cnt: Immediate count in HeapWords.
+void MacroAssembler::zero_words(Register base, u_int64_t cnt) +{ + assert_different_registers(base, t0, t1); + + BLOCK_COMMENT("zero_words {"); + + if (cnt <= SmallArraySize / BytesPerLong) { + for (int i = 0; i < (int)cnt; i++) { + sd(zr, Address(base, i * wordSize)); + } + } else { + const int unroll = 8; // Number of sd(zr, adr) instructions we'll unroll + int remainder = cnt % unroll; + for (int i = 0; i < remainder; i++) { + sd(zr, Address(base, i * wordSize)); + } + + Label loop; + Register cnt_reg = t0; + Register loop_base = t1; + cnt = cnt - remainder; + li(cnt_reg, cnt); + add(loop_base, base, remainder * wordSize); + bind(loop); + sub(cnt_reg, cnt_reg, unroll); + for (int i = 0; i < unroll; i++) { + sd(zr, Address(loop_base, i * wordSize)); + } + add(loop_base, loop_base, unroll * wordSize); + bnez(cnt_reg, loop); + } + + BLOCK_COMMENT("} zero_words"); +} + +// base: Address of a buffer to be filled, 8 bytes aligned. +// cnt: Count in 8-byte unit. +// value: Value to be filled with. +// base will point to the end of the buffer after filling. +void MacroAssembler::fill_words(Register base, Register cnt, Register value) +{ +// Algorithm: +// +// t0 = cnt & 7 +// cnt -= t0 +// p += t0 +// switch (t0): +// switch start: +// do while cnt +// cnt -= 8 +// p[-8] = value +// case 7: +// p[-7] = value +// case 6: +// p[-6] = value +// // ... +// case 1: +// p[-1] = value +// case 0: +// p += 8 +// do-while end +// switch end + + assert_different_registers(base, cnt, value, t0, t1); + + Label fini, skip, entry, loop; + const int unroll = 8; // Number of sd instructions we'll unroll + + beqz(cnt, fini); + + andi(t0, cnt, unroll - 1); + sub(cnt, cnt, t0); + // base += (cnt % 8) * 8; the computed jump below then runs only the last (cnt % 8) sd instructions.
+ shadd(base, t0, base, t1, 3); + la(t1, entry); + slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) + sub(t1, t1, t0); + jr(t1); + + bind(loop); + add(base, base, unroll * 8); + for (int i = -unroll; i < 0; i++) { + sd(value, Address(base, i * 8)); + } + bind(entry); + sub(cnt, cnt, unroll); + bgez(cnt, loop); + + bind(fini); +} + +#define FCVT_SAFE(FLOATCVT, FLOATEQ) \ +void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ + Label L_Okay; \ + fscsr(zr); \ + FLOATCVT(dst, src); \ + frcsr(tmp); \ + andi(tmp, tmp, 0x1E); \ + beqz(tmp, L_Okay); \ + FLOATEQ(tmp, src, src); \ + bnez(tmp, L_Okay); \ + mv(dst, zr); \ + bind(L_Okay); \ +} + +FCVT_SAFE(fcvt_w_s, feq_s) +FCVT_SAFE(fcvt_l_s, feq_s) +FCVT_SAFE(fcvt_w_d, feq_d) +FCVT_SAFE(fcvt_l_d, feq_d) + +#undef FCVT_SAFE + +#define FCMP(FLOATTYPE, FLOATSIG) \ +void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ + FloatRegister Rs2, int unordered_result) { \ + Label Ldone; \ + if (unordered_result < 0) { \ + /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ + /* installs 1 if gt else 0 */ \ + flt_##FLOATSIG(result, Rs2, Rs1); \ + /* Rs1 > Rs2, install 1 */ \ + bgtz(result, Ldone); \ + feq_##FLOATSIG(result, Rs1, Rs2); \ + addi(result, result, -1); \ + /* Rs1 = Rs2, install 0 */ \ + /* NaN or Rs1 < Rs2, install -1 */ \ + bind(Ldone); \ + } else { \ + /* we want -1 for less than, 0 for equal and 1 for unordered or greater than.
*/ \ + /* installs 1 if lt else 0 (negated to -1 below) */ \ + flt_##FLOATSIG(result, Rs1, Rs2); \ + /* Rs1 < Rs2, install -1 */ \ + bgtz(result, Ldone); \ + feq_##FLOATSIG(result, Rs1, Rs2); \ + addi(result, result, -1); \ + /* Rs1 = Rs2, install 0 */ \ + /* NaN or Rs1 > Rs2, install 1 */ \ + bind(Ldone); \ + neg(result, result); \ + } \ +} + +FCMP(float, s); +FCMP(double, d); + +#undef FCMP + +// Zero words; len is in bytes +// Clobbers len, tmp, t0 and t1; addr is preserved +// len must be a nonzero multiple of wordSize +void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { + assert_different_registers(addr, len, tmp, t0, t1); + +#ifdef ASSERT + { + Label L; + andi(t0, len, BytesPerWord - 1); + beqz(t0, L); + stop("len is not a multiple of BytesPerWord"); + bind(L); + } +#endif // ASSERT + +#ifndef PRODUCT + block_comment("zero memory"); +#endif // PRODUCT + + Label loop; + Label entry; + + // Algorithm: + // + // t0 = cnt & 7 + // cnt -= t0 + // p += t0 + // switch (t0) { + // do { + // cnt -= 8 + // p[-8] = 0 + // case 7: + // p[-7] = 0 + // case 6: + // p[-6] = 0 + // ...
+ // case 1: + // p[-1] = 0 + // case 0: + // p += 8 + // } while (cnt) + // } + + const int unroll = 8; // Number of sd(zr) instructions we'll unroll + + srli(len, len, LogBytesPerWord); + andi(t0, len, unroll - 1); // t0 = cnt % unroll + sub(len, len, t0); // cnt -= t0 + // tmp always points to the end of the region we're about to zero + shadd(tmp, t0, addr, t1, LogBytesPerWord); + la(t1, entry); + slli(t0, t0, 2); // t0 *= 4: code size of t0 sd instructions (assumes 4-byte sd encodings) + sub(t1, t1, t0); + jr(t1); + bind(loop); + sub(len, len, unroll); + for (int i = -unroll; i < 0; i++) { + Assembler::sd(zr, Address(tmp, i * wordSize)); + } + bind(entry); + add(tmp, tmp, unroll * wordSize); + bnez(len, loop); +} + +// shift left by shamt and add +// Rd = (Rs1 << shamt) + Rs2 +void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { + if (UseRVB) { + if (shamt == 1) { + sh1add(Rd, Rs1, Rs2); + return; + } else if (shamt == 2) { + sh2add(Rd, Rs1, Rs2); + return; + } else if (shamt == 3) { + sh3add(Rd, Rs1, Rs2); + return; + } + } + + if (shamt != 0) { + slli(tmp, Rs1, shamt); // tmp is clobbered on this path only + add(Rd, Rs2, tmp); + } else { + add(Rd, Rs1, Rs2); + } +} + +void MacroAssembler::zero_extend(Register dst, Register src, int bits) { + if (UseRVB) { + if (bits == 16) { + zext_h(dst, src); + return; + } else if (bits == 32) { + zext_w(dst, src); + return; + } + } + + if (bits == 8) { + zext_b(dst, src); + } else { + slli(dst, src, XLEN - bits); + srli(dst, dst, XLEN - bits); + } +} + +void MacroAssembler::sign_extend(Register dst, Register src, int bits) { + if (UseRVB) { + if (bits == 8) { + sext_b(dst, src); + return; + } else if (bits == 16) { + sext_h(dst, src); + return; + } + } + + if (bits == 32) { + sext_w(dst, src); + } else { + slli(dst, src, XLEN - bits); + srai(dst, dst, XLEN - bits); + } +} + +void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) +{ + if (src1 == src2) { + mv(dst, zr); + return; + } + Label done; + Register left = src1; + Register right = src2; + if (dst ==
src1) { + assert_different_registers(dst, src2, tmp); + mv(tmp, src1); + left = tmp; + } else if (dst == src2) { + assert_different_registers(dst, src1, tmp); + mv(tmp, src2); + right = tmp; + } + + // dst = 1 if left > right, else 0 + slt(dst, right, left); + bnez(dst, done); + slt(dst, left, right); + // dst = -1 if left < right; dst = 0 if they are equal + neg(dst, dst); + bind(done); +} + +void MacroAssembler::safepoint_ifence() { + ifence(); +#ifndef PRODUCT + if (VerifyCrossModifyFence) { + // Clear the thread's requires_cross_modify_fence flag. + sb(zr, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); + } +#endif +} + +#ifndef PRODUCT +void MacroAssembler::verify_cross_modify_fence_not_required() { + if (VerifyCrossModifyFence) { + // Check if thread needs a cross modify fence. + lbu(t0, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); + Label fence_not_required; + beqz(t0, fence_not_required); + // If it does then fail. + la(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure))); + mv(c_rarg0, xthread); + jalr(t0); + bind(fence_not_required); + } +} +#endif Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -0,0 +1,881 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation.
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP +#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP + +#include "asm/assembler.hpp" +#include "metaprogramming/enableIf.hpp" +#include "oops/compressedOops.hpp" +#include "utilities/powerOfTwo.hpp" + +// MacroAssembler extends Assembler by frequently used macros. +// +// Instructions for which a 'better' code sequence exists depending +// on arguments should also go in here. + +class MacroAssembler: public Assembler { + + public: + MacroAssembler(CodeBuffer* code) : Assembler(code) { + } + virtual ~MacroAssembler() {} + + void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod); + + // Place a fence.i after code may have been modified due to a safepoint. + void safepoint_ifence(); + + // Alignment + void align(int modulus, int extra_offset = 0); + + // Stack frame creation/removal + // Note that SP must be updated to the right place before saving/restoring RA and FP + // because signal based thread suspend/resume could happen asynchronously. 
+ void enter() { + addi(sp, sp, - 2 * wordSize); + sd(ra, Address(sp, wordSize)); + sd(fp, Address(sp)); + addi(fp, sp, 2 * wordSize); + } + + void leave() { + addi(sp, fp, - 2 * wordSize); + ld(fp, Address(sp)); + ld(ra, Address(sp, wordSize)); + addi(sp, sp, 2 * wordSize); + } + + + // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) + // The pointer will be loaded into the thread register. + void get_thread(Register thread); + + // Support for VM calls + // + // It is imperative that all calls into the VM are handled via the call_VM macros. + // They make sure that the stack linkage is setup correctly. call_VM's correspond + // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. + + void call_VM(Register oop_result, + address entry_point, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + // Overloadings with last_Java_sp + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments = 0, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + void get_vm_result(Register oop_result, Register java_thread); + void get_vm_result_2(Register metadata_result, 
Register java_thread); + + // These always tightly bind to MacroAssembler::call_VM_leaf_base + // bypassing the virtual implementation + void call_VM_leaf(address entry_point, + int number_of_arguments = 0); + void call_VM_leaf(address entry_point, + Register arg_0); + void call_VM_leaf(address entry_point, + Register arg_0, Register arg_1); + void call_VM_leaf(address entry_point, + Register arg_0, Register arg_1, Register arg_2); + + // These always tightly bind to MacroAssembler::call_VM_base + // bypassing the virtual implementation + void super_call_VM_leaf(address entry_point, Register arg_0); + void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1); + void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2); + void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3); + + // last Java Frame (fills frame anchor) + void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp); + void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp); + void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp); + + // thread in the default location (xthread) + void reset_last_Java_frame(bool clear_fp); + + void call_native(address entry_point, + Register arg_0); + void call_native_base( + address entry_point, // the entry point + Label* retaddr = NULL + ); + + virtual void call_VM_leaf_base( + address entry_point, // the entry point + int number_of_arguments, // the number of arguments to pop after the call + Label* retaddr = NULL + ); + + virtual void call_VM_leaf_base( + address entry_point, // the entry point + int number_of_arguments, // the number of arguments to pop after the call + Label& retaddr) { + call_VM_leaf_base(entry_point, number_of_arguments, &retaddr); + } + + virtual void call_VM_base( // returns the register 
containing the thread upon return + Register oop_result, // where an oop-result ends up if any; use noreg otherwise + Register java_thread, // the thread if computed before ; use noreg otherwise + Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise + address entry_point, // the entry point + int number_of_arguments, // the number of arguments (w/o thread) to pop after the call + bool check_exceptions // whether to check for pending exceptions after return + ); + + void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions); + + virtual void check_and_handle_earlyret(Register java_thread); + virtual void check_and_handle_popframe(Register java_thread); + + void resolve_weak_handle(Register result, Register tmp); + void resolve_oop_handle(Register result, Register tmp = x15); + void resolve_jobject(Register value, Register thread, Register tmp); + + void movoop(Register dst, jobject obj, bool immediate = false); + void mov_metadata(Register dst, Metadata* obj); + void bang_stack_size(Register size, Register tmp); + void set_narrow_oop(Register dst, jobject obj); + void set_narrow_klass(Register dst, Klass* k); + + void load_mirror(Register dst, Register method, Register tmp = x15); + void access_load_at(BasicType type, DecoratorSet decorators, Register dst, + Address src, Register tmp1, Register thread_tmp); + void access_store_at(BasicType type, DecoratorSet decorators, Address dst, + Register src, Register tmp1, Register thread_tmp); + void load_klass(Register dst, Register src); + void store_klass(Register dst, Register src); + void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L); + + void encode_klass_not_null(Register r); + void decode_klass_not_null(Register r); + void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); + void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); + void 
decode_heap_oop_not_null(Register r); + void decode_heap_oop_not_null(Register dst, Register src); + void decode_heap_oop(Register d, Register s); + void decode_heap_oop(Register r) { decode_heap_oop(r, r); } + void encode_heap_oop(Register d, Register s); + void encode_heap_oop(Register r) { encode_heap_oop(r, r); }; + void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, + Register thread_tmp = noreg, DecoratorSet decorators = 0); + void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, + Register thread_tmp = noreg, DecoratorSet decorators = 0); + void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, + Register thread_tmp = noreg, DecoratorSet decorators = 0); + + void store_klass_gap(Register dst, Register src); + + // currently unimplemented + // Used for storing NULL. All other oop constants should be + // stored using routines that take a jobject. + void store_heap_oop_null(Address dst); + + // This dummy is to prevent a call to store_heap_oop from + // converting a zero (like NULL) into a Register by giving + // the compiler two choices it can't resolve + + void store_heap_oop(Address dst, void* dummy); + + // Support for NULL-checks + // + // Generates code that causes a NULL OS exception if the content of reg is NULL. + // If the accessed location is M[reg + offset] and the offset is known, provide the + // offset. No explicit code generation is needed if the offset is within a certain + // range (0 <= offset <= page_size).
+ + virtual void null_check(Register reg, int offset = -1); + static bool needs_explicit_null_check(intptr_t offset); + static bool uses_implicit_null_check(void* address); + + // idiv variant which deals with MINLONG as dividend and -1 as divisor + int corrected_idivl(Register result, Register rs1, Register rs2, + bool want_remainder); + int corrected_idivq(Register result, Register rs1, Register rs2, + bool want_remainder); + + // interface method calling + void lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_tmp, + Label& no_such_interface, + bool return_method = true); + + // virtual method calling + // n.b. x86 allows RegisterOrConstant for vtable_index + void lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result); + + // Form an address from base + offset in Rd. Rd may or may not + // actually be used: you must use the Address that is returned. It + // is up to you to ensure that the shift provided matches the size + // of your data.
+ Address form_address(Register Rd, Register base, long byte_offset); + + // allocation + void tlab_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register tmp1, // temp register + Register tmp2, // temp register + Label& slow_case, // continuation point if fast allocation fails + bool is_far = false + ); + + void eden_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register tmp, // temp register + Label& slow_case, // continuation point if fast allocation fails + bool is_far = false + ); + + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. + // One of the three labels can be NULL, meaning take the fall-through. + // If super_check_offset is -1, the value is loaded up from super_klass. + // No registers are killed, except tmp_reg + void check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register tmp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + Register super_check_offset = noreg); + + // The rest of the type check; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. + // The tmp1_reg and tmp2_reg can be noreg, if no temps are available. + // Updates the sub's secondary super cache as necessary.
+ void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register tmp1_reg, + Register tmp2_reg, + Label* L_success, + Label* L_failure); + + void check_klass_subtype(Register sub_klass, + Register super_klass, + Register tmp_reg, + Label& L_success); + + Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + + // only if +VerifyOops + void verify_oop(Register reg, const char* s = "broken oop"); + void verify_oop_addr(Address addr, const char* s = "broken oop addr"); + + void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {} + void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {} + +#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) +#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + + // A more convenient access to fence for our purposes + // We use four bits to indicate the read and write bits in the predecessors and successors, + // and extend i for r, o for w if UseConservativeFence is enabled.
+ enum Membar_mask_bits { + StoreStore = 0b0101, // (pred = ow + succ = ow) + LoadStore = 0b1001, // (pred = ir + succ = ow) + StoreLoad = 0b0110, // (pred = ow + succ = ir) + LoadLoad = 0b1010, // (pred = ir + succ = ir) + AnyAny = LoadStore | StoreLoad // (pred = iorw + succ = iorw) + }; + + void membar(uint32_t order_constraint); + + static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) { + predecessor = (order_constraint >> 2) & 0x3; + successor = order_constraint & 0x3; + + // extend rw -> iorw: + // 01(w) -> 0101(ow) + // 10(r) -> 1010(ir) + // 11(rw)-> 1111(iorw) + if (UseConservativeFence) { + predecessor |= predecessor << 2; + successor |= successor << 2; + } + } + + static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) { + return ((predecessor & 0x3) << 2) | (successor & 0x3); + } + + // prints msg, dumps registers and stops execution + void stop(const char* msg); + + static void debug64(char* msg, int64_t pc, int64_t regs[]); + + void unimplemented(const char* what = ""); + + void should_not_reach_here() { stop("should not reach here"); } + + static address target_addr_for_insn(address insn_addr); + + // Required platform-specific helpers for Label::patch_instructions. + // They _shadow_ the declarations in AbstractAssembler, which are undefined. 
+ static int pd_patch_instruction_size(address branch, address target); + static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) { + pd_patch_instruction_size(branch, target); + } + static address pd_call_destination(address branch) { + return target_addr_for_insn(branch); + } + + static int patch_oop(address insn_addr, address o); + address emit_trampoline_stub(int insts_call_instruction_offset, address target); + void emit_static_call_stub(); + + // The following 4 methods return the offset of the appropriate move instruction + + // Support for fast byte/short loading with zero extension (depending on particular CPU) + int load_unsigned_byte(Register dst, Address src); + int load_unsigned_short(Register dst, Address src); + + // Support for fast byte/short loading with sign extension (depending on particular CPU) + int load_signed_byte(Register dst, Address src); + int load_signed_short(Register dst, Address src); + + // Load and store values by size and signed-ness + void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); + void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + + public: + // Standard pseudoinstruction + void nop(); + void mv(Register Rd, Register Rs); + void notr(Register Rd, Register Rs); + void neg(Register Rd, Register Rs); + void negw(Register Rd, Register Rs); + void sext_w(Register Rd, Register Rs); + void zext_b(Register Rd, Register Rs); + void seqz(Register Rd, Register Rs); // set if = zero + void snez(Register Rd, Register Rs); // set if != zero + void sltz(Register Rd, Register Rs); // set if < zero + void sgtz(Register Rd, Register Rs); // set if > zero + + // Float pseudoinstruction + void fmv_s(FloatRegister Rd, FloatRegister Rs); + void fabs_s(FloatRegister Rd, FloatRegister Rs); // single-precision absolute value + void fneg_s(FloatRegister Rd, FloatRegister Rs); + + // Double 
pseudoinstruction + void fmv_d(FloatRegister Rd, FloatRegister Rs); + void fabs_d(FloatRegister Rd, FloatRegister Rs); + void fneg_d(FloatRegister Rd, FloatRegister Rs); + + // Pseudoinstruction for control and status register + void rdinstret(Register Rd); // read instruction-retired counter + void rdcycle(Register Rd); // read cycle counter + void rdtime(Register Rd); // read time + void csrr(Register Rd, unsigned csr); // read csr + void csrw(unsigned csr, Register Rs); // write csr + void csrs(unsigned csr, Register Rs); // set bits in csr + void csrc(unsigned csr, Register Rs); // clear bits in csr + void csrwi(unsigned csr, unsigned imm); + void csrsi(unsigned csr, unsigned imm); + void csrci(unsigned csr, unsigned imm); + void frcsr(Register Rd); // read float-point csr + void fscsr(Register Rd, Register Rs); // swap float-point csr + void fscsr(Register Rs); // write float-point csr + void frrm(Register Rd); // read float-point rounding mode + void fsrm(Register Rd, Register Rs); // swap float-point rounding mode + void fsrm(Register Rs); // write float-point rounding mode + void fsrmi(Register Rd, unsigned imm); + void fsrmi(unsigned imm); + void frflags(Register Rd); // read float-point exception flags + void fsflags(Register Rd, Register Rs); // swap float-point exception flags + void fsflags(Register Rs); // write float-point exception flags + void fsflagsi(Register Rd, unsigned imm); + void fsflagsi(unsigned imm); + + void beqz(Register Rs, const address &dest); + void bnez(Register Rs, const address &dest); + void blez(Register Rs, const address &dest); + void bgez(Register Rs, const address &dest); + void bltz(Register Rs, const address &dest); + void bgtz(Register Rs, const address &dest); + void la(Register Rd, Label &label); + void la(Register Rd, const address &dest); + void la(Register Rd, const Address &adr); + //label + void beqz(Register Rs, Label &l, bool is_far = false); + void bnez(Register Rs, Label &l, bool is_far = false); + void 
blez(Register Rs, Label &l, bool is_far = false); + void bgez(Register Rs, Label &l, bool is_far = false); + void bltz(Register Rs, Label &l, bool is_far = false); + void bgtz(Register Rs, Label &l, bool is_far = false); + void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + + void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } } + void pop_reg(RegSet regs, Register stack) { if (regs.bits()) { pop_reg(regs.bits(), stack); } } + void push_reg(Register Rs); + void pop_reg(Register Rd); + int push_reg(unsigned int bitset, Register stack); + int pop_reg(unsigned int bitset, Register stack); + void push_fp(FloatRegSet regs, 
Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } + void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } +#ifdef COMPILER2 + void push_vp(VectorRegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); } + void pop_vp(VectorRegSet regs, Register stack) { if (regs.bits()) pop_vp(regs.bits(), stack); } +#endif // COMPILER2 + + // Push and pop everything that might be clobbered by a native + // runtime call except t0 and t1. (They are always + // temporary registers, so we don't have to protect them.) + // Additional registers can be excluded in a passed RegSet. + void push_call_clobbered_registers_except(RegSet exclude); + void pop_call_clobbered_registers_except(RegSet exclude); + + void push_call_clobbered_registers() { + push_call_clobbered_registers_except(RegSet()); + } + void pop_call_clobbered_registers() { + pop_call_clobbered_registers_except(RegSet()); + } + + void pusha(); + void popa(); + void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0); + void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0); + + // if heap base register is used - reinit it with the correct value + void reinit_heapbase(); + + void bind(Label& L) { + Assembler::bind(L); + // fences across basic blocks should not be merged + code()->clear_last_insn(); + } + + // mv + template<typename T, ENABLE_IF(std::is_integral<T>::value)> + inline void mv(Register Rd, T o) { + li(Rd, (int64_t)o); + } + + inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } + + void mv(Register Rd, Address dest); + void mv(Register Rd, address addr); + void mv(Register Rd, RegisterOrConstant src); + + // logic + void andrw(Register Rd, Register Rs1, Register Rs2); + void orrw(Register Rd, Register Rs1, Register Rs2); + void xorrw(Register Rd, Register Rs1, Register Rs2); + + // revb + void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend + void
revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend + void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend + void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend + void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower + void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword + void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word + void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword + + void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); + void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0); + void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); + + void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail); + void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail); + void cmpxchg(Register addr, Register expected, + Register new_val, + enum operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result, bool result_as_bool = false); + void cmpxchg_weak(Register addr, Register expected, + Register new_val, + enum operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result); + void cmpxchg_narrow_value_helper(Register addr, Register expected, + Register new_val, + enum operand_size size, + Register tmp1, Register tmp2, Register tmp3); + void cmpxchg_narrow_value(Register addr, Register expected, + Register new_val, + enum operand_size size, + Assembler::Aqrl 
acquire, Assembler::Aqrl release, + Register result, bool result_as_bool, + Register tmp1, Register tmp2, Register tmp3); + void weak_cmpxchg_narrow_value(Register addr, Register expected, + Register new_val, + enum operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result, + Register tmp1, Register tmp2, Register tmp3); + + void atomic_add(Register prev, RegisterOrConstant incr, Register addr); + void atomic_addw(Register prev, RegisterOrConstant incr, Register addr); + void atomic_addal(Register prev, RegisterOrConstant incr, Register addr); + void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr); + + void atomic_xchg(Register prev, Register newv, Register addr); + void atomic_xchgw(Register prev, Register newv, Register addr); + void atomic_xchgal(Register prev, Register newv, Register addr); + void atomic_xchgalw(Register prev, Register newv, Register addr); + void atomic_xchgwu(Register prev, Register newv, Register addr); + void atomic_xchgalwu(Register prev, Register newv, Register addr); + + // Biased locking support + // lock_reg and obj_reg must be loaded up with the appropriate values. + // swap_reg is killed. + // tmp_reg must be supplied and must not be rscratch1 or rscratch2 + // Optional slow case is for implementations (interpreter and C1) which branch to + // slow case directly. Leaves condition codes set for C2's Fast_Lock node. 
+ void biased_locking_enter(Register lock_reg, Register obj_reg, + Register swap_reg, Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, Label* slow_case = NULL, + BiasedLockingCounters* counters = NULL, + Register flag = noreg); + void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done, Register flag = noreg); + + + static bool far_branches() { + return ReservedCodeCacheSize > branch_range; + } + + //atomic + void atomic_incw(Register counter_addr, Register tmp1); + void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { + la(tmp1, counter_addr); + atomic_incw(tmp1, tmp2); + } + + // Jumps that can reach anywhere in the code cache. + // Trashes tmp. + void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); + void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); + + static int far_branch_size() { + if (far_branches()) { + return 2 * 4; // auipc + jalr, see far_call() & far_jump() + } else { + return 4; + } + } + + void load_byte_map_base(Register reg); + + void bang_stack_with_offset(int offset) { + // stack grows down, caller passes positive offset + assert(offset > 0, "must bang with negative offset"); + sub(t0, sp, offset); + sd(zr, Address(t0)); + } + + void la_patchable(Register reg1, const Address &dest, int32_t &offset); + + virtual void _call_Unimplemented(address call_site) { + mv(t1, call_site); + } + + #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) + + // Frame creation and destruction shared between JITs. 
+ void build_frame(int framesize); + void remove_frame(int framesize); + + void reserved_stack_check(); + + void get_polling_page(Register dest, relocInfo::relocType rtype); + address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); + + address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); + address ic_call(address entry, jint method_index = 0); + + void add_memory_int64(const Address dst, int64_t imm); + void add_memory_int32(const Address dst, int32_t imm); + + void cmpptr(Register src1, Address src2, Label& equal); + + void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); + void load_method_holder_cld(Register result, Register method); + void load_method_holder(Register holder, Register method); + + void compute_index(Register str1, Register trailing_zeros, Register match_mask, + Register result, Register char_tmp, Register tmp, + bool haystack_isL); + void compute_match_mask(Register src, Register pattern, Register match_mask, + Register mask1, Register mask2); + +#ifdef COMPILER2 + void mul_add(Register out, Register in, Register offset, + Register len, Register k, Register tmp); + void cad(Register dst, Register src1, Register src2, Register carry); + void cadc(Register dst, Register src1, Register src2, Register carry); + void adc(Register dst, Register src1, Register src2, Register carry); + void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, + Register src1, Register src2, Register carry); + void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx); + void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx); + void multiply_128_x_128_loop(Register y, Register z, + Register carry, Register 
carry2, + Register idx, Register jdx, + Register yz_idx1, Register yz_idx2, + Register tmp, Register tmp3, Register tmp4, + Register tmp6, Register product_hi); + void multiply_to_len(Register x, Register xlen, Register y, Register ylen, + Register z, Register zlen, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, + Register tmp5, Register tmp6, Register product_hi); +#endif + + void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); + void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); + + void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1); + + void zero_words(Register base, u_int64_t cnt); + address zero_words(Register ptr, Register cnt); + void fill_words(Register base, Register cnt, Register value); + void zero_memory(Register addr, Register len, Register tmp); + + // shift left by shamt and add + void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt); + + // Here the float instructions with safe deal with some exceptions. + // e.g. convert from NaN, +Inf, -Inf to int, float, double + // will trigger exception, we need to deal with these situations + // to get correct results. 
+ void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0); + void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0); + void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0); + void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0); + + // vector load/store unit-stride instructions + void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { + switch (sew) { + case Assembler::e64: + vle64_v(vd, base, vm); + break; + case Assembler::e32: + vle32_v(vd, base, vm); + break; + case Assembler::e16: + vle16_v(vd, base, vm); + break; + case Assembler::e8: // fall through + default: + vle8_v(vd, base, vm); + break; + } + } + + void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { + switch (sew) { + case Assembler::e64: + vse64_v(store_data, base, vm); + break; + case Assembler::e32: + vse32_v(store_data, base, vm); + break; + case Assembler::e16: + vse16_v(store_data, base, vm); + break; + case Assembler::e8: // fall through + default: + vse8_v(store_data, base, vm); + break; + } + } + + static const int zero_words_block_size; + + void cast_primitive_type(BasicType type, Register Rt) { + switch (type) { + case T_BOOLEAN: + sltu(Rt, zr, Rt); + break; + case T_CHAR : + zero_extend(Rt, Rt, 16); + break; + case T_BYTE : + sign_extend(Rt, Rt, 8); + break; + case T_SHORT : + sign_extend(Rt, Rt, 16); + break; + case T_INT : + addw(Rt, Rt, zr); + break; + case T_LONG : /* nothing to do */ break; + case T_VOID : /* nothing to do */ break; + case T_FLOAT : /* nothing to do */ break; + case T_DOUBLE : /* nothing to do */ break; + default: ShouldNotReachHere(); + } + } + + // float cmp with unordered_result + void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); + void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); + + // Zero/Sign-extend + void 
zero_extend(Register dst, Register src, int bits); + void sign_extend(Register dst, Register src, int bits); + + // compare src1 and src2 and get -1/0/1 in dst. + // if [src1 > src2], dst = 1; + // if [src1 == src2], dst = 0; + // if [src1 < src2], dst = -1; + void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0); + + int push_fp(unsigned int bitset, Register stack); + int pop_fp(unsigned int bitset, Register stack); + + int push_vp(unsigned int bitset, Register stack); + int pop_vp(unsigned int bitset, Register stack); + + // vext + void vmnot_m(VectorRegister vd, VectorRegister vs); + void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); + void vfneg_v(VectorRegister vd, VectorRegister vs); + +private: + +#ifdef ASSERT + // Template short-hand support to clean-up after a failed call to trampoline + // call generation (see trampoline_call() below), when a set of Labels must + // be reset (before returning). + template<typename Label, typename... More> + void reset_labels(Label& lbl, More&... more) { + lbl.reset(); reset_labels(more...); + } + template<typename Label> + void reset_labels(Label& lbl) { + lbl.reset(); + } +#endif + void load_prototype_header(Register dst, Register src); + void repne_scan(Register addr, Register value, Register count, Register tmp); + + // Return true if an address is within the 48-bit RISCV64 address space.
+ bool is_valid_riscv64_address(address addr) { + return ((uintptr_t)addr >> 48) == 0; + } + + void ld_constant(Register dest, const Address &const_addr) { + if (NearCpool) { + ld(dest, const_addr); + } else { + int32_t offset = 0; + la_patchable(dest, InternalAddress(const_addr.target()), offset); + ld(dest, Address(dest, offset)); + } + } + + int bitset_to_regs(unsigned int bitset, unsigned char* regs); + Address add_memory_helper(const Address dst); + + void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); + void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); + + // Check the current thread doesn't need a cross modify fence. + void verify_cross_modify_fence_not_required() PRODUCT_RETURN; +}; + +#ifdef ASSERT +inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } +#endif + +/** + * class SkipIfEqual: + * + * Instantiating this class will result in assembly code being output that will + * jump around any code emitted between the creation of the instance and its + * automatic destruction at the end of a scope block, depending on the value of + * the flag passed to the constructor, which will be checked at run-time. + */ +class SkipIfEqual { + private: + MacroAssembler* _masm; + Label _label; + + public: + SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); + ~SkipIfEqual(); +}; + +#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP +#define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP + +// Still empty. + +#endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/matcher_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/matcher_riscv.hpp @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_MATCHER_RISCV_HPP +#define CPU_RISCV_MATCHER_RISCV_HPP + + // Defined within class Matcher + + // false => size gets scaled to BytesPerLong, ok. + static const bool init_array_count_is_in_bytes = false; + + // Whether this platform implements the scalable vector feature + static const bool implements_scalable_vector = true; + + static const bool supports_scalable_vector() { + return UseRVV; + } + + // riscv supports misaligned vectors store/load. + static constexpr bool misaligned_vectors_ok() { + return true; + } + + // Whether code generation need accurate ConvI2L types. + static const bool convi2l_type_required = false; + + // Does the CPU require late expand (see block.cpp for description of late expand)? + static const bool require_postalloc_expand = false; + + // Do we need to mask the count passed to shift instructions or does + // the cpu only look at the lower 5/6 bits anyway? + static const bool need_masked_shift_count = false; + + // No support for generic vector operands. + static const bool supports_generic_vector_operands = false; + + static constexpr bool isSimpleConstant64(jlong value) { + // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. + // Probably always true, even if a temp register is required. 
+ return true; + } + + // Use conditional move (CMOVL) + static constexpr int long_cmove_cost() { + // long cmoves are no more expensive than int cmoves + return 0; + } + + static constexpr int float_cmove_cost() { + // float cmoves are no more expensive than int cmoves + return 0; + } + + // This affects two different things: + // - how Decode nodes are matched + // - how ImplicitNullCheck opportunities are recognized + // If true, the matcher will try to remove all Decodes and match them + // (as operands) into nodes. NullChecks are not prepared to deal with + // Decodes by final_graph_reshaping(). + // If false, final_graph_reshaping() forces the decode behind the Cmp + // for a NullCheck. The matcher matches the Decode node into a register. + // Implicit_null_check optimization moves the Decode along with the + // memory operation back up before the NullCheck. + static bool narrow_oop_use_complex_address() { + return CompressedOops::shift() == 0; + } + + static bool narrow_klass_use_complex_address() { + return false; + } + + static bool const_oop_prefer_decode() { + // Prefer ConN+DecodeN over ConP in simple compressed oops mode. + return CompressedOops::base() == NULL; + } + + static bool const_klass_prefer_decode() { + // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. + return CompressedKlassPointers::base() == NULL; + } + + // Is it better to copy float constants, or load them directly from + // memory? Intel can load a float constant from a direct address, + // requiring no extra registers. Most RISCs will have to materialize + // an address into a register first, so they would do better to copy + // the constant from stack. + static const bool rematerialize_float_constants = false; + + // If CPU can load and store mis-aligned doubles directly then no + // fixup is needed. Else we split the double into 2 integer pieces + // and move it piece-by-piece. 
Only happens when passing doubles into + // C code as the Java calling convention forces doubles to be aligned. + static const bool misaligned_doubles_ok = true; + + // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. + static const bool strict_fp_requires_explicit_rounding = false; + + // Are floats converted to double when stored to stack during + // deoptimization? + static constexpr bool float_in_double() { return false; } + + // Do ints take an entire long register or just half? + // The relevant question is how the int is callee-saved: + // the whole long is written but de-opt'ing will have to extract + // the relevant 32 bits. + static const bool int_in_long = true; + + // Does the CPU support vector variable shift instructions? + static constexpr bool supports_vector_variable_shifts(void) { + return false; + } + + // Does the CPU support vector variable rotate instructions? + static constexpr bool supports_vector_variable_rotates(void) { + return false; + } + + // Does the CPU support vector unsigned comparison instructions? + static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { + return false; + } + + // Some microarchitectures have mask registers used on vectors + static const bool has_predicated_vectors(void) { + return false; + } + + // true means we have fast l2f conversion + // false means that conversion is done by runtime call + static constexpr bool convL2FSupported(void) { + return true; + } + + // Implements a variant of EncodeISOArrayNode that encodes ASCII only + static const bool supports_encode_ascii_array = false; + +#endif // CPU_RISCV_MATCHER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/methodHandles_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/methodHandles_riscv.cpp @@ -0,0 +1,461 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates.
All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "classfile/javaClasses.inline.hpp" +#include "classfile/vmClasses.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/flags/flagSetting.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/stubRoutines.hpp" + +#define __ _masm-> + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { + assert_cond(_masm != NULL); + if (VerifyMethodHandles) { + verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), + "MH argument is a Class"); + } + __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset())); +} + +#ifdef ASSERT +static int check_nonzero(const char* xname, int x) { + assert(x != 0, "%s should be nonzero", xname); + return x; +} +#define NONZERO(x) check_nonzero(#x, x) +#else //ASSERT +#define NONZERO(x) (x) +#endif //ASSERT + +#ifdef ASSERT +void MethodHandles::verify_klass(MacroAssembler* _masm, + Register obj, vmClassID klass_id, + const char* error_message) { + assert_cond(_masm != NULL); + InstanceKlass** klass_addr = vmClasses::klass_addr_at(klass_id); + Klass* klass = vmClasses::klass_at(klass_id); + Register temp = t1; + Register temp2 = t0; // used by MacroAssembler::cmpptr + Label L_ok, L_bad; + BLOCK_COMMENT("verify_klass {"); + __ verify_oop(obj); + __ beqz(obj, L_bad); + __ push_reg(RegSet::of(temp, temp2), sp); + __ load_klass(temp, obj); + __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); + intptr_t super_check_offset = klass->super_check_offset(); + __ ld(temp, Address(temp, super_check_offset)); + __ cmpptr(temp, ExternalAddress((address)
klass_addr), L_ok); + __ pop_reg(RegSet::of(temp, temp2), sp); + __ bind(L_bad); + __ stop(error_message); + __ BIND(L_ok); + __ pop_reg(RegSet::of(temp, temp2), sp); + BLOCK_COMMENT("} verify_klass"); +} + +void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {} + +#endif //ASSERT + +void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry) { + assert_cond(_masm != NULL); + assert(method == xmethod, "interpreter calling convention"); + Label L_no_such_method; + __ beqz(xmethod, L_no_such_method); + __ verify_method_ptr(method); + + if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + + __ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); + __ beqz(t0, run_compiled_code); + __ ld(t0, Address(method, Method::interpreter_entry_offset())); + __ jr(t0); + __ BIND(run_compiled_code); + } + + const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : + Method::from_interpreted_offset(); + __ ld(t0,Address(method, entry_offset)); + __ jr(t0); + __ bind(L_no_such_method); + __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry())); +} + +void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry) { + assert_cond(_masm != NULL); + BLOCK_COMMENT("jump_to_lambda_form {"); + // This is the initial entry point of a lazy method handle. + // After type checking, it picks up the invoker from the LambdaForm. 
+ assert_different_registers(recv, method_temp, temp2); + assert(recv != noreg, "required register"); + assert(method_temp == xmethod, "required register for loading method"); + + // Load the invoker, as MH -> MH.form -> LF.vmentry + __ verify_oop(recv); + __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2); + __ verify_oop(method_temp); + __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2); + __ verify_oop(method_temp); + __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset())), temp2); + __ verify_oop(method_temp); + __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), noreg, noreg); + + if (VerifyMethodHandles && !for_compiler_entry) { + // make sure recv is already on stack + __ ld(temp2, Address(method_temp, Method::const_offset())); + __ load_sized_value(temp2, + Address(temp2, ConstMethod::size_of_parameters_offset()), + sizeof(u2), /*is_signed*/ false); + Label L; + __ ld(t0, __ argument_address(temp2, -1)); + __ beq(recv, t0, L); + __ ld(x10, __ argument_address(temp2, -1)); + __ ebreak(); + __ BIND(L); + } + + jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); + BLOCK_COMMENT("} jump_to_lambda_form"); +} + +// Code generation +address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, + vmIntrinsics::ID iid) { + assert_cond(_masm != NULL); + const bool not_for_compiler_entry = false; // this is the interpreter entry + assert(is_signature_polymorphic(iid), "expected invoke iid"); + if (iid == vmIntrinsics::_invokeGeneric || + iid == vmIntrinsics::_compiledLambdaForm) { + // Perhaps surprisingly, the symbolic references visible to Java are not directly used. + // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. 
+ // They all allow an appendix argument. + __ ebreak(); // empty stubs make SG sick + return NULL; + } + + // No need in interpreter entry for linkToNative for now. + // Interpreter calls compiled entry through i2c. + if (iid == vmIntrinsics::_linkToNative) { + __ ebreak(); + return NULL; + } + + // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) + // xmethod: Method* + // x13: argument locator (parameter slot count, added to sp) + // x11: used as temp to hold mh or receiver + // x10, x29: garbage temps, blown away + Register argp = x13; // argument list ptr, live on error paths + Register mh = x11; // MH receiver; dies quickly and is recycled + + // here's where control starts out: + __ align(CodeEntryAlignment); + address entry_point = __ pc(); + + if (VerifyMethodHandles) { + assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + + Label L; + BLOCK_COMMENT("verify_intrinsic_id {"); + __ lhu(t0, Address(xmethod, Method::intrinsic_id_offset_in_bytes())); + __ mv(t1, (int) iid); + __ beq(t0, t1, L); + if (iid == vmIntrinsics::_linkToVirtual || + iid == vmIntrinsics::_linkToSpecial) { + // could do this for all kinds, but would explode assembly code size + trace_method_handle(_masm, "bad Method*::intrinsic_id"); + } + __ ebreak(); + __ bind(L); + BLOCK_COMMENT("} verify_intrinsic_id"); + } + + // First task: Find out how big the argument list is. 
+ Address x13_first_arg_addr; + int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); + assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); + if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { + __ ld(argp, Address(xmethod, Method::const_offset())); + __ load_sized_value(argp, + Address(argp, ConstMethod::size_of_parameters_offset()), + sizeof(u2), /*is_signed*/ false); + x13_first_arg_addr = __ argument_address(argp, -1); + } else { + DEBUG_ONLY(argp = noreg); + } + + if (!is_signature_polymorphic_static(iid)) { + __ ld(mh, x13_first_arg_addr); + DEBUG_ONLY(argp = noreg); + } + + // x13_first_arg_addr is live! + + trace_method_handle_interpreter_entry(_masm, iid); + if (iid == vmIntrinsics::_invokeBasic) { + generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); + } else { + // Adjust argument list by popping the trailing MemberName argument. + Register recv = noreg; + if (MethodHandles::ref_kind_has_receiver(ref_kind)) { + // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
+ __ ld(recv = x12, x13_first_arg_addr); + } + DEBUG_ONLY(argp = noreg); + Register xmember = xmethod; // MemberName ptr; incoming method ptr is dead now + __ pop_reg(xmember); // extract last argument + generate_method_handle_dispatch(_masm, iid, recv, xmember, not_for_compiler_entry); + } + + return entry_point; +} + + +void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + vmIntrinsics::ID iid, + Register receiver_reg, + Register member_reg, + bool for_compiler_entry) { + assert_cond(_masm != NULL); + assert(is_signature_polymorphic(iid), "expected invoke iid"); + // temps used in this code are not used in *either* compiled or interpreted calling sequences + Register temp1 = x7; + Register temp2 = x28; + Register temp3 = x29; // x30 is live by this point: it contains the sender SP + if (for_compiler_entry) { + assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); + assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); + assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); + assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); + } + + assert_different_registers(temp1, temp2, temp3, receiver_reg); + assert_different_registers(temp1, temp2, temp3, member_reg); + + if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { + if (iid == vmIntrinsics::_linkToNative) { + assert(for_compiler_entry, "only compiler entry is supported"); + } + // indirect through MH.form.vmentry.vmtarget + jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); + } else { + // The method is a member invoker used by direct method handles. 
+ if (VerifyMethodHandles) { + // make sure the trailing argument really is a MemberName (caller responsibility) + verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName), + "MemberName required for invokeVirtual etc."); + } + + Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); + Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); + Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); + Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); + + Register temp1_recv_klass = temp1; + if (iid != vmIntrinsics::_linkToStatic) { + __ verify_oop(receiver_reg); + if (iid == vmIntrinsics::_linkToSpecial) { + // Don't actually load the klass; just null-check the receiver. + __ null_check(receiver_reg); + } else { + // load receiver klass itself + __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + BLOCK_COMMENT("check_receiver {"); + // The receiver for the MemberName must be in receiver_reg. + // Check the receiver against the MemberName.clazz + if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { + // Did not load it above... + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { + Label L_ok; + Register temp2_defc = temp2; + __ load_heap_oop(temp2_defc, member_clazz, temp3); + load_klass_from_Class(_masm, temp2_defc); + __ verify_klass_ptr(temp2_defc); + __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); + // If we get here, the type check failed! 
+ __ ebreak(); + __ bind(L_ok); + } + BLOCK_COMMENT("} check_receiver"); + } + if (iid == vmIntrinsics::_linkToSpecial || + iid == vmIntrinsics::_linkToStatic) { + DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass + } + + // Live registers at this point: + // member_reg - MemberName that was the trailing argument + // temp1_recv_klass - klass of stacked receiver, if needed + // x30 - interpreter linkage (if interpreted) + // x11 ... x10 - compiler arguments (if compiled) + + Label L_incompatible_class_change_error; + switch (iid) { + case vmIntrinsics::_linkToSpecial: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); + } + __ load_heap_oop(xmethod, member_vmtarget); + __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); + break; + + case vmIntrinsics::_linkToStatic: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); + } + __ load_heap_oop(xmethod, member_vmtarget); + __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); + break; + + case vmIntrinsics::_linkToVirtual: + { + // same as TemplateTable::invokevirtual, + // minus the CP setup and profiling: + + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); + } + + // pick out the vtable index from the MemberName, and then we can discard it: + Register temp2_index = temp2; + __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); + + if (VerifyMethodHandles) { + Label L_index_ok; + __ bgez(temp2_index, L_index_ok); + __ ebreak(); + __ BIND(L_index_ok); + } + + // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget + // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
+ + // get target Method* & entry point + __ lookup_virtual_method(temp1_recv_klass, temp2_index, xmethod); + break; + } + + case vmIntrinsics::_linkToInterface: + { + // same as TemplateTable::invokeinterface + // (minus the CP setup and profiling, with different argument motion) + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); + } + + Register temp3_intf = temp3; + __ load_heap_oop(temp3_intf, member_clazz); + load_klass_from_Class(_masm, temp3_intf); + __ verify_klass_ptr(temp3_intf); + + Register rindex = xmethod; + __ access_load_at(T_ADDRESS, IN_HEAP, rindex, member_vmindex, noreg, noreg); + if (VerifyMethodHandles) { + Label L; + __ bgez(rindex, L); + __ ebreak(); + __ bind(L); + } + + // given intf, index, and recv klass, dispatch to the implementation method + __ lookup_interface_method(temp1_recv_klass, temp3_intf, + // note: next two args must be the same: + rindex, xmethod, + temp2, + L_incompatible_class_change_error); + break; + } + + default: + fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); + break; + } + + // live at this point: xmethod, x30 (if interpreted) + + // After figuring out which concrete method to call, jump into it. + // Note that this works in the interpreter with no data motion. + // But the compiled version will require that r2_recv be shifted out. + __ verify_method_ptr(xmethod); + jump_from_method_handle(_masm, xmethod, temp1, for_compiler_entry); + if (iid == vmIntrinsics::_linkToInterface) { + __ bind(L_incompatible_class_change_error); + __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); + } + } + +} + +#ifndef PRODUCT +void trace_method_handle_stub(const char* adaptername, + oopDesc* mh, + intptr_t* saved_regs, + intptr_t* entry_sp) { } + +// The stub wraps the arguments in a struct on the stack to avoid +// dealing with the different calling conventions for passing 6 +// arguments. 
+struct MethodHandleStubArguments { + const char* adaptername; + oopDesc* mh; + intptr_t* saved_regs; + intptr_t* entry_sp; +}; +void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { } + +void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { } +#endif //PRODUCT Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/methodHandles_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/methodHandles_riscv.hpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Platform-specific definitions for method handles. +// These definitions are inlined into class MethodHandles. 
+ +// Adapters +enum /* platform_dependent_constants */ { + adapter_code_size = 32000 DEBUG_ONLY(+ 120000) +}; + +public: + + static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + + static void verify_klass(MacroAssembler* _masm, + Register obj, vmClassID klass_id, + const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + + static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { + verify_klass(_masm, mh_reg, VM_CLASS_ID(java_lang_invoke_MethodHandle), + "reference is a MH"); + } + + static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; + + // Similar to InterpreterMacroAssembler::jump_from_interpreted. + // Takes care of special dispatch from single stepping too. + static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry); + + static void jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry); Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/nativeInst_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/nativeInst_riscv.cpp @@ -0,0 +1,429 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/compiledIC.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.hpp" +#include "runtime/orderAccess.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/ostream.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif + +Register NativeInstruction::extract_rs1(address instr) { + assert_cond(instr != NULL); + return as_Register(Assembler::extract(((unsigned*)instr)[0], 19, 15)); +} + +Register NativeInstruction::extract_rs2(address instr) { + assert_cond(instr != NULL); + return as_Register(Assembler::extract(((unsigned*)instr)[0], 24, 20)); +} + +Register NativeInstruction::extract_rd(address instr) { + assert_cond(instr != NULL); + return as_Register(Assembler::extract(((unsigned*)instr)[0], 11, 7)); +} + +uint32_t NativeInstruction::extract_opcode(address instr) { + assert_cond(instr != NULL); + return Assembler::extract(((unsigned*)instr)[0], 6, 0); +} + +uint32_t NativeInstruction::extract_funct3(address instr) { + assert_cond(instr != NULL); + return Assembler::extract(((unsigned*)instr)[0], 14, 12); +} + +bool NativeInstruction::is_pc_relative_at(address instr) { + // auipc + jalr + // auipc + addi + // auipc + load + // auipc + fload_load + return (is_auipc_at(instr)) && + (is_addi_at(instr + instruction_size) || + 
is_jalr_at(instr + instruction_size) || + is_load_at(instr + instruction_size) || + is_float_load_at(instr + instruction_size)) && + check_pc_relative_data_dependency(instr); +} + +// ie:ld(Rd, Label) +bool NativeInstruction::is_load_pc_relative_at(address instr) { + return is_auipc_at(instr) && // auipc + is_ld_at(instr + instruction_size) && // ld + check_load_pc_relative_data_dependency(instr); +} + +bool NativeInstruction::is_movptr_at(address instr) { + return is_lui_at(instr) && // Lui + is_addi_at(instr + instruction_size) && // Addi + is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 + is_addi_at(instr + instruction_size * 3) && // Addi + is_slli_shift_at(instr + instruction_size * 4, 5) && // Slli Rd, Rs, 5 + (is_addi_at(instr + instruction_size * 5) || + is_jalr_at(instr + instruction_size * 5) || + is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load + check_movptr_data_dependency(instr); +} + +bool NativeInstruction::is_li32_at(address instr) { + return is_lui_at(instr) && // lui + is_addiw_at(instr + instruction_size) && // addiw + check_li32_data_dependency(instr); +} + +bool NativeInstruction::is_li64_at(address instr) { + return is_lui_at(instr) && // lui + is_addi_at(instr + instruction_size) && // addi + is_slli_shift_at(instr + instruction_size * 2, 12) && // Slli Rd, Rs, 12 + is_addi_at(instr + instruction_size * 3) && // addi + is_slli_shift_at(instr + instruction_size * 4, 12) && // Slli Rd, Rs, 12 + is_addi_at(instr + instruction_size * 5) && // addi + is_slli_shift_at(instr + instruction_size * 6, 8) && // Slli Rd, Rs, 8 + is_addi_at(instr + instruction_size * 7) && // addi + check_li64_data_dependency(instr); +} + +void NativeCall::verify() { + assert(NativeCall::is_call_at((address)this), "unexpected code at call site"); +} + +address NativeCall::destination() const { + address addr = (address)this; + assert(NativeInstruction::is_jal_at(instruction_address()), "inst must be jal."); + address destination = 
MacroAssembler::target_addr_for_insn(instruction_address()); + + // Do we use a trampoline stub for this call? + CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. + assert(cb && cb->is_nmethod(), "sanity"); + nmethod *nm = (nmethod *)cb; + if (nm != NULL && nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) { + // Yes we do, so get the destination from the trampoline stub. + const address trampoline_stub_addr = destination; + destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); + } + + return destination; +} + +// Similar to replace_mt_safe, but just changes the destination. The +// important thing is that free-running threads are able to execute this +// call instruction at all times. +// +// Used in the runtime linkage of calls; see class CompiledIC. +// +// Add parameter assert_lock to switch off assertion +// during code generation, where no patching lock is needed. +void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { + assert(!assert_lock || + (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) || + CompiledICLocker::is_safe(addr_at(0)), + "concurrent code patching"); + + ResourceMark rm; + address addr_call = addr_at(0); + assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); + + // Patch the constant in the call's trampoline stub. + address trampoline_stub_addr = get_trampoline(); + if (trampoline_stub_addr != NULL) { + assert (!is_NativeCallTrampolineStub_at(dest), "chained trampolines"); + nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); + } + + // Patch the call. 
+ if (Assembler::reachable_from_branch_at(addr_call, dest)) { + set_destination(dest); + } else { + assert (trampoline_stub_addr != NULL, "we need a trampoline"); + set_destination(trampoline_stub_addr); + } + + ICache::invalidate_range(addr_call, instruction_size); +} + +address NativeCall::get_trampoline() { + address call_addr = addr_at(0); + + CodeBlob *code = CodeCache::find_blob(call_addr); + assert(code != NULL, "Could not find the containing code blob"); + + address jal_destination = MacroAssembler::pd_call_destination(call_addr); + if (code != NULL && code->contains(jal_destination) && is_NativeCallTrampolineStub_at(jal_destination)) { + return jal_destination; + } + + if (code != NULL && code->is_nmethod()) { + return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); + } + + return NULL; +} + +// Inserts a native call instruction at a given pc +void NativeCall::insert(address code_pos, address entry) { Unimplemented(); } + +//------------------------------------------------------------------- + +void NativeMovConstReg::verify() { + if (!(nativeInstruction_at(instruction_address())->is_movptr() || + is_auipc_at(instruction_address()))) { + fatal("should be MOVPTR or AUIPC"); + } +} + +intptr_t NativeMovConstReg::data() const { + address addr = MacroAssembler::target_addr_for_insn(instruction_address()); + if (maybe_cpool_ref(instruction_address())) { + return *(intptr_t*)addr; + } else { + return (intptr_t)addr; + } +} + +void NativeMovConstReg::set_data(intptr_t x) { + if (maybe_cpool_ref(instruction_address())) { + address addr = MacroAssembler::target_addr_for_insn(instruction_address()); + *(intptr_t*)addr = x; + } else { + // Store x into the instruction stream. + MacroAssembler::pd_patch_instruction_size(instruction_address(), (address)x); + ICache::invalidate_range(instruction_address(), movptr_instruction_size); + } + + // Find and replace the oop/metadata corresponding to this + // instruction in oops section. 
+ CodeBlob* cb = CodeCache::find_blob(instruction_address()); + nmethod* nm = cb->as_nmethod_or_null(); + if (nm != NULL) { + RelocIterator iter(nm, instruction_address(), next_instruction_address()); + while (iter.next()) { + if (iter.type() == relocInfo::oop_type) { + oop* oop_addr = iter.oop_reloc()->oop_addr(); + *oop_addr = cast_to_oop(x); + break; + } else if (iter.type() == relocInfo::metadata_type) { + Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); + *metadata_addr = (Metadata*)x; + break; + } + } + } +} + +void NativeMovConstReg::print() { + tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, + p2i(instruction_address()), data()); +} + +//------------------------------------------------------------------- + +int NativeMovRegMem::offset() const { + Unimplemented(); + return 0; +} + +void NativeMovRegMem::set_offset(int x) { Unimplemented(); } + +void NativeMovRegMem::verify() { + Unimplemented(); +} + +//-------------------------------------------------------------------------------- + +void NativeJump::verify() { } + + +void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { +} + + +address NativeJump::jump_destination() const { + address dest = MacroAssembler::target_addr_for_insn(instruction_address()); + + // We use jump to self as the unresolved address which the inline + // cache code (and relocs) know about + // As a special case we also use sequence movptr_with_offset(r,0), jalr(r,0) + // i.e. 
jump to 0 when we need leave space for a wide immediate + // load + + // return -1 if jump to self or to 0 + if ((dest == (address) this) || dest == 0) { + dest = (address) -1; + } + + return dest; +}; + +void NativeJump::set_jump_destination(address dest) { + // We use jump to self as the unresolved address which the inline + // cache code (and relocs) know about + if (dest == (address) -1) + dest = instruction_address(); + + MacroAssembler::pd_patch_instruction(instruction_address(), dest); + ICache::invalidate_range(instruction_address(), instruction_size); +} + +//------------------------------------------------------------------- + +address NativeGeneralJump::jump_destination() const { + NativeMovConstReg* move = nativeMovConstReg_at(instruction_address()); + address dest = (address) move->data(); + + // We use jump to self as the unresolved address which the inline + // cache code (and relocs) know about + // As a special case we also use jump to 0 when first generating + // a general jump + + // return -1 if jump to self or to 0 + if ((dest == (address) this) || dest == 0) { + dest = (address) -1; + } + + return dest; +} + +//------------------------------------------------------------------- + +bool NativeInstruction::is_safepoint_poll() { + return is_lwu_to_zr(address(this)); +} + +bool NativeInstruction::is_lwu_to_zr(address instr) { + assert_cond(instr != NULL); + return (extract_opcode(instr) == 0b0000011 && + extract_funct3(instr) == 0b110 && + extract_rd(instr) == zr); // zr +} + +// A 16-bit instruction with all bits ones is permanently reserved as an illegal instruction. 
+bool NativeInstruction::is_sigill_zombie_not_entrant() { + // jvmci + return uint_at(0) == 0xffffffff; +} + +void NativeIllegalInstruction::insert(address code_pos) { + assert_cond(code_pos != NULL); + *(juint*)code_pos = 0xffffffff; // all bits ones is permanently reserved as an illegal instruction +} + +bool NativeInstruction::is_stop() { + return uint_at(0) == 0xffffffff; // an illegal instruction +} + +//------------------------------------------------------------------- + +// MT-safe inserting of a jump over a jump or a nop (used by +// nmethod::make_not_entrant_or_zombie) + +void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { + + assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); + + assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() || + nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(), + "riscv cannot replace non-jump with jump"); + + // Patch this nmethod atomically. + if (Assembler::reachable_from_branch_at(verified_entry, dest)) { + ptrdiff_t offset = dest - verified_entry; + guarantee(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction."); // 1M + + uint32_t insn = 0; + address pInsn = (address)&insn; + Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); + Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); + Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); + Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); + Assembler::patch(pInsn, 11, 7, 0); // zero, no link jump + Assembler::patch(pInsn, 6, 0, 0b1101111); // j, (jal x0 offset) + *(unsigned int*)verified_entry = insn; + } else { + // We use an illegal instruction for marking a method as + // not_entrant or zombie. 
+ NativeIllegalInstruction::insert(verified_entry); + } + + ICache::invalidate_range(verified_entry, instruction_size); +} + +void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { + CodeBuffer cb(code_pos, instruction_size); + MacroAssembler a(&cb); + + int32_t offset = 0; + a.movptr_with_offset(t0, entry, offset); // lui, addi, slli, addi, slli + a.jalr(x0, t0, offset); // jalr + + ICache::invalidate_range(code_pos, instruction_size); +} + +// MT-safe patching of a long jump instruction. +void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { + ShouldNotCallThis(); +} + + +address NativeCallTrampolineStub::destination(nmethod *nm) const { + return ptr_at(data_offset); +} + +void NativeCallTrampolineStub::set_destination(address new_destination) { + set_ptr_at(data_offset, new_destination); + OrderAccess::release(); +} + +uint32_t NativeMembar::get_kind() { + uint32_t insn = uint_at(0); + + uint32_t predecessor = Assembler::extract(insn, 27, 24); + uint32_t successor = Assembler::extract(insn, 23, 20); + + return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor); +} + +void NativeMembar::set_kind(uint32_t order_kind) { + uint32_t predecessor = 0; + uint32_t successor = 0; + + MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor); + + uint32_t insn = uint_at(0); + address pInsn = (address) &insn; + Assembler::patch(pInsn, 27, 24, predecessor); + Assembler::patch(pInsn, 23, 20, successor); + + address membar = addr_at(0); + *(unsigned int*) membar = insn; +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/nativeInst_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/nativeInst_riscv.hpp @@ -0,0 +1,572 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. 
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_NATIVEINST_RISCV_HPP +#define CPU_RISCV_NATIVEINST_RISCV_HPP + +#include "asm/assembler.hpp" +#include "runtime/icache.hpp" +#include "runtime/os.hpp" + +// We have interfaces for the following instructions: +// - NativeInstruction +// - - NativeCall +// - - NativeMovConstReg +// - - NativeMovRegMem +// - - NativeJump +// - - NativeGeneralJump +// - - NativeIllegalInstruction +// - - NativeCallTrampolineStub +// - - NativeMembar +// - - NativeFenceI + +// The base class for different kinds of native instruction abstractions. +// Provides the primitive operations to manipulate code relative to this. 
+ +class NativeCall; + +class NativeInstruction { + friend class Relocation; + friend bool is_NativeCallTrampolineStub_at(address); + public: + enum { + instruction_size = 4, + compressed_instruction_size = 2, + }; + + juint encoding() const { + return uint_at(0); + } + + bool is_jal() const { return is_jal_at(addr_at(0)); } + bool is_movptr() const { return is_movptr_at(addr_at(0)); } + bool is_call() const { return is_call_at(addr_at(0)); } + bool is_jump() const { return is_jump_at(addr_at(0)); } + + static bool is_jal_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1101111; } + static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; } + static bool is_branch_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100011; } + static bool is_ld_at(address instr) { assert_cond(instr != NULL); return is_load_at(instr) && extract_funct3(instr) == 0b011; } + static bool is_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000011; } + static bool is_float_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000111; } + static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010111; } + static bool is_jump_at(address instr) { assert_cond(instr != NULL); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); } + static bool is_addi_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; } + static bool is_addiw_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; } + static bool is_lui_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0110111; } + static bool is_slli_shift_at(address instr, uint32_t shift) { + 
assert_cond(instr != NULL); + return (extract_opcode(instr) == 0b0010011 && // opcode field + extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation + Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field + } + + static Register extract_rs1(address instr); + static Register extract_rs2(address instr); + static Register extract_rd(address instr); + static uint32_t extract_opcode(address instr); + static uint32_t extract_funct3(address instr); + + // the instruction sequence of movptr is as below: + // lui + // addi + // slli + // addi + // slli + // addi/jalr/load + static bool check_movptr_data_dependency(address instr) { + address lui = instr; + address addi1 = lui + instruction_size; + address slli1 = addi1 + instruction_size; + address addi2 = slli1 + instruction_size; + address slli2 = addi2 + instruction_size; + address last_instr = slli2 + instruction_size; + return extract_rs1(addi1) == extract_rd(lui) && + extract_rs1(addi1) == extract_rd(addi1) && + extract_rs1(slli1) == extract_rd(addi1) && + extract_rs1(slli1) == extract_rd(slli1) && + extract_rs1(addi2) == extract_rd(slli1) && + extract_rs1(addi2) == extract_rd(addi2) && + extract_rs1(slli2) == extract_rd(addi2) && + extract_rs1(slli2) == extract_rd(slli2) && + extract_rs1(last_instr) == extract_rd(slli2); + } + + // the instruction sequence of li64 is as below: + // lui + // addi + // slli + // addi + // slli + // addi + // slli + // addi + static bool check_li64_data_dependency(address instr) { + address lui = instr; + address addi1 = lui + instruction_size; + address slli1 = addi1 + instruction_size; + address addi2 = slli1 + instruction_size; + address slli2 = addi2 + instruction_size; + address addi3 = slli2 + instruction_size; + address slli3 = addi3 + instruction_size; + address addi4 = slli3 + instruction_size; + return extract_rs1(addi1) == extract_rd(lui) && + extract_rs1(addi1) == extract_rd(addi1) && + extract_rs1(slli1) == extract_rd(addi1) && 
+ extract_rs1(slli1) == extract_rd(slli1) && + extract_rs1(addi2) == extract_rd(slli1) && + extract_rs1(addi2) == extract_rd(addi2) && + extract_rs1(slli2) == extract_rd(addi2) && + extract_rs1(slli2) == extract_rd(slli2) && + extract_rs1(addi3) == extract_rd(slli2) && + extract_rs1(addi3) == extract_rd(addi3) && + extract_rs1(slli3) == extract_rd(addi3) && + extract_rs1(slli3) == extract_rd(slli3) && + extract_rs1(addi4) == extract_rd(slli3) && + extract_rs1(addi4) == extract_rd(addi4); + } + + // the instruction sequence of li32 is as below: + // lui + // addiw + static bool check_li32_data_dependency(address instr) { + address lui = instr; + address addiw = lui + instruction_size; + + return extract_rs1(addiw) == extract_rd(lui) && + extract_rs1(addiw) == extract_rd(addiw); + } + + // the instruction sequence of pc-relative is as below: + // auipc + // jalr/addi/load/float_load + static bool check_pc_relative_data_dependency(address instr) { + address auipc = instr; + address last_instr = auipc + instruction_size; + + return extract_rs1(last_instr) == extract_rd(auipc); + } + + // the instruction sequence of load_label is as below: + // auipc + // load + static bool check_load_pc_relative_data_dependency(address instr) { + address auipc = instr; + address load = auipc + instruction_size; + + return extract_rd(load) == extract_rd(auipc) && + extract_rs1(load) == extract_rd(load); + } + + static bool is_movptr_at(address instr); + static bool is_li32_at(address instr); + static bool is_li64_at(address instr); + static bool is_pc_relative_at(address branch); + static bool is_load_pc_relative_at(address branch); + + static bool is_call_at(address instr) { + if (is_jal_at(instr) || is_jalr_at(instr)) { + return true; + } + return false; + } + static bool is_lwu_to_zr(address instr); + + inline bool is_nop(); + inline bool is_jump_or_nop(); + bool is_safepoint_poll(); + bool is_sigill_zombie_not_entrant(); + bool is_stop(); + + protected: + address addr_at(int offset) 
const { return address(this) + offset; } + + jint int_at(int offset) const { return *(jint*) addr_at(offset); } + juint uint_at(int offset) const { return *(juint*) addr_at(offset); } + + address ptr_at(int offset) const { return *(address*) addr_at(offset); } + + oop oop_at (int offset) const { return *(oop*) addr_at(offset); } + + + void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; } + void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; } + void set_ptr_at (int offset, address ptr) { *(address*) addr_at(offset) = ptr; } + void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; } + + public: + + inline friend NativeInstruction* nativeInstruction_at(address addr); + + static bool maybe_cpool_ref(address instr) { + return is_auipc_at(instr); + } + + bool is_membar() { + return (uint_at(0) & 0x7f) == 0b1111 && extract_funct3(addr_at(0)) == 0; + } +}; + +inline NativeInstruction* nativeInstruction_at(address addr) { + return (NativeInstruction*)addr; +} + +// The natural type of an RISCV instruction is uint32_t +inline NativeInstruction* nativeInstruction_at(uint32_t *addr) { + return (NativeInstruction*)addr; +} + +inline NativeCall* nativeCall_at(address addr); +// The NativeCall is an abstraction for accessing/manipulating native +// call instructions (used to manipulate inline caches, primitive & +// DSO calls, etc.). 
+ +class NativeCall: public NativeInstruction { + public: + enum RISCV_specific_constants { + instruction_size = 4, + instruction_offset = 0, + displacement_offset = 0, + return_address_offset = 4 + }; + + address instruction_address() const { return addr_at(instruction_offset); } + address next_instruction_address() const { return addr_at(return_address_offset); } + address return_address() const { return addr_at(return_address_offset); } + address destination() const; + + void set_destination(address dest) { + assert(is_jal(), "Should be jal instruction!"); + intptr_t offset = (intptr_t)(dest - instruction_address()); + assert((offset & 0x1) == 0, "bad alignment"); + assert(is_imm_in_range(offset, 20, 1), "encoding constraint"); + unsigned int insn = 0b1101111; // jal + address pInsn = (address)(&insn); + Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); + Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); + Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); + Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); + Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra + set_int_at(displacement_offset, insn); + } + + void verify_alignment() {} // do nothing on riscv + void verify(); + void print(); + + // Creation + inline friend NativeCall* nativeCall_at(address addr); + inline friend NativeCall* nativeCall_before(address return_address); + + static bool is_call_before(address return_address) { + return is_call_at(return_address - NativeCall::return_address_offset); + } + + // MT-safe patching of a call instruction. + static void insert(address code_pos, address entry); + + static void replace_mt_safe(address instr_addr, address code_buffer); + + // Similar to replace_mt_safe, but just changes the destination. The + // important thing is that free-running threads are able to execute + // this call instruction at all times. 
If the call is an immediate jal + // instruction we can simply rely on atomicity of 32-bit writes to + // make sure other threads will see no intermediate states. + + // We cannot rely on locks here, since the free-running threads must run at + // full speed. + // + // Used in the runtime linkage of calls; see class CompiledIC. + // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) + + // The parameter assert_lock disables the assertion during code generation. + void set_destination_mt_safe(address dest, bool assert_lock = true); + + address get_trampoline(); +}; + +inline NativeCall* nativeCall_at(address addr) { + assert_cond(addr != NULL); + NativeCall* call = (NativeCall*)(addr - NativeCall::instruction_offset); +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +inline NativeCall* nativeCall_before(address return_address) { + assert_cond(return_address != NULL); + NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +// An interface for accessing/manipulating native mov reg, imm instructions. +// (used to manipulate inlined 64-bit data calls, etc.) +class NativeMovConstReg: public NativeInstruction { + public: + enum RISCV_specific_constants { + movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). + movptr_with_offset_instruction_size = 5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset(). 
+ load_pc_relative_instruction_size = 2 * NativeInstruction::instruction_size, // auipc, ld + instruction_offset = 0, + displacement_offset = 0 + }; + + address instruction_address() const { return addr_at(instruction_offset); } + address next_instruction_address() const { + // if the instruction at 5 * instruction_size is addi, + // it means a lui + addi + slli + addi + slli + addi instruction sequence, + // and the next instruction address should be addr_at(6 * instruction_size). + // However, when the instruction at 5 * instruction_size isn't addi, + // the next instruction address should be addr_at(5 * instruction_size) + if (nativeInstruction_at(instruction_address())->is_movptr()) { + if (is_addi_at(addr_at(movptr_with_offset_instruction_size))) { + // Assume: lui, addi, slli, addi, slli, addi + return addr_at(movptr_instruction_size); + } else { + // Assume: lui, addi, slli, addi, slli + return addr_at(movptr_with_offset_instruction_size); + } + } else if (is_load_pc_relative_at(instruction_address())) { + // Assume: auipc, ld + return addr_at(load_pc_relative_instruction_size); + } + guarantee(false, "Unknown instruction in NativeMovConstReg"); + return NULL; + } + + intptr_t data() const; + void set_data(intptr_t x); + + void flush() { + if (!maybe_cpool_ref(instruction_address())) { + ICache::invalidate_range(instruction_address(), movptr_instruction_size); + } + } + + void verify(); + void print(); + + // Creation + inline friend NativeMovConstReg* nativeMovConstReg_at(address addr); + inline friend NativeMovConstReg* nativeMovConstReg_before(address addr); +}; + +inline NativeMovConstReg* nativeMovConstReg_at(address addr) { + assert_cond(addr != NULL); + NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +inline NativeMovConstReg* nativeMovConstReg_before(address addr) { + assert_cond(addr != NULL); + NativeMovConstReg* test = 
(NativeMovConstReg*)(addr - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +// RISCV should not use C1 runtime patching, so just leave NativeMovRegMem Unimplemented. +class NativeMovRegMem: public NativeInstruction { + public: + int instruction_start() const { + Unimplemented(); + return 0; + } + + address instruction_address() const { + Unimplemented(); + return NULL; + } + + int num_bytes_to_end_of_patch() const { + Unimplemented(); + return 0; + } + + int offset() const; + + void set_offset(int x); + + void add_offset_in_bytes(int add_offset) { Unimplemented(); } + + void verify(); + void print(); + + private: + inline friend NativeMovRegMem* nativeMovRegMem_at (address addr); +}; + +inline NativeMovRegMem* nativeMovRegMem_at (address addr) { + Unimplemented(); + return NULL; +} + +class NativeJump: public NativeInstruction { + public: + enum RISCV_specific_constants { + instruction_size = NativeInstruction::instruction_size, + instruction_offset = 0, + data_offset = 0, + next_instruction_offset = NativeInstruction::instruction_size + }; + + address instruction_address() const { return addr_at(instruction_offset); } + address next_instruction_address() const { return addr_at(instruction_size); } + address jump_destination() const; + void set_jump_destination(address dest); + + // Creation + inline friend NativeJump* nativeJump_at(address address); + + void verify(); + + // Insertion of native jump instruction + static void insert(address code_pos, address entry); + // MT-safe insertion of native jump at verified method entry + static void check_verified_entry_alignment(address entry, address verified_entry); + static void patch_verified_entry(address entry, address verified_entry, address dest); +}; + +inline NativeJump* nativeJump_at(address addr) { + NativeJump* jump = (NativeJump*)(addr - NativeJump::instruction_offset); +#ifdef ASSERT + jump->verify(); +#endif + 
return jump; +} + +class NativeGeneralJump: public NativeJump { +public: + enum RISCV_specific_constants { + instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr + instruction_offset = 0, + data_offset = 0, + next_instruction_offset = 6 * NativeInstruction::instruction_size // lui, addi, slli, addi, slli, jalr + }; + + address jump_destination() const; + + static void insert_unconditional(address code_pos, address entry); + static void replace_mt_safe(address instr_addr, address code_buffer); +}; + +inline NativeGeneralJump* nativeGeneralJump_at(address addr) { + assert_cond(addr != NULL); + NativeGeneralJump* jump = (NativeGeneralJump*)(addr); + debug_only(jump->verify();) + return jump; +} + +class NativeIllegalInstruction: public NativeInstruction { + public: + // Insert illegal opcode as specific address + static void insert(address code_pos); +}; + +inline bool NativeInstruction::is_nop() { + uint32_t insn = *(uint32_t*)addr_at(0); + return insn == 0x13; +} + +inline bool NativeInstruction::is_jump_or_nop() { + return is_nop() || is_jump(); +} + +// Call trampoline stubs. +class NativeCallTrampolineStub : public NativeInstruction { + public: + + enum RISCV_specific_constants { + // Refer to function emit_trampoline_stub. + instruction_size = 3 * NativeInstruction::instruction_size + wordSize, // auipc + ld + jr + target address + data_offset = 3 * NativeInstruction::instruction_size, // auipc + ld + jr + }; + + address destination(nmethod *nm = NULL) const; + void set_destination(address new_destination); + ptrdiff_t destination_offset() const; +}; + +inline bool is_NativeCallTrampolineStub_at(address addr) { + // Ensure that the stub is exactly + // ld t0, L--->auipc + ld + // jr t0 + // L: + + // judge inst + register + imm + // 1). check the instructions: auipc + ld + jalr + // 2). check if auipc[11:7] == t0 and ld[11:7] == t0 and ld[19:15] == t0 && jr[19:15] == t0 + // 3). 
check if the offset in ld[31:20] equals the data_offset + assert_cond(addr != NULL); + const int instr_size = NativeInstruction::instruction_size; + if (NativeInstruction::is_auipc_at(addr) && + NativeInstruction::is_ld_at(addr + instr_size) && + NativeInstruction::is_jalr_at(addr + 2 * instr_size) && + (NativeInstruction::extract_rd(addr) == x5) && + (NativeInstruction::extract_rd(addr + instr_size) == x5) && + (NativeInstruction::extract_rs1(addr + instr_size) == x5) && + (NativeInstruction::extract_rs1(addr + 2 * instr_size) == x5) && + (Assembler::extract(((unsigned*)addr)[1], 31, 20) == NativeCallTrampolineStub::data_offset)) { + return true; + } + return false; +} + +inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { + assert_cond(addr != NULL); + assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found"); + return (NativeCallTrampolineStub*)addr; +} + +class NativeMembar : public NativeInstruction { +public: + uint32_t get_kind(); + void set_kind(uint32_t order_kind); +}; + +inline NativeMembar *NativeMembar_at(address addr) { + assert_cond(addr != NULL); + assert(nativeInstruction_at(addr)->is_membar(), "no membar found"); + return (NativeMembar*)addr; +} + +class NativeFenceI : public NativeInstruction { +public: + static inline int instruction_size() { + // 2 for fence.i + fence + return (UseConservativeFence ? 2 : 1) * NativeInstruction::instruction_size; + } +}; + +#endif // CPU_RISCV_NATIVEINST_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/registerMap_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/registerMap_riscv.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/registerMap.hpp" +#include "vmreg_riscv.inline.hpp" + +address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const { /* Maps (base_reg, slot_idx) to an address via location(); vector registers are addressed by byte offset from the base register location. */ + if (base_reg->is_VectorRegister()) { + assert(base_reg->is_concrete(), "must pass base reg"); + int base_reg_enc = (base_reg->value() - ConcreteRegisterImpl::max_fpr) / + VectorRegisterImpl::max_slots_per_register; /* NOTE(review): base_reg_enc is computed but not read anywhere in this function. */ + intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size; /* slot index scaled to a byte offset */ + address base_location = location(base_reg); + if (base_location != NULL) { + return base_location + offset_in_bytes; + } else { + return NULL; /* location() had no address for the base register */ + } + } else { + return location(base_reg->next(slot_idx)); + } +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/registerMap_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/registerMap_riscv.hpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. 
All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_REGISTERMAP_RISCV_HPP
+#define CPU_RISCV_REGISTERMAP_RISCV_HPP
+
+// machine-dependent implementation for register maps
+  friend class frame;
+
+ private:
+  // This is the hook for finding a register in a "well-known" location,
+  // such as a register block of a predetermined format.
+  address pd_location(VMReg reg) const { return NULL; }     // single-register hook: always NULL here
+  address pd_location(VMReg base_reg, int slot_idx) const;  // declared only; the body is not in this header
+
+  // no PD state to clear or copy:
+  void pd_clear() {}
+  void pd_initialize() {}
+  void pd_initialize_from(const RegisterMap* map) {}
+
+#endif // CPU_RISCV_REGISTERMAP_RISCV_HPP
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/register_definitions_riscv.cpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/register_definitions_riscv.cpp
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc.
All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "asm/register.hpp" +#include "interp_masm_riscv.hpp" +#include "register_riscv.hpp" + +REGISTER_DEFINITION(Register, noreg); + +REGISTER_DEFINITION(Register, x0); +REGISTER_DEFINITION(Register, x1); +REGISTER_DEFINITION(Register, x2); +REGISTER_DEFINITION(Register, x3); +REGISTER_DEFINITION(Register, x4); +REGISTER_DEFINITION(Register, x5); +REGISTER_DEFINITION(Register, x6); +REGISTER_DEFINITION(Register, x7); +REGISTER_DEFINITION(Register, x8); +REGISTER_DEFINITION(Register, x9); +REGISTER_DEFINITION(Register, x10); +REGISTER_DEFINITION(Register, x11); +REGISTER_DEFINITION(Register, x12); +REGISTER_DEFINITION(Register, x13); +REGISTER_DEFINITION(Register, x14); +REGISTER_DEFINITION(Register, x15); +REGISTER_DEFINITION(Register, x16); +REGISTER_DEFINITION(Register, x17); +REGISTER_DEFINITION(Register, x18); +REGISTER_DEFINITION(Register, x19); +REGISTER_DEFINITION(Register, x20); +REGISTER_DEFINITION(Register, x21); +REGISTER_DEFINITION(Register, x22); +REGISTER_DEFINITION(Register, x23); +REGISTER_DEFINITION(Register, x24); +REGISTER_DEFINITION(Register, x25); +REGISTER_DEFINITION(Register, x26); +REGISTER_DEFINITION(Register, x27); +REGISTER_DEFINITION(Register, x28); +REGISTER_DEFINITION(Register, x29); +REGISTER_DEFINITION(Register, x30); +REGISTER_DEFINITION(Register, x31); + +REGISTER_DEFINITION(FloatRegister, fnoreg); + +REGISTER_DEFINITION(FloatRegister, f0); +REGISTER_DEFINITION(FloatRegister, f1); +REGISTER_DEFINITION(FloatRegister, f2); +REGISTER_DEFINITION(FloatRegister, f3); +REGISTER_DEFINITION(FloatRegister, f4); +REGISTER_DEFINITION(FloatRegister, f5); +REGISTER_DEFINITION(FloatRegister, f6); +REGISTER_DEFINITION(FloatRegister, f7); +REGISTER_DEFINITION(FloatRegister, f8); +REGISTER_DEFINITION(FloatRegister, f9); +REGISTER_DEFINITION(FloatRegister, f10); +REGISTER_DEFINITION(FloatRegister, f11); 
+REGISTER_DEFINITION(FloatRegister, f12); +REGISTER_DEFINITION(FloatRegister, f13); +REGISTER_DEFINITION(FloatRegister, f14); +REGISTER_DEFINITION(FloatRegister, f15); +REGISTER_DEFINITION(FloatRegister, f16); +REGISTER_DEFINITION(FloatRegister, f17); +REGISTER_DEFINITION(FloatRegister, f18); +REGISTER_DEFINITION(FloatRegister, f19); +REGISTER_DEFINITION(FloatRegister, f20); +REGISTER_DEFINITION(FloatRegister, f21); +REGISTER_DEFINITION(FloatRegister, f22); +REGISTER_DEFINITION(FloatRegister, f23); +REGISTER_DEFINITION(FloatRegister, f24); +REGISTER_DEFINITION(FloatRegister, f25); +REGISTER_DEFINITION(FloatRegister, f26); +REGISTER_DEFINITION(FloatRegister, f27); +REGISTER_DEFINITION(FloatRegister, f28); +REGISTER_DEFINITION(FloatRegister, f29); +REGISTER_DEFINITION(FloatRegister, f30); +REGISTER_DEFINITION(FloatRegister, f31); + +REGISTER_DEFINITION(VectorRegister, vnoreg); + +REGISTER_DEFINITION(VectorRegister, v0); +REGISTER_DEFINITION(VectorRegister, v1); +REGISTER_DEFINITION(VectorRegister, v2); +REGISTER_DEFINITION(VectorRegister, v3); +REGISTER_DEFINITION(VectorRegister, v4); +REGISTER_DEFINITION(VectorRegister, v5); +REGISTER_DEFINITION(VectorRegister, v6); +REGISTER_DEFINITION(VectorRegister, v7); +REGISTER_DEFINITION(VectorRegister, v8); +REGISTER_DEFINITION(VectorRegister, v9); +REGISTER_DEFINITION(VectorRegister, v10); +REGISTER_DEFINITION(VectorRegister, v11); +REGISTER_DEFINITION(VectorRegister, v12); +REGISTER_DEFINITION(VectorRegister, v13); +REGISTER_DEFINITION(VectorRegister, v14); +REGISTER_DEFINITION(VectorRegister, v15); +REGISTER_DEFINITION(VectorRegister, v16); +REGISTER_DEFINITION(VectorRegister, v17); +REGISTER_DEFINITION(VectorRegister, v18); +REGISTER_DEFINITION(VectorRegister, v19); +REGISTER_DEFINITION(VectorRegister, v20); +REGISTER_DEFINITION(VectorRegister, v21); +REGISTER_DEFINITION(VectorRegister, v22); +REGISTER_DEFINITION(VectorRegister, v23); +REGISTER_DEFINITION(VectorRegister, v24); +REGISTER_DEFINITION(VectorRegister, v25); 
+REGISTER_DEFINITION(VectorRegister, v26); +REGISTER_DEFINITION(VectorRegister, v27); +REGISTER_DEFINITION(VectorRegister, v28); +REGISTER_DEFINITION(VectorRegister, v29); +REGISTER_DEFINITION(VectorRegister, v30); +REGISTER_DEFINITION(VectorRegister, v31); + +REGISTER_DEFINITION(Register, c_rarg0); +REGISTER_DEFINITION(Register, c_rarg1); +REGISTER_DEFINITION(Register, c_rarg2); +REGISTER_DEFINITION(Register, c_rarg3); +REGISTER_DEFINITION(Register, c_rarg4); +REGISTER_DEFINITION(Register, c_rarg5); +REGISTER_DEFINITION(Register, c_rarg6); +REGISTER_DEFINITION(Register, c_rarg7); + +REGISTER_DEFINITION(FloatRegister, c_farg0); +REGISTER_DEFINITION(FloatRegister, c_farg1); +REGISTER_DEFINITION(FloatRegister, c_farg2); +REGISTER_DEFINITION(FloatRegister, c_farg3); +REGISTER_DEFINITION(FloatRegister, c_farg4); +REGISTER_DEFINITION(FloatRegister, c_farg5); +REGISTER_DEFINITION(FloatRegister, c_farg6); +REGISTER_DEFINITION(FloatRegister, c_farg7); + +REGISTER_DEFINITION(Register, j_rarg0); +REGISTER_DEFINITION(Register, j_rarg1); +REGISTER_DEFINITION(Register, j_rarg2); +REGISTER_DEFINITION(Register, j_rarg3); +REGISTER_DEFINITION(Register, j_rarg4); +REGISTER_DEFINITION(Register, j_rarg5); +REGISTER_DEFINITION(Register, j_rarg6); +REGISTER_DEFINITION(Register, j_rarg7); + +REGISTER_DEFINITION(FloatRegister, j_farg0); +REGISTER_DEFINITION(FloatRegister, j_farg1); +REGISTER_DEFINITION(FloatRegister, j_farg2); +REGISTER_DEFINITION(FloatRegister, j_farg3); +REGISTER_DEFINITION(FloatRegister, j_farg4); +REGISTER_DEFINITION(FloatRegister, j_farg5); +REGISTER_DEFINITION(FloatRegister, j_farg6); +REGISTER_DEFINITION(FloatRegister, j_farg7); + +REGISTER_DEFINITION(Register, zr); +REGISTER_DEFINITION(Register, gp); +REGISTER_DEFINITION(Register, tp); +REGISTER_DEFINITION(Register, xmethod); +REGISTER_DEFINITION(Register, ra); +REGISTER_DEFINITION(Register, sp); +REGISTER_DEFINITION(Register, fp); +REGISTER_DEFINITION(Register, xheapbase); +REGISTER_DEFINITION(Register, xcpool); 
+REGISTER_DEFINITION(Register, xmonitors); +REGISTER_DEFINITION(Register, xlocals); +REGISTER_DEFINITION(Register, xthread); +REGISTER_DEFINITION(Register, xbcp); +REGISTER_DEFINITION(Register, xdispatch); +REGISTER_DEFINITION(Register, esp); + +REGISTER_DEFINITION(Register, t0); +REGISTER_DEFINITION(Register, t1); +REGISTER_DEFINITION(Register, t2); Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/register_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/register_riscv.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "register_riscv.hpp" + +const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * + RegisterImpl::max_slots_per_register; + +const int ConcreteRegisterImpl::max_fpr = + ConcreteRegisterImpl::max_gpr + + FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; + +const int ConcreteRegisterImpl::max_vpr = + ConcreteRegisterImpl::max_fpr + + VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; + + +const char* RegisterImpl::name() const { + const char* names[number_of_registers] = { + "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", + "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", + "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals", + "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod" + }; + return is_valid() ? names[encoding()] : "noreg"; +} + +const char* FloatRegisterImpl::name() const { + const char* names[number_of_registers] = { + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" + }; + return is_valid() ? names[encoding()] : "noreg"; +} + +const char* VectorRegisterImpl::name() const { + const char* names[number_of_registers] = { + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" + }; + return is_valid() ? 
names[encoding()] : "noreg"; +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/register_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/register_riscv.hpp @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_REGISTER_RISCV_HPP +#define CPU_RISCV_REGISTER_RISCV_HPP + +#include "asm/register.hpp" + +#define CSR_FFLAGS 0x001 // Floating-Point Accrued Exceptions. +#define CSR_FRM 0x002 // Floating-Point Dynamic Rounding Mode. +#define CSR_FCSR 0x003 // Floating-Point Control and Status Register (frm + fflags). 
+#define CSR_VSTART 0x008 // Vector start position +#define CSR_VXSAT 0x009 // Fixed-Point Saturate Flag +#define CSR_VXRM 0x00A // Fixed-Point Rounding Mode +#define CSR_VCSR 0x00F // Vector control and status register +#define CSR_VL 0xC20 // Vector length +#define CSR_VTYPE 0xC21 // Vector data type register +#define CSR_VLENB 0xC22 // VLEN/8 (vector register length in bytes) +#define CSR_CYCLE 0xc00 // Cycle counter for RDCYCLE instruction. +#define CSR_TIME 0xc01 // Timer for RDTIME instruction. +#define CSR_INSTERT 0xc02 // Instructions-retired counter for RDINSTRET instruction. + +class VMRegImpl; +typedef VMRegImpl* VMReg; + +// Use Register as shortcut +class RegisterImpl; +typedef RegisterImpl* Register; + +inline const Register as_Register(int encoding) { + return (Register)(intptr_t) encoding; +} + +class RegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 32, + max_slots_per_register = 2, + + // integer registers x8 - x15 and floating-point registers f8 - f15 are allocatable + // for compressed instructions. See Table 17.2 in spec. 
+ compressed_register_base = 8, + compressed_register_top = 15, + }; + + // derived registers, offsets, and addresses + Register successor() const { return as_Register(encoding() + 1); } + + // construction + inline friend const Register as_Register(int encoding); + + VMReg as_VMReg(); + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; + int encoding_nocheck() const { return (intptr_t)this; } + + // for rvc + int compressed_encoding() const { + assert(is_compressed_valid(), "invalid compressed register"); + return encoding() - compressed_register_base; + } + + int compressed_encoding_nocheck() const { + return encoding_nocheck() - compressed_register_base; + } + + bool is_compressed_valid() const { + return encoding_nocheck() >= compressed_register_base && + encoding_nocheck() <= compressed_register_top; + } +}; + +// The integer registers of the RISCV architecture + +CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + + +CONSTANT_REGISTER_DECLARATION(Register, x0, (0)); +CONSTANT_REGISTER_DECLARATION(Register, x1, (1)); +CONSTANT_REGISTER_DECLARATION(Register, x2, (2)); +CONSTANT_REGISTER_DECLARATION(Register, x3, (3)); +CONSTANT_REGISTER_DECLARATION(Register, x4, (4)); +CONSTANT_REGISTER_DECLARATION(Register, x5, (5)); +CONSTANT_REGISTER_DECLARATION(Register, x6, (6)); +CONSTANT_REGISTER_DECLARATION(Register, x7, (7)); +CONSTANT_REGISTER_DECLARATION(Register, x8, (8)); +CONSTANT_REGISTER_DECLARATION(Register, x9, (9)); +CONSTANT_REGISTER_DECLARATION(Register, x10, (10)); +CONSTANT_REGISTER_DECLARATION(Register, x11, (11)); +CONSTANT_REGISTER_DECLARATION(Register, x12, (12)); +CONSTANT_REGISTER_DECLARATION(Register, x13, (13)); +CONSTANT_REGISTER_DECLARATION(Register, x14, (14)); +CONSTANT_REGISTER_DECLARATION(Register, x15, (15)); +CONSTANT_REGISTER_DECLARATION(Register, x16, (16)); 
+CONSTANT_REGISTER_DECLARATION(Register, x17, (17)); +CONSTANT_REGISTER_DECLARATION(Register, x18, (18)); +CONSTANT_REGISTER_DECLARATION(Register, x19, (19)); +CONSTANT_REGISTER_DECLARATION(Register, x20, (20)); +CONSTANT_REGISTER_DECLARATION(Register, x21, (21)); +CONSTANT_REGISTER_DECLARATION(Register, x22, (22)); +CONSTANT_REGISTER_DECLARATION(Register, x23, (23)); +CONSTANT_REGISTER_DECLARATION(Register, x24, (24)); +CONSTANT_REGISTER_DECLARATION(Register, x25, (25)); +CONSTANT_REGISTER_DECLARATION(Register, x26, (26)); +CONSTANT_REGISTER_DECLARATION(Register, x27, (27)); +CONSTANT_REGISTER_DECLARATION(Register, x28, (28)); +CONSTANT_REGISTER_DECLARATION(Register, x29, (29)); +CONSTANT_REGISTER_DECLARATION(Register, x30, (30)); +CONSTANT_REGISTER_DECLARATION(Register, x31, (31)); + +// Use FloatRegister as shortcut +class FloatRegisterImpl; +typedef FloatRegisterImpl* FloatRegister; + +inline FloatRegister as_FloatRegister(int encoding) { + return (FloatRegister)(intptr_t) encoding; +} + +// The implementation of floating point registers for the architecture +class FloatRegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 32, + max_slots_per_register = 2, + + // float registers in the range of [f8~f15] correspond to RVC. Please see Table 16.2 in spec. 
+ compressed_register_base = 8, + compressed_register_top = 15, + }; + + // construction + inline friend FloatRegister as_FloatRegister(int encoding); + + VMReg as_VMReg(); + + // derived registers, offsets, and addresses + FloatRegister successor() const { return as_FloatRegister((encoding() + 1) % 32); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + int encoding_nocheck() const { return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; + + // for rvc + int compressed_encoding() const { + assert(is_compressed_valid(), "invalid compressed register"); + return encoding() - compressed_register_base; + } + + int compressed_encoding_nocheck() const { + return encoding_nocheck() - compressed_register_base; + } + + bool is_compressed_valid() const { + return encoding_nocheck() >= compressed_register_base && + encoding_nocheck() <= compressed_register_top; + } +}; + +// The float registers of the RISCV architecture + +CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); + +CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); 
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); + +// Use VectorRegister as shortcut +class VectorRegisterImpl; +typedef VectorRegisterImpl* VectorRegister; + +inline VectorRegister as_VectorRegister(int encoding) { + return (VectorRegister)(intptr_t) encoding; +} + +// The implementation of vector registers for RVV +class VectorRegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 32, + max_slots_per_register = 4 + }; + + // construction + inline friend VectorRegister as_VectorRegister(int encoding); + + VMReg as_VMReg(); + + // derived registers, offsets, and addresses + VectorRegister successor() const { return as_VectorRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + int encoding_nocheck() const { return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; + 
+}; + +// The vector registers of RVV +CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1)); + +CONSTANT_REGISTER_DECLARATION(VectorRegister, v0 , ( 0)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v1 , ( 1)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v2 , ( 2)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v3 , ( 3)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v4 , ( 4)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v5 , ( 5)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v6 , ( 6)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v7 , ( 7)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v8 , ( 8)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v9 , ( 9)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v10 , (10)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v11 , (11)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v12 , (12)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v13 , (13)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v14 , (14)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v15 , (15)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v16 , (16)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v17 , (17)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v18 , (18)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v19 , (19)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v20 , (20)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v21 , (21)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v22 , (22)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v23 , (23)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v24 , (24)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v25 , (25)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v26 , (26)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v27 , (27)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v28 , (28)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v29 , (29)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, v30 , (30)); +CONSTANT_REGISTER_DECLARATION(VectorRegister, 
v31 , (31)); + + +// Need to know the total number of registers of all sorts for SharedInfo. +// Define a class that exports it. +class ConcreteRegisterImpl : public AbstractRegisterImpl { + public: + enum { + // A big enough number for C2: all the registers plus flags + // This number must be large enough to cover REG_COUNT (defined by c2) registers. + // There is no requirement that any ordering here matches any ordering c2 gives + // it's optoregs. + + number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + + FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + + VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers) + }; + + // added to make it compile + static const int max_gpr; + static const int max_fpr; + static const int max_vpr; +}; + +template class RegSetIterator; + +// A set of registers +template +class AbstractRegSet { + uint32_t _bitset; + + AbstractRegSet(uint32_t bitset) : _bitset(bitset) { } + +public: + + AbstractRegSet() : _bitset(0) { } + + AbstractRegSet(RegImpl r1) : _bitset(1 << r1->encoding()) { } + + AbstractRegSet operator+(const AbstractRegSet aSet) const { + AbstractRegSet result(_bitset | aSet._bitset); + return result; + } + + AbstractRegSet operator-(const AbstractRegSet aSet) const { + AbstractRegSet result(_bitset & ~aSet._bitset); + return result; + } + + AbstractRegSet &operator+=(const AbstractRegSet aSet) { + *this = *this + aSet; + return *this; + } + + AbstractRegSet &operator-=(const AbstractRegSet aSet) { + *this = *this - aSet; + return *this; + } + + static AbstractRegSet of(RegImpl r1) { + return AbstractRegSet(r1); + } + + static AbstractRegSet of(RegImpl r1, RegImpl r2) { + return of(r1) + r2; + } + + static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3) { + return of(r1, r2) + r3; + } + + static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3, RegImpl r4) { + return of(r1, r2, r3) + r4; + } + + static 
AbstractRegSet range(RegImpl start, RegImpl end) { + uint32_t bits = ~0; + bits <<= start->encoding(); + bits <<= 31 - end->encoding(); + bits >>= 31 - end->encoding(); + + return AbstractRegSet(bits); + } + + uint32_t bits() const { return _bitset; } + +private: + + RegImpl first(); + +public: + + friend class RegSetIterator; + + RegSetIterator begin(); +}; + +typedef AbstractRegSet RegSet; +typedef AbstractRegSet FloatRegSet; +typedef AbstractRegSet VectorRegSet; + +template +class RegSetIterator { + AbstractRegSet _regs; + +public: + RegSetIterator(AbstractRegSet x): _regs(x) {} + RegSetIterator(const RegSetIterator& mit) : _regs(mit._regs) {} + + RegSetIterator& operator++() { + RegImpl r = _regs.first(); + if (r->is_valid()) + _regs -= r; + return *this; + } + + bool operator==(const RegSetIterator& rhs) const { + return _regs.bits() == rhs._regs.bits(); + } + bool operator!=(const RegSetIterator& rhs) const { + return ! (rhs == *this); + } + + RegImpl operator*() { + return _regs.first(); + } +}; + +template +inline RegSetIterator AbstractRegSet::begin() { + return RegSetIterator(*this); +} + +#endif // CPU_RISCV_REGISTER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/relocInfo_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/relocInfo_riscv.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "code/relocInfo.hpp"
+#include "nativeInst_riscv.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/safepoint.hpp"
+
+void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {  // patches the code at addr() to refer to x; o is not read here
+  if (verify_only) {  // verification requests return without touching the code
+    return;
+  }
+
+  int bytes;  // assigned by every switch branch below; passed to the icache invalidation as the range size
+
+  switch (type()) {
+    case relocInfo::oop_type: {
+      oop_Relocation *reloc = (oop_Relocation *)this;
+      // in movoop when BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate
+      if (NativeInstruction::is_load_pc_relative_at(addr())) {
+        address constptr = (address)code()->oop_addr_at(reloc->oop_index());  // slot for this oop index, not the oop value
+        bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr);
+        assert(*(address*)constptr == x, "error in oop relocation");  // the slot must already hold x
+      } else {
+        bytes = MacroAssembler::patch_oop(addr(), x);
+      }
+      break;
+    }
+    default:
+      bytes = MacroAssembler::pd_patch_instruction_size(addr(), x);
+      break;
+  }
+  ICache::invalidate_range(addr(), bytes);  // runs after the patch on every non-verify path
+}
+
+address Relocation::pd_call_destination(address orig_addr) {  // returns the target of the call at addr(); orig_addr may be NULL
+  assert(is_call(), "should be an address instruction here");
+  if (NativeCall::is_call_at(addr())) {
+    address trampoline = nativeCall_at(addr())->get_trampoline();
+    if (trampoline != NULL) {  // a call with a trampoline reports the trampoline stub's destination
+      return nativeCallTrampolineStub_at(trampoline)->destination();
+    }
+  }
+  if (orig_addr != NULL) {
+    // the extracted address from the instructions in address orig_addr
+    address new_addr = MacroAssembler::pd_call_destination(orig_addr);
+    // If call is branch to self, don't try to relocate it, just leave it
+    // as branch to self. This happens during code generation if the code
+    // buffer expands. It will be relocated to the trampoline above once
+    // code generation is complete.
+    new_addr = (new_addr == orig_addr) ? addr() : new_addr;
+    return new_addr;
+  }
+  return MacroAssembler::pd_call_destination(addr());  // no trampoline and no orig_addr: decode at addr()
+}
+
+void Relocation::pd_set_call_destination(address x) {  // retargets the call at addr() to x
+  assert(is_call(), "should be an address instruction here");
+  if (NativeCall::is_call_at(addr())) {
+    address trampoline = nativeCall_at(addr())->get_trampoline();
+    if (trampoline != NULL) {  // calls with a trampoline are updated through NativeCall, not patched directly
+      nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false);
+      return;
+    }
+  }
+  MacroAssembler::pd_patch_instruction_size(addr(), x);
+  address pd_call = pd_call_destination(addr());  // read the target back; used only by the assert below
+  assert(pd_call == x, "fail in reloc");
+}
+
+address* Relocation::pd_address_in_code() {  // returns target_addr_for_insn(addr()) cast to address*
+  assert(NativeCall::is_load_pc_relative_at(addr()), "Not the expected instruction sequence!");
+  return (address*)(MacroAssembler::target_addr_for_insn(addr()));
+}
+
+address Relocation::pd_get_address_from_code() {  // same decoder as the final fallback of pd_call_destination
+  return MacroAssembler::pd_call_destination(addr());
+}
+
+void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
+  if (NativeInstruction::maybe_cpool_ref(addr())) {  // other instruction forms are left untouched
+    address old_addr = old_addr_for(addr(), src, dest);
+    MacroAssembler::pd_patch_instruction_size(addr(), MacroAssembler::target_addr_for_insn(old_addr));  // re-point at the target decoded at the old address
+  }
+}
+
+void metadata_Relocation::pd_fix_value(address x) {  // intentionally empty
+}
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/relocInfo_riscv.hpp
===================================================================
--- /dev/null
+++
jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/relocInfo_riscv.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_RELOCINFO_RISCV_HPP +#define CPU_RISCV_RELOCINFO_RISCV_HPP + + // machine-dependent parts of class relocInfo + private: + enum { + // Relocations are byte-aligned. + offset_unit = 1, + // Must be at least 1 for RelocInfo::narrow_oop_in_const. + format_width = 1 + }; + + public: + + // This platform has no oops in the code that are not also + // listed in the oop section. 
+ static bool mustIterateImmediateOopsInCode() { return false; } + +#endif // CPU_RISCV_RELOCINFO_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/riscv.ad =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/riscv.ad @@ -0,0 +1,10584 @@ +// +// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. +// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// RISCV Architecture Description File + +//----------REGISTER DEFINITION BLOCK------------------------------------------ +// This information is used by the matcher and the register allocator to +// describe individual registers and classes of registers within the target +// architecture.
+ +register %{ +//----------Architecture Description Register Definitions---------------------- +// General Registers +// "reg_def" name ( register save type, C convention save type, +// ideal register type, encoding ); +// Register Save Types: +// +// NS = No-Save: The register allocator assumes that these registers +// can be used without saving upon entry to the method, & +// that they do not need to be saved at call sites. +// +// SOC = Save-On-Call: The register allocator assumes that these registers +// can be used without saving upon entry to the method, +// but that they must be saved at call sites. +// +// SOE = Save-On-Entry: The register allocator assumes that these registers +// must be saved before using them upon entry to the +// method, but they do not need to be saved at call +// sites. +// +// AS = Always-Save: The register allocator assumes that these registers +// must be saved before using them upon entry to the +// method, & that they must be saved at call sites. +// +// Ideal Register Type is used to determine how to save & restore a +// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get +// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. +// +// The encoding number is the actual bit-pattern placed into the opcodes. + +// We must define the 64 bit int registers in two 32 bit halves, the +// real lower register and a virtual upper half register. upper halves +// are used by the register allocator but are not actually supplied as +// operands to memory ops. +// +// follow the C1 compiler in making registers +// +// x7, x9-x17, x27-x31 volatile (caller save) +// x0-x4, x8, x23 system (no save, no allocate) +// x5-x6 non-allocatable (so we can use them as temporary regs) + +// +// as regards Java usage. 
we don't use any callee save registers +// because this makes it difficult to de-optimise a frame (see comment +// in x86 implementation of Deoptimization::unwind_callee_save_values) +// + +// General Registers + +reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr +reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() ); +reg_def R1 ( NS, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra +reg_def R1_H ( NS, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); +reg_def R2 ( NS, SOE, Op_RegI, 2, x2->as_VMReg() ); // sp +reg_def R2_H ( NS, SOE, Op_RegI, 2, x2->as_VMReg()->next() ); +reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp +reg_def R3_H ( NS, NS, Op_RegI, 3, x3->as_VMReg()->next() ); +reg_def R4 ( NS, NS, Op_RegI, 4, x4->as_VMReg() ); // tp +reg_def R4_H ( NS, NS, Op_RegI, 4, x4->as_VMReg()->next() ); +reg_def R7 ( SOC, SOC, Op_RegI, 7, x7->as_VMReg() ); +reg_def R7_H ( SOC, SOC, Op_RegI, 7, x7->as_VMReg()->next() ); +reg_def R8 ( NS, SOE, Op_RegI, 8, x8->as_VMReg() ); // fp +reg_def R8_H ( NS, SOE, Op_RegI, 8, x8->as_VMReg()->next() ); +reg_def R9 ( SOC, SOE, Op_RegI, 9, x9->as_VMReg() ); +reg_def R9_H ( SOC, SOE, Op_RegI, 9, x9->as_VMReg()->next() ); +reg_def R10 ( SOC, SOC, Op_RegI, 10, x10->as_VMReg() ); +reg_def R10_H ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()->next()); +reg_def R11 ( SOC, SOC, Op_RegI, 11, x11->as_VMReg() ); +reg_def R11_H ( SOC, SOC, Op_RegI, 11, x11->as_VMReg()->next()); +reg_def R12 ( SOC, SOC, Op_RegI, 12, x12->as_VMReg() ); +reg_def R12_H ( SOC, SOC, Op_RegI, 12, x12->as_VMReg()->next()); +reg_def R13 ( SOC, SOC, Op_RegI, 13, x13->as_VMReg() ); +reg_def R13_H ( SOC, SOC, Op_RegI, 13, x13->as_VMReg()->next()); +reg_def R14 ( SOC, SOC, Op_RegI, 14, x14->as_VMReg() ); +reg_def R14_H ( SOC, SOC, Op_RegI, 14, x14->as_VMReg()->next()); +reg_def R15 ( SOC, SOC, Op_RegI, 15, x15->as_VMReg() ); +reg_def R15_H ( SOC, SOC, Op_RegI, 15, x15->as_VMReg()->next()); +reg_def R16 ( SOC, SOC, Op_RegI, 16, x16->as_VMReg() ); +reg_def R16_H ( SOC, SOC, 
Op_RegI, 16, x16->as_VMReg()->next()); +reg_def R17 ( SOC, SOC, Op_RegI, 17, x17->as_VMReg() ); +reg_def R17_H ( SOC, SOC, Op_RegI, 17, x17->as_VMReg()->next()); +reg_def R18 ( SOC, SOE, Op_RegI, 18, x18->as_VMReg() ); +reg_def R18_H ( SOC, SOE, Op_RegI, 18, x18->as_VMReg()->next()); +reg_def R19 ( SOC, SOE, Op_RegI, 19, x19->as_VMReg() ); +reg_def R19_H ( SOC, SOE, Op_RegI, 19, x19->as_VMReg()->next()); +reg_def R20 ( SOC, SOE, Op_RegI, 20, x20->as_VMReg() ); // caller esp +reg_def R20_H ( SOC, SOE, Op_RegI, 20, x20->as_VMReg()->next()); +reg_def R21 ( SOC, SOE, Op_RegI, 21, x21->as_VMReg() ); +reg_def R21_H ( SOC, SOE, Op_RegI, 21, x21->as_VMReg()->next()); +reg_def R22 ( SOC, SOE, Op_RegI, 22, x22->as_VMReg() ); +reg_def R22_H ( SOC, SOE, Op_RegI, 22, x22->as_VMReg()->next()); +reg_def R23 ( NS, SOE, Op_RegI, 23, x23->as_VMReg() ); // java thread +reg_def R23_H ( NS, SOE, Op_RegI, 23, x23->as_VMReg()->next()); +reg_def R24 ( SOC, SOE, Op_RegI, 24, x24->as_VMReg() ); +reg_def R24_H ( SOC, SOE, Op_RegI, 24, x24->as_VMReg()->next()); +reg_def R25 ( SOC, SOE, Op_RegI, 25, x25->as_VMReg() ); +reg_def R25_H ( SOC, SOE, Op_RegI, 25, x25->as_VMReg()->next()); +reg_def R26 ( SOC, SOE, Op_RegI, 26, x26->as_VMReg() ); +reg_def R26_H ( SOC, SOE, Op_RegI, 26, x26->as_VMReg()->next()); +reg_def R27 ( SOC, SOE, Op_RegI, 27, x27->as_VMReg() ); // heapbase +reg_def R27_H ( SOC, SOE, Op_RegI, 27, x27->as_VMReg()->next()); +reg_def R28 ( SOC, SOC, Op_RegI, 28, x28->as_VMReg() ); +reg_def R28_H ( SOC, SOC, Op_RegI, 28, x28->as_VMReg()->next()); +reg_def R29 ( SOC, SOC, Op_RegI, 29, x29->as_VMReg() ); +reg_def R29_H ( SOC, SOC, Op_RegI, 29, x29->as_VMReg()->next()); +reg_def R30 ( SOC, SOC, Op_RegI, 30, x30->as_VMReg() ); +reg_def R30_H ( SOC, SOC, Op_RegI, 30, x30->as_VMReg()->next()); +reg_def R31 ( SOC, SOC, Op_RegI, 31, x31->as_VMReg() ); +reg_def R31_H ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()->next()); + +// ---------------------------- +// Float/Double Registers +// 
---------------------------- + +// Double Registers + +// The rules of ADL require that double registers be defined in pairs. +// Each pair must be two 32-bit values, but not necessarily a pair of +// single float registers. In each pair, ADLC-assigned register numbers +// must be adjacent, with the lower number even. Finally, when the +// CPU stores such a register pair to memory, the word associated with +// the lower ADLC-assigned number must be stored to the lower address. + +// RISCV has 32 floating-point registers. Each can store a single +// or double precision floating-point value. + +// for Java use float registers f0-f31 are always save on call (whereas +// the platform ABI treats f8-f9 and f18-f27 as callee save). Other +// float registers are SOC as per the platform spec + +reg_def F0 ( SOC, SOC, Op_RegF, 0, f0->as_VMReg() ); +reg_def F0_H ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()->next() ); +reg_def F1 ( SOC, SOC, Op_RegF, 1, f1->as_VMReg() ); +reg_def F1_H ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()->next() ); +reg_def F2 ( SOC, SOC, Op_RegF, 2, f2->as_VMReg() ); +reg_def F2_H ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()->next() ); +reg_def F3 ( SOC, SOC, Op_RegF, 3, f3->as_VMReg() ); +reg_def F3_H ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()->next() ); +reg_def F4 ( SOC, SOC, Op_RegF, 4, f4->as_VMReg() ); +reg_def F4_H ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()->next() ); +reg_def F5 ( SOC, SOC, Op_RegF, 5, f5->as_VMReg() ); +reg_def F5_H ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()->next() ); +reg_def F6 ( SOC, SOC, Op_RegF, 6, f6->as_VMReg() ); +reg_def F6_H ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()->next() ); +reg_def F7 ( SOC, SOC, Op_RegF, 7, f7->as_VMReg() ); +reg_def F7_H ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()->next() ); +reg_def F8 ( SOC, SOE, Op_RegF, 8, f8->as_VMReg() ); +reg_def F8_H ( SOC, SOE, Op_RegF, 8, f8->as_VMReg()->next() ); +reg_def F9 ( SOC, SOE, Op_RegF, 9, f9->as_VMReg() ); +reg_def F9_H ( SOC, SOE, Op_RegF, 9, f9->as_VMReg()->next() ); +reg_def F10 ( SOC, SOC,
Op_RegF, 10, f10->as_VMReg() ); +reg_def F10_H ( SOC, SOC, Op_RegF, 10, f10->as_VMReg()->next() ); +reg_def F11 ( SOC, SOC, Op_RegF, 11, f11->as_VMReg() ); +reg_def F11_H ( SOC, SOC, Op_RegF, 11, f11->as_VMReg()->next() ); +reg_def F12 ( SOC, SOC, Op_RegF, 12, f12->as_VMReg() ); +reg_def F12_H ( SOC, SOC, Op_RegF, 12, f12->as_VMReg()->next() ); +reg_def F13 ( SOC, SOC, Op_RegF, 13, f13->as_VMReg() ); +reg_def F13_H ( SOC, SOC, Op_RegF, 13, f13->as_VMReg()->next() ); +reg_def F14 ( SOC, SOC, Op_RegF, 14, f14->as_VMReg() ); +reg_def F14_H ( SOC, SOC, Op_RegF, 14, f14->as_VMReg()->next() ); +reg_def F15 ( SOC, SOC, Op_RegF, 15, f15->as_VMReg() ); +reg_def F15_H ( SOC, SOC, Op_RegF, 15, f15->as_VMReg()->next() ); +reg_def F16 ( SOC, SOC, Op_RegF, 16, f16->as_VMReg() ); +reg_def F16_H ( SOC, SOC, Op_RegF, 16, f16->as_VMReg()->next() ); +reg_def F17 ( SOC, SOC, Op_RegF, 17, f17->as_VMReg() ); +reg_def F17_H ( SOC, SOC, Op_RegF, 17, f17->as_VMReg()->next() ); +reg_def F18 ( SOC, SOE, Op_RegF, 18, f18->as_VMReg() ); +reg_def F18_H ( SOC, SOE, Op_RegF, 18, f18->as_VMReg()->next() ); +reg_def F19 ( SOC, SOE, Op_RegF, 19, f19->as_VMReg() ); +reg_def F19_H ( SOC, SOE, Op_RegF, 19, f19->as_VMReg()->next() ); +reg_def F20 ( SOC, SOE, Op_RegF, 20, f20->as_VMReg() ); +reg_def F20_H ( SOC, SOE, Op_RegF, 20, f20->as_VMReg()->next() ); +reg_def F21 ( SOC, SOE, Op_RegF, 21, f21->as_VMReg() ); +reg_def F21_H ( SOC, SOE, Op_RegF, 21, f21->as_VMReg()->next() ); +reg_def F22 ( SOC, SOE, Op_RegF, 22, f22->as_VMReg() ); +reg_def F22_H ( SOC, SOE, Op_RegF, 22, f22->as_VMReg()->next() ); +reg_def F23 ( SOC, SOE, Op_RegF, 23, f23->as_VMReg() ); +reg_def F23_H ( SOC, SOE, Op_RegF, 23, f23->as_VMReg()->next() ); +reg_def F24 ( SOC, SOE, Op_RegF, 24, f24->as_VMReg() ); +reg_def F24_H ( SOC, SOE, Op_RegF, 24, f24->as_VMReg()->next() ); +reg_def F25 ( SOC, SOE, Op_RegF, 25, f25->as_VMReg() ); +reg_def F25_H ( SOC, SOE, Op_RegF, 25, f25->as_VMReg()->next() ); +reg_def F26 ( SOC, SOE, Op_RegF, 26, 
f26->as_VMReg() ); +reg_def F26_H ( SOC, SOE, Op_RegF, 26, f26->as_VMReg()->next() ); +reg_def F27 ( SOC, SOE, Op_RegF, 27, f27->as_VMReg() ); +reg_def F27_H ( SOC, SOE, Op_RegF, 27, f27->as_VMReg()->next() ); +reg_def F28 ( SOC, SOC, Op_RegF, 28, f28->as_VMReg() ); +reg_def F28_H ( SOC, SOC, Op_RegF, 28, f28->as_VMReg()->next() ); +reg_def F29 ( SOC, SOC, Op_RegF, 29, f29->as_VMReg() ); +reg_def F29_H ( SOC, SOC, Op_RegF, 29, f29->as_VMReg()->next() ); +reg_def F30 ( SOC, SOC, Op_RegF, 30, f30->as_VMReg() ); +reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() ); +reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() ); +reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); + +// ---------------------------- +// Vector Registers +// ---------------------------- + +// For RVV vector registers, we simply extend vector register size to 4 +// 'logical' slots. This is nominally 128 bits but it actually covers +// all possible 'physical' RVV vector register lengths from 128 ~ 1024 +// bits. The 'physical' RVV vector register length is detected during +// startup, so the register allocator is able to identify the correct +// number of bytes needed for an RVV spill/unspill. 
+ +reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() ); +reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() ); +reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) ); +reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) ); + +reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() ); +reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() ); +reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) ); +reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) ); + +reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() ); +reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() ); +reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) ); +reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) ); + +reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() ); +reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() ); +reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) ); +reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) ); + +reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() ); +reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() ); +reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) ); +reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) ); + +reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() ); +reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() ); +reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) ); +reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) ); + +reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() ); +reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() ); +reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) ); +reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) ); + +reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() ); +reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() ); +reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) ); +reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) ); 
+ +reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() ); +reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() ); +reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) ); +reg_def V8_K ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(3) ); + +reg_def V9 ( SOC, SOC, Op_VecA, 9, v9->as_VMReg() ); +reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() ); +reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) ); +reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) ); + +reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() ); +reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() ); +reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) ); +reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) ); + +reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() ); +reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() ); +reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) ); +reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) ); + +reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() ); +reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() ); +reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) ); +reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) ); + +reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() ); +reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() ); +reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) ); +reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) ); + +reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() ); +reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() ); +reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) ); +reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) ); + +reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() ); +reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() ); +reg_def V15_J ( SOC, SOC, Op_VecA, 15, 
v15->as_VMReg()->next(2) ); +reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) ); + +reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() ); +reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() ); +reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) ); +reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) ); + +reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() ); +reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() ); +reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) ); +reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) ); + +reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() ); +reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() ); +reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) ); +reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) ); + +reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() ); +reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() ); +reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) ); +reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) ); + +reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() ); +reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() ); +reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) ); +reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) ); + +reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() ); +reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() ); +reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) ); +reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) ); + +reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() ); +reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() ); +reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) ); +reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) ); + +reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() ); +reg_def 
V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() ); +reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) ); +reg_def V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) ); + +reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() ); +reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() ); +reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) ); +reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) ); + +reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() ); +reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() ); +reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) ); +reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) ); + +reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() ); +reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() ); +reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) ); +reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) ); + +reg_def V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() ); +reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() ); +reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) ); +reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) ); + +reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() ); +reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() ); +reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) ); +reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) ); + +reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() ); +reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() ); +reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) ); +reg_def V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) ); + +reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() ); +reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() ); +reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) ); +reg_def V30_K ( SOC, SOC, Op_VecA, 30, 
v30->as_VMReg()->next(3) ); + +reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() ); +reg_def V31_H ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next() ); +reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) ); +reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) ); + +// ---------------------------- +// Special Registers +// ---------------------------- + +// On riscv, the physical flag register is missing, so we use t1 instead, +// to bridge the RegFlag semantics in share/opto + +reg_def RFLAGS (SOC, SOC, Op_RegFlags, 6, x6->as_VMReg() ); + +// Specify priority of register selection within phases of register +// allocation. Highest priority is first. A useful heuristic is to +// give registers a low priority when they are required by machine +// instructions, like EAX and EDX on I486, and choose no-save registers +// before save-on-call, & save-on-call before save-on-entry. Registers +// which participate in fixed calling sequences should come last. +// Registers which are used as pairs must fall on an even boundary. 
+ +alloc_class chunk0( + // volatiles + R7, R7_H, + R28, R28_H, + R29, R29_H, + R30, R30_H, + R31, R31_H, + + // arg registers + R10, R10_H, + R11, R11_H, + R12, R12_H, + R13, R13_H, + R14, R14_H, + R15, R15_H, + R16, R16_H, + R17, R17_H, + + // non-volatiles + R9, R9_H, + R18, R18_H, + R19, R19_H, + R20, R20_H, + R21, R21_H, + R22, R22_H, + R24, R24_H, + R25, R25_H, + R26, R26_H, + + // non-allocatable registers + R23, R23_H, // java thread + R27, R27_H, // heapbase + R4, R4_H, // thread + R8, R8_H, // fp + R0, R0_H, // zero + R1, R1_H, // ra + R2, R2_H, // sp + R3, R3_H, // gp +); + +alloc_class chunk1( + + // no save + F0, F0_H, + F1, F1_H, + F2, F2_H, + F3, F3_H, + F4, F4_H, + F5, F5_H, + F6, F6_H, + F7, F7_H, + F28, F28_H, + F29, F29_H, + F30, F30_H, + F31, F31_H, + + // arg registers + F10, F10_H, + F11, F11_H, + F12, F12_H, + F13, F13_H, + F14, F14_H, + F15, F15_H, + F16, F16_H, + F17, F17_H, + + // non-volatiles + F8, F8_H, + F9, F9_H, + F18, F18_H, + F19, F19_H, + F20, F20_H, + F21, F21_H, + F22, F22_H, + F23, F23_H, + F24, F24_H, + F25, F25_H, + F26, F26_H, + F27, F27_H, +); + +alloc_class chunk2( + V0, V0_H, V0_J, V0_K, + V1, V1_H, V1_J, V1_K, + V2, V2_H, V2_J, V2_K, + V3, V3_H, V3_J, V3_K, + V4, V4_H, V4_J, V4_K, + V5, V5_H, V5_J, V5_K, + V6, V6_H, V6_J, V6_K, + V7, V7_H, V7_J, V7_K, + V8, V8_H, V8_J, V8_K, + V9, V9_H, V9_J, V9_K, + V10, V10_H, V10_J, V10_K, + V11, V11_H, V11_J, V11_K, + V12, V12_H, V12_J, V12_K, + V13, V13_H, V13_J, V13_K, + V14, V14_H, V14_J, V14_K, + V15, V15_H, V15_J, V15_K, + V16, V16_H, V16_J, V16_K, + V17, V17_H, V17_J, V17_K, + V18, V18_H, V18_J, V18_K, + V19, V19_H, V19_J, V19_K, + V20, V20_H, V20_J, V20_K, + V21, V21_H, V21_J, V21_K, + V22, V22_H, V22_J, V22_K, + V23, V23_H, V23_J, V23_K, + V24, V24_H, V24_J, V24_K, + V25, V25_H, V25_J, V25_K, + V26, V26_H, V26_J, V26_K, + V27, V27_H, V27_J, V27_K, + V28, V28_H, V28_J, V28_K, + V29, V29_H, V29_J, V29_K, + V30, V30_H, V30_J, V30_K, + V31, V31_H, V31_J, V31_K, +); + +alloc_class 
chunk3(RFLAGS); + +//----------Architecture Description Register Classes-------------------------- +// Several register classes are automatically defined based upon information in +// this architecture description. +// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) +// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) +// + +// Class for all 32 bit general purpose registers +reg_class all_reg32( + R0, + R1, + R2, + R3, + R4, + R7, + R8, + R9, + R10, + R11, + R12, + R13, + R14, + R15, + R16, + R17, + R18, + R19, + R20, + R21, + R22, + R23, + R24, + R25, + R26, + R27, + R28, + R29, + R30, + R31 +); + +// Class for any 32 bit integer registers (excluding zr) +reg_class any_reg32 %{ + return _ANY_REG32_mask; +%} + +// Singleton class for R10 int register +reg_class int_r10_reg(R10); + +// Singleton class for R12 int register +reg_class int_r12_reg(R12); + +// Singleton class for R13 int register +reg_class int_r13_reg(R13); + +// Singleton class for R14 int register +reg_class int_r14_reg(R14); + +// Class for all long integer registers +reg_class all_reg( + R0, R0_H, + R1, R1_H, + R2, R2_H, + R3, R3_H, + R4, R4_H, + R7, R7_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R12, R12_H, + R13, R13_H, + R14, R14_H, + R15, R15_H, + R16, R16_H, + R17, R17_H, + R18, R18_H, + R19, R19_H, + R20, R20_H, + R21, R21_H, + R22, R22_H, + R23, R23_H, + R24, R24_H, + R25, R25_H, + R26, R26_H, + R27, R27_H, + R28, R28_H, + R29, R29_H, + R30, R30_H, + R31, R31_H +); + +// Class for all long integer registers (excluding zr) +reg_class any_reg %{ + return _ANY_REG_mask; +%} + +// Class for non-allocatable 32 bit registers +reg_class non_allocatable_reg32( + R0, // zr + R1, // ra + R2, // sp + R3, // gp + R4, // tp + R23 // java thread +); + +// Class for non-allocatable 64 bit registers +reg_class non_allocatable_reg( + R0, R0_H, // zr + R1, R1_H, // ra + R2, R2_H, // sp + R3, R3_H, // gp + R4, R4_H, // tp + R23, R23_H // java thread +); + 
+reg_class no_special_reg32 %{ + return _NO_SPECIAL_REG32_mask; +%} + +reg_class no_special_reg %{ + return _NO_SPECIAL_REG_mask; +%} + +reg_class ptr_reg %{ + return _PTR_REG_mask; +%} + +reg_class no_special_ptr_reg %{ + return _NO_SPECIAL_PTR_REG_mask; +%} + +// Class for 64 bit register r10 +reg_class r10_reg( + R10, R10_H +); + +// Class for 64 bit register r11 +reg_class r11_reg( + R11, R11_H +); + +// Class for 64 bit register r12 +reg_class r12_reg( + R12, R12_H +); + +// Class for 64 bit register r13 +reg_class r13_reg( + R13, R13_H +); + +// Class for 64 bit register r14 +reg_class r14_reg( + R14, R14_H +); + +// Class for 64 bit register r15 +reg_class r15_reg( + R15, R15_H +); + +// Class for 64 bit register r16 +reg_class r16_reg( + R16, R16_H +); + +// Class for method register +reg_class method_reg( + R31, R31_H +); + +// Class for heapbase register +reg_class heapbase_reg( + R27, R27_H +); + +// Class for java thread register +reg_class java_thread_reg( + R23, R23_H +); + +reg_class r28_reg( + R28, R28_H +); + +reg_class r29_reg( + R29, R29_H +); + +reg_class r30_reg( + R30, R30_H +); + +// Class for zero register +reg_class zr_reg( + R0, R0_H +); + +// Class for thread register +reg_class thread_reg( + R4, R4_H +); + +// Class for frame pointer register +reg_class fp_reg( + R8, R8_H +); + +// Class for link register +reg_class ra_reg( + R1, R1_H +); + +// Class for long sp register +reg_class sp_reg( + R2, R2_H +); + +// Class for all float registers +reg_class float_reg( + F0, + F1, + F2, + F3, + F4, + F5, + F6, + F7, + F8, + F9, + F10, + F11, + F12, + F13, + F14, + F15, + F16, + F17, + F18, + F19, + F20, + F21, + F22, + F23, + F24, + F25, + F26, + F27, + F28, + F29, + F30, + F31 +); + +// Double precision float registers have virtual `high halves' that +// are needed by the allocator.
+// Class for all double registers +reg_class double_reg( + F0, F0_H, + F1, F1_H, + F2, F2_H, + F3, F3_H, + F4, F4_H, + F5, F5_H, + F6, F6_H, + F7, F7_H, + F8, F8_H, + F9, F9_H, + F10, F10_H, + F11, F11_H, + F12, F12_H, + F13, F13_H, + F14, F14_H, + F15, F15_H, + F16, F16_H, + F17, F17_H, + F18, F18_H, + F19, F19_H, + F20, F20_H, + F21, F21_H, + F22, F22_H, + F23, F23_H, + F24, F24_H, + F25, F25_H, + F26, F26_H, + F27, F27_H, + F28, F28_H, + F29, F29_H, + F30, F30_H, + F31, F31_H +); + +// Class for all RVV vector registers +reg_class vectora_reg( + V1, V1_H, V1_J, V1_K, + V2, V2_H, V2_J, V2_K, + V3, V3_H, V3_J, V3_K, + V4, V4_H, V4_J, V4_K, + V5, V5_H, V5_J, V5_K, + V6, V6_H, V6_J, V6_K, + V7, V7_H, V7_J, V7_K, + V8, V8_H, V8_J, V8_K, + V9, V9_H, V9_J, V9_K, + V10, V10_H, V10_J, V10_K, + V11, V11_H, V11_J, V11_K, + V12, V12_H, V12_J, V12_K, + V13, V13_H, V13_J, V13_K, + V14, V14_H, V14_J, V14_K, + V15, V15_H, V15_J, V15_K, + V16, V16_H, V16_J, V16_K, + V17, V17_H, V17_J, V17_K, + V18, V18_H, V18_J, V18_K, + V19, V19_H, V19_J, V19_K, + V20, V20_H, V20_J, V20_K, + V21, V21_H, V21_J, V21_K, + V22, V22_H, V22_J, V22_K, + V23, V23_H, V23_J, V23_K, + V24, V24_H, V24_J, V24_K, + V25, V25_H, V25_J, V25_K, + V26, V26_H, V26_J, V26_K, + V27, V27_H, V27_J, V27_K, + V28, V28_H, V28_J, V28_K, + V29, V29_H, V29_J, V29_K, + V30, V30_H, V30_J, V30_K, + V31, V31_H, V31_J, V31_K +); + +// Class for 64 bit register f0 +reg_class f0_reg( + F0, F0_H +); + +// Class for 64 bit register f1 +reg_class f1_reg( + F1, F1_H +); + +// Class for 64 bit register f2 +reg_class f2_reg( + F2, F2_H +); + +// Class for 64 bit register f3 +reg_class f3_reg( + F3, F3_H +); + +// class for vector register v1 +reg_class v1_reg( + V1, V1_H, V1_J, V1_K +); + +// class for vector register v2 +reg_class v2_reg( + V2, V2_H, V2_J, V2_K +); + +// class for vector register v3 +reg_class v3_reg( + V3, V3_H, V3_J, V3_K +); + +// class for vector register v4 +reg_class v4_reg( + V4, V4_H, V4_J, V4_K +); + +// 
class for vector register v5 +reg_class v5_reg( + V5, V5_H, V5_J, V5_K +); + +// class for condition codes +reg_class reg_flags(RFLAGS); +%} + +//----------DEFINITION BLOCK--------------------------------------------------- +// Define name --> value mappings to inform the ADLC of an integer valued name +// Current support includes integer values in the range [0, 0x7FFFFFFF] +// Format: +// int_def <name> ( <int_value>, <expression>); +// Generated Code in ad_<arch>.hpp +// #define <name> (<expression>) +// // value == <int_value> +// Generated code in ad_<arch>.cpp adlc_verification() +// assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>"); +// + +// we follow the ppc-aix port in using a simple cost model which ranks +// register operations as cheap, memory ops as more expensive and +// branches as most expensive. the first two have a low as well as a +// normal cost. huge cost appears to be a way of saying don't do +// something + +definitions %{ + // The default cost (of a register move instruction). + int_def DEFAULT_COST ( 100, 100); + int_def ALU_COST ( 100, 1 * DEFAULT_COST); // unknown, const, arith, shift, slt, + // multi, auipc, nop, logical, move + int_def LOAD_COST ( 300, 3 * DEFAULT_COST); // load, fpload + int_def STORE_COST ( 100, 1 * DEFAULT_COST); // store, fpstore + int_def XFER_COST ( 300, 3 * DEFAULT_COST); // mfc, mtc, fcvt, fmove, fcmp + int_def BRANCH_COST ( 100, 1 * DEFAULT_COST); // branch, jmp, call + int_def IMUL_COST ( 1000, 10 * DEFAULT_COST); // imul + int_def IDIVSI_COST ( 3400, 34 * DEFAULT_COST); // idivsi + int_def IDIVDI_COST ( 6600, 66 * DEFAULT_COST); // idivdi + int_def FMUL_SINGLE_COST ( 500, 5 * DEFAULT_COST); // fadd, fmul, fmadd + int_def FMUL_DOUBLE_COST ( 700, 7 * DEFAULT_COST); // fadd, fmul, fmadd + int_def FDIV_COST ( 2000, 20 * DEFAULT_COST); // fdiv + int_def FSQRT_COST ( 2500, 25 * DEFAULT_COST); // fsqrt + int_def VOLATILE_REF_COST ( 1000, 10 * DEFAULT_COST); +%} + + + +//----------SOURCE BLOCK------------------------------------------------------- +// This is a block of C++ code which provides values, 
functions, and +// definitions necessary in the rest of the architecture description + +source_hpp %{ + +#include "asm/macroAssembler.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "opto/addnode.hpp" +#include "opto/convertnode.hpp" + +extern RegMask _ANY_REG32_mask; +extern RegMask _ANY_REG_mask; +extern RegMask _PTR_REG_mask; +extern RegMask _NO_SPECIAL_REG32_mask; +extern RegMask _NO_SPECIAL_REG_mask; +extern RegMask _NO_SPECIAL_PTR_REG_mask; + +class CallStubImpl { + + //-------------------------------------------------------------- + //---< Used for optimization in Compile::shorten_branches >--- + //-------------------------------------------------------------- + + public: + // Size of call trampoline stub. + static uint size_call_trampoline() { + return 0; // no call trampolines on this platform + } + + // number of relocations needed by a call trampoline stub + static uint reloc_call_trampoline() { + return 0; // no call trampolines on this platform + } +}; + +class HandlerImpl { + + public: + + static int emit_exception_handler(CodeBuffer &cbuf); + static int emit_deopt_handler(CodeBuffer& cbuf); + + static uint size_exception_handler() { + return MacroAssembler::far_branch_size(); + } + + static uint size_deopt_handler() { + // count auipc + far branch + return NativeInstruction::instruction_size + MacroAssembler::far_branch_size(); + } +}; + +class Node::PD { +public: + enum NodeFlags { + _last_flag = Node::_last_flag + }; +}; + +bool is_CAS(int opcode, bool maybe_volatile); + +// predicate controlling translation of CompareAndSwapX +bool needs_acquiring_load_reserved(const Node *load); + +// predicate controlling addressing modes +bool size_fits_all_mem_uses(AddPNode* addp, int shift); +%} + +source %{ + +// Derived RegMask with conditionally allocatable registers + +RegMask _ANY_REG32_mask; +RegMask _ANY_REG_mask; +RegMask _PTR_REG_mask; +RegMask 
_NO_SPECIAL_REG32_mask; +RegMask _NO_SPECIAL_REG_mask; +RegMask _NO_SPECIAL_PTR_REG_mask; + +void reg_mask_init() { + + _ANY_REG32_mask = _ALL_REG32_mask; + _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg())); + + _ANY_REG_mask = _ALL_REG_mask; + _ANY_REG_mask.SUBTRACT(_ZR_REG_mask); + + _PTR_REG_mask = _ALL_REG_mask; + _PTR_REG_mask.SUBTRACT(_ZR_REG_mask); + + _NO_SPECIAL_REG32_mask = _ALL_REG32_mask; + _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask); + + _NO_SPECIAL_REG_mask = _ALL_REG_mask; + _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); + + _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask; + _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); + + // x27 is not allocatable when compressed oops is on + if (UseCompressedOops) { + _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg())); + _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); + _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); + } + + // x8 is not allocatable when PreserveFramePointer is on + if (PreserveFramePointer) { + _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg())); + _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask); + _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask); + } +} + +void PhaseOutput::pd_perform_mach_node_analysis() { +} + +int MachNode::pd_alignment_required() const { + return 1; +} + +int MachNode::compute_padding(int current_offset) const { + return 0; +} + +// is_CAS(int opcode, bool maybe_volatile) +// +// return true if opcode is one of the possible CompareAndSwapX +// values otherwise false. 
+bool is_CAS(int opcode, bool maybe_volatile) +{ + switch (opcode) { + // We handle these + case Op_CompareAndSwapI: + case Op_CompareAndSwapL: + case Op_CompareAndSwapP: + case Op_CompareAndSwapN: + case Op_ShenandoahCompareAndSwapP: + case Op_ShenandoahCompareAndSwapN: + case Op_CompareAndSwapB: + case Op_CompareAndSwapS: + case Op_GetAndSetI: + case Op_GetAndSetL: + case Op_GetAndSetP: + case Op_GetAndSetN: + case Op_GetAndAddI: + case Op_GetAndAddL: + return true; + case Op_CompareAndExchangeI: + case Op_CompareAndExchangeN: + case Op_CompareAndExchangeB: + case Op_CompareAndExchangeS: + case Op_CompareAndExchangeL: + case Op_CompareAndExchangeP: + case Op_WeakCompareAndSwapB: + case Op_WeakCompareAndSwapS: + case Op_WeakCompareAndSwapI: + case Op_WeakCompareAndSwapL: + case Op_WeakCompareAndSwapP: + case Op_WeakCompareAndSwapN: + case Op_ShenandoahWeakCompareAndSwapP: + case Op_ShenandoahWeakCompareAndSwapN: + case Op_ShenandoahCompareAndExchangeP: + case Op_ShenandoahCompareAndExchangeN: + return maybe_volatile; + default: + return false; + } +} + +// predicate controlling translation of CAS +// +// returns true if CAS needs to use an acquiring load otherwise false +bool needs_acquiring_load_reserved(const Node *n) +{ + assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap"); + + LoadStoreNode* ldst = n->as_LoadStore(); + if (n != NULL && is_CAS(n->Opcode(), false)) { + assert(ldst != NULL && ldst->trailing_membar() != NULL, "expected trailing membar"); + } else { + return ldst != NULL && ldst->trailing_membar() != NULL; + } + // so we can just return true here + return true; +} +#define __ _masm. + +// advance declarations for helper functions to convert register +// indices to register objects + +// the ad file has to provide implementations of certain methods +// expected by the generic code +// +// REQUIRED FUNCTIONALITY + +//============================================================================= + +// !!!!! 
Special hack to get all types of calls to specify the byte offset +// from the start of the call to the point where the return address +// will point. + +int MachCallStaticJavaNode::ret_addr_offset() +{ + // jal + return 1 * NativeInstruction::instruction_size; +} + +int MachCallDynamicJavaNode::ret_addr_offset() +{ + return 7 * NativeInstruction::instruction_size; // movptr, jal +} + +int MachCallRuntimeNode::ret_addr_offset() { + // for generated stubs the call will be + // jal(addr) + // or with far branches + // jal(trampoline_stub) + // for real runtime callouts it will be 11 instructions + // see riscv_enc_java_to_runtime + // la(t1, retaddr) -> auipc + addi + // la(t0, RuntimeAddress(addr)) -> lui + addi + slli + addi + slli + addi + // addi(sp, sp, -2 * wordSize) -> addi + // sd(t1, Address(sp, wordSize)) -> sd + // jalr(t0) -> jalr + CodeBlob *cb = CodeCache::find_blob(_entry_point); + if (cb != NULL) { + return 1 * NativeInstruction::instruction_size; + } else { + return 11 * NativeInstruction::instruction_size; + } +} + +int MachCallNativeNode::ret_addr_offset() { + Unimplemented(); + return -1; +} + +// +// Compute padding required for nodes which need alignment +// + +// With RVC a call instruction may get 2-byte aligned. +// The address of the call instruction needs to be 4-byte aligned to +// ensure that it does not span a cache line so that it can be patched. +int CallStaticJavaDirectNode::compute_padding(int current_offset) const +{ + // to make sure the address of jal 4-byte aligned. + return align_up(current_offset, alignment_required()) - current_offset; +} + +// With RVC a call instruction may get 2-byte aligned. +// The address of the call instruction needs to be 4-byte aligned to +// ensure that it does not span a cache line so that it can be patched. 
+int CallDynamicJavaDirectNode::compute_padding(int current_offset) const +{ + // skip the movptr in MacroAssembler::ic_call(): + // lui + addi + slli + addi + slli + addi + // Though movptr() has already 4-byte aligned with or without RVC, + // We need to prevent from further changes by explicitly calculating the size. + const int movptr_size = 6 * NativeInstruction::instruction_size; + current_offset += movptr_size; + // to make sure the address of jal 4-byte aligned. + return align_up(current_offset, alignment_required()) - current_offset; +} + +//============================================================================= + +#ifndef PRODUCT +void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + assert_cond(st != NULL); + st->print("BREAKPOINT"); +} +#endif + +void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + C2_MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + __ ebreak(); +} + +uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + +//============================================================================= + +#ifndef PRODUCT + void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { + st->print("nop \t# %d bytes pad for loops and calls", _count); + } +#endif + + void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { + C2_MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. + for (int i = 0; i < _count; i++) { + __ nop(); + } + } + + uint MachNopNode::size(PhaseRegAlloc*) const { + return _count * (UseRVC ? 
NativeInstruction::compressed_instruction_size : NativeInstruction::instruction_size); + } + +//============================================================================= +const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; + +int ConstantTable::calculate_table_base_offset() const { + return 0; // absolute addressing, no offset +} + +bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } +void MachConstantBaseNode::postalloc_expand(GrowableArray<Node *> *nodes, PhaseRegAlloc *ra_) { + ShouldNotReachHere(); +} + +void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { + // Empty encoding +} + +uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { + return 0; +} + +#ifndef PRODUCT +void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { + assert_cond(st != NULL); + st->print("-- \t// MachConstantBaseNode (empty encoding)"); +} +#endif + +#ifndef PRODUCT +void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + assert_cond(st != NULL && ra_ != NULL); + Compile* C = ra_->C; + + int framesize = C->output()->frame_slots() << LogBytesPerInt; + + if (C->output()->need_stack_bang(framesize)) { + st->print("# stack bang size=%d\n\t", framesize); + } + + st->print("sd fp, [sp, #%d]\n\t", - 2 * wordSize); + st->print("sd ra, [sp, #%d]\n\t", - wordSize); + if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } + st->print("sub sp, sp, #%d\n\t", framesize); + + if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) { + st->print("ld t0, [guard]\n\t"); + st->print("membar LoadLoad\n\t"); + st->print("ld t1, [xthread, #thread_disarmed_offset]\n\t"); + st->print("beq t0, t1, skip\n\t"); + st->print("jalr #nmethod_entry_barrier_stub\n\t"); + st->print("j skip\n\t"); + st->print("guard: int\n\t"); + st->print("skip:\n\t"); + } +} +#endif + +void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + 
assert_cond(ra_ != NULL); + Compile* C = ra_->C; + C2_MacroAssembler _masm(&cbuf); + + // n.b. frame size includes space for return pc and fp + const int framesize = C->output()->frame_size_in_bytes(); + + // insert a nop at the start of the prolog so we can patch in a + // branch if we need to invalidate the method later + __ nop(); + + assert_cond(C != NULL); + + if (C->clinit_barrier_on_entry()) { + assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); + + Label L_skip_barrier; + + __ mov_metadata(t1, C->method()->holder()->constant_encoding()); + __ clinit_barrier(t1, t0, &L_skip_barrier); + __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + __ bind(L_skip_barrier); + } + + int bangsize = C->output()->bang_size_in_bytes(); + if (C->output()->need_stack_bang(bangsize)) { + __ generate_stack_overflow_check(bangsize); + } + + __ build_frame(framesize); + + if (C->stub_function() == NULL) { + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->nmethod_entry_barrier(&_masm); + } + + if (VerifyStackAtCalls) { + Unimplemented(); + } + + C->output()->set_frame_complete(cbuf.insts_size()); + + if (C->has_mach_constant_base_node()) { + // NOTE: We set the table base offset here because users might be + // emitted before MachConstantBaseNode. 
+ ConstantTable& constant_table = C->output()->constant_table(); + constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); + } +} + +uint MachPrologNode::size(PhaseRegAlloc* ra_) const +{ + assert_cond(ra_ != NULL); + return MachNode::size(ra_); // too many variables; just compute it + // the hard way +} + +int MachPrologNode::reloc() const +{ + return 0; +} + +//============================================================================= + +#ifndef PRODUCT +void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + assert_cond(st != NULL && ra_ != NULL); + Compile* C = ra_->C; + assert_cond(C != NULL); + int framesize = C->output()->frame_size_in_bytes(); + + st->print("# pop frame %d\n\t", framesize); + + if (framesize == 0) { + st->print("ld ra, [sp,#%d]\n\t", (2 * wordSize)); + st->print("ld fp, [sp,#%d]\n\t", (3 * wordSize)); + st->print("add sp, sp, #%d\n\t", (2 * wordSize)); + } else { + st->print("add sp, sp, #%d\n\t", framesize); + st->print("ld ra, [sp,#%d]\n\t", - 2 * wordSize); + st->print("ld fp, [sp,#%d]\n\t", - wordSize); + } + + if (do_polling() && C->is_method_compilation()) { + st->print("# test polling word\n\t"); + st->print("ld t0, [xthread,#%d]\n\t", in_bytes(JavaThread::polling_word_offset())); + st->print("bgtu sp, t0, #slow_path"); + } +} +#endif + +void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + assert_cond(ra_ != NULL); + Compile* C = ra_->C; + C2_MacroAssembler _masm(&cbuf); + assert_cond(C != NULL); + int framesize = C->output()->frame_size_in_bytes(); + + __ remove_frame(framesize); + + if (StackReservedPages > 0 && C->has_reserved_stack_access()) { + __ reserved_stack_check(); + } + + if (do_polling() && C->is_method_compilation()) { + Label dummy_label; + Label* code_stub = &dummy_label; + if (!C->output()->in_scratch_emit_size()) { + code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); + } + __ relocate(relocInfo::poll_return_type); + __ 
safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */); + } +} + +uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { + assert_cond(ra_ != NULL); + // Variable size. Determine dynamically. + return MachNode::size(ra_); +} + +int MachEpilogNode::reloc() const { + // Return number of relocatable values contained in this instruction. + return 1; // 1 for polling page. +} +const Pipeline * MachEpilogNode::pipeline() const { + return MachNode::pipeline_class(); +} + +//============================================================================= + +// Figure out which register class each belongs in: rc_int, rc_float or +// rc_stack. +enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack }; + +static enum RC rc_class(OptoReg::Name reg) { + + if (reg == OptoReg::Bad) { + return rc_bad; + } + + // we have 30 int registers * 2 halves + // (t0 and t1 are omitted) + int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2); + if (reg < slots_of_int_registers) { + return rc_int; + } + + // we have 32 float register * 2 halves + int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers; + if (reg < slots_of_int_registers + slots_of_float_registers) { + return rc_float; + } + + // we have 32 vector register * 4 halves + int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers; + if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) { + return rc_vector; + } + + // Between vector regs & stack is the flags regs. + assert(OptoReg::is_stack(reg), "blow up if spilling flags"); + + return rc_stack; +} + +uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const { + assert_cond(ra_ != NULL); + Compile* C = ra_->C; + + // Get registers to move. 
+ OptoReg::Name src_hi = ra_->get_reg_second(in(1)); + OptoReg::Name src_lo = ra_->get_reg_first(in(1)); + OptoReg::Name dst_hi = ra_->get_reg_second(this); + OptoReg::Name dst_lo = ra_->get_reg_first(this); + + enum RC src_hi_rc = rc_class(src_hi); + enum RC src_lo_rc = rc_class(src_lo); + enum RC dst_hi_rc = rc_class(dst_hi); + enum RC dst_lo_rc = rc_class(dst_lo); + + assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register"); + + if (src_hi != OptoReg::Bad) { + assert((src_lo & 1) == 0 && src_lo + 1 == src_hi && + (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi, + "expected aligned-adjacent pairs"); + } + + if (src_lo == dst_lo && src_hi == dst_hi) { + return 0; // Self copy, no move. + } + + bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi && + (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi; + int src_offset = ra_->reg2offset(src_lo); + int dst_offset = ra_->reg2offset(dst_lo); + + if (bottom_type()->isa_vect() != NULL) { + uint ireg = ideal_reg(); + if (ireg == Op_VecA && cbuf) { + C2_MacroAssembler _masm(cbuf); + Assembler::CompressibleRegion cr(&_masm); + int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); + if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { + // stack to stack + __ spill_copy_vector_stack_to_stack(src_offset, dst_offset, + vector_reg_size_in_bytes); + } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { + // vpr to stack + __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo)); + } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { + // stack to vpr + __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); + } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { + // vpr to vpr + __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); + } else { + ShouldNotReachHere(); + } + } + } else if (cbuf != NULL) { + C2_MacroAssembler _masm(cbuf); + 
Assembler::CompressibleRegion cr(&_masm); + switch (src_lo_rc) { + case rc_int: + if (dst_lo_rc == rc_int) { // gpr --> gpr copy + if (!is64 && this->ideal_reg() != Op_RegI) { // zero extended for narrow oop or klass + __ zero_extend(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]), 32); + } else { + __ mv(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo])); + } + } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy + if (is64) { + __ fmv_d_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), + as_Register(Matcher::_regEncode[src_lo])); + } else { + __ fmv_w_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), + as_Register(Matcher::_regEncode[src_lo])); + } + } else { // gpr --> stack spill + assert(dst_lo_rc == rc_stack, "spill to bad register class"); + __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset); + } + break; + case rc_float: + if (dst_lo_rc == rc_int) { // fpr --> gpr copy + if (is64) { + __ fmv_x_d(as_Register(Matcher::_regEncode[dst_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo])); + } else { + __ fmv_x_w(as_Register(Matcher::_regEncode[dst_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo])); + } + } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy + if (is64) { + __ fmv_d(as_FloatRegister(Matcher::_regEncode[dst_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo])); + } else { + __ fmv_s(as_FloatRegister(Matcher::_regEncode[dst_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo])); + } + } else { // fpr --> stack spill + assert(dst_lo_rc == rc_stack, "spill to bad register class"); + __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), + is64, dst_offset); + } + break; + case rc_stack: + if (dst_lo_rc == rc_int) { // stack --> gpr load + if (this->ideal_reg() == Op_RegI) { + __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); + } else { // // zero extended for narrow oop or klass + __ unspillu(as_Register(Matcher::_regEncode[dst_lo]), 
is64, src_offset); + } + } else if (dst_lo_rc == rc_float) { // stack --> fpr load + __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), + is64, src_offset); + } else { // stack --> stack copy + assert(dst_lo_rc == rc_stack, "spill to bad register class"); + if (this->ideal_reg() == Op_RegI) { + __ unspill(t0, is64, src_offset); + } else { // zero extended for narrow oop or klass + __ unspillu(t0, is64, src_offset); + } + __ spill(t0, is64, dst_offset); + } + break; + default: + ShouldNotReachHere(); + } + } + + if (st != NULL) { + st->print("spill "); + if (src_lo_rc == rc_stack) { + st->print("[sp, #%d] -> ", src_offset); + } else { + st->print("%s -> ", Matcher::regName[src_lo]); + } + if (dst_lo_rc == rc_stack) { + st->print("[sp, #%d]", dst_offset); + } else { + st->print("%s", Matcher::regName[dst_lo]); + } + if (bottom_type()->isa_vect() != NULL) { + int vsize = 0; + if (ideal_reg() == Op_VecA) { + vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; + } else { + ShouldNotReachHere(); + } + st->print("\t# vector spill size = %d", vsize); + } else { + st->print("\t# spill size = %d", is64 ? 
64 : 32); + } + } + + return 0; +} + +#ifndef PRODUCT +void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + if (ra_ == NULL) { + st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx); + } else { + implementation(NULL, ra_, false, st); + } +} +#endif + +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + implementation(&cbuf, ra_, false, NULL); +} + +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + +//============================================================================= + +#ifndef PRODUCT +void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + assert_cond(ra_ != NULL && st != NULL); + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_reg_first(this); + st->print("add %s, sp, #%d\t# box lock", + Matcher::regName[reg], offset); +} +#endif + +void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + C2_MacroAssembler _masm(&cbuf); + + assert_cond(ra_ != NULL); + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_encode(this); + + if (is_imm_in_range(offset, 12, 0)) { + __ addi(as_Register(reg), sp, offset); + } else if (is_imm_in_range(offset, 32, 0)) { + __ li32(t0, offset); + __ add(as_Register(reg), sp, t0); + } else { + ShouldNotReachHere(); + } +} + +uint BoxLockNode::size(PhaseRegAlloc *ra_) const { + // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_). 
+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + + if (is_imm_in_range(offset, 12, 0)) { + return NativeInstruction::instruction_size; + } else { + return 3 * NativeInstruction::instruction_size; // lui + addiw + add; + } +} + +//============================================================================= + +#ifndef PRODUCT +void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const +{ + assert_cond(st != NULL); + st->print_cr("# MachUEPNode"); + if (UseCompressedClassPointers) { + st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); + if (CompressedKlassPointers::shift() != 0) { + st->print_cr("\tdecode_klass_not_null t0, t0"); + } + } else { + st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# klass"); + } + st->print_cr("\tbeq t0, t1, ic_hit"); + st->print_cr("\tj, SharedRuntime::_ic_miss_stub\t # Inline cache check"); + st->print_cr("\tic_hit:"); +} +#endif + +void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const +{ + // This is the unverified entry point. + C2_MacroAssembler _masm(&cbuf); + + Label skip; + __ cmp_klass(j_rarg0, t1, t0, skip); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + __ bind(skip); +} + +uint MachUEPNode::size(PhaseRegAlloc* ra_) const +{ + assert_cond(ra_ != NULL); + return MachNode::size(ra_); +} + +// REQUIRED EMIT CODE + +//============================================================================= + +// Emit exception handler code. +int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) +{ + // la_patchable t0, #exception_blob_entry_point + // jr (offset)t0 + // or + // j #exception_blob_entry_point + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. 
+ C2_MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_exception_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + int offset = __ offset(); + __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); + assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); + __ end_a_stub(); + return offset; +} + +// Emit deopt handler code. +int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) +{ + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. + C2_MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_deopt_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + int offset = __ offset(); + + __ auipc(ra, 0); + __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); + + assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); + __ end_a_stub(); + return offset; + +} +// REQUIRED MATCHER CODE + +//============================================================================= + +const bool Matcher::match_rule_supported(int opcode) { + if (!has_match_rule(opcode)) { + return false; + } + + switch (opcode) { + case Op_CacheWB: // fall through + case Op_CacheWBPreSync: // fall through + case Op_CacheWBPostSync: + if (!VM_Version::supports_data_cache_line_flush()) { + return false; + } + break; + + case Op_StrCompressedCopy: // fall through + case Op_StrInflatedCopy: // fall through + case Op_HasNegatives: + return UseRVV; + + case Op_EncodeISOArray: + return UseRVV && SpecialEncodeISOArray; + + case Op_PopCountI: + case Op_PopCountL: + return UsePopCountInstruction; + + case Op_RotateRight: + case Op_RotateLeft: + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + case Op_CountTrailingZerosI: + case Op_CountTrailingZerosL: 
+ return UseRVB; + } + + return true; // Per default match rules are supported. +} + +// Identify extra cases that we might want to provide match rules for vector nodes and +// other intrinsics guarded with vector length (vlen) and element type (bt). +const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { + if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { + return false; + } + + return op_vec_supported(opcode); +} + +const RegMask* Matcher::predicate_reg_mask(void) { + return NULL; +} + +const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) { + return NULL; +} + +// Vector calling convention not yet implemented. +const bool Matcher::supports_vector_calling_convention(void) { + return false; +} + +OptoRegPair Matcher::vector_return_value(uint ideal_reg) { + Unimplemented(); + return OptoRegPair(0, 0); +} + +const int Matcher::float_pressure(int default_pressure_threshold) { + return default_pressure_threshold; +} + +// Is this branch offset short enough that a short branch can be used? +// +// NOTE: If the platform does not provide any short branch variants, then +// this method should return false for offset 0. +// |---label(L1)-----| +// |-----------------| +// |-----------------|----------eq: float------------------- +// |-----------------| // far_cmpD_branch | cmpD_branch +// |------- ---------| feq; | feq; +// |-far_cmpD_branch-| beqz done; | bnez L; +// |-----------------| j L; | +// |-----------------| bind(done); | +// |-----------------|-------------------------------------- +// |-----------------| // so shortBrSize = br_size - 4; +// |-----------------| // so offs = offset - shortBrSize + 4; +// |---label(L2)-----| +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + // The passed offset is relative to address of the branch. 
+ int shortBrSize = br_size - 4; + int offs = offset - shortBrSize + 4; + return (-4096 <= offs && offs < 4096); +} + +// Vector width in bytes. +const int Matcher::vector_width_in_bytes(BasicType bt) { + if (UseRVV) { + // The MaxVectorSize should have been set by detecting RVV max vector register size when check UseRVV. + // MaxVectorSize == VM_Version::_initial_vector_length + return MaxVectorSize; + } + return 0; +} + +// Limits on vector size (number of elements) loaded into vector. +const int Matcher::max_vector_size(const BasicType bt) { + return vector_width_in_bytes(bt) / type2aelembytes(bt); +} +const int Matcher::min_vector_size(const BasicType bt) { + return max_vector_size(bt); +} + +// Vector ideal reg. +const uint Matcher::vector_ideal_reg(int len) { + assert(MaxVectorSize >= len, ""); + if (UseRVV) { + return Op_VecA; + } + + ShouldNotReachHere(); + return 0; +} + +const int Matcher::scalable_vector_reg_size(const BasicType bt) { + return Matcher::max_vector_size(bt); +} + +MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) { + ShouldNotReachHere(); // generic vector operands not supported + return NULL; +} + +bool Matcher::is_generic_reg2reg_move(MachNode* m) { + ShouldNotReachHere(); // generic vector operands not supported + return false; +} + +bool Matcher::is_generic_vector(MachOper* opnd) { + ShouldNotReachHere(); // generic vector operands not supported + return false; +} + +// Return whether or not this register is ever used as an argument. +// This function is used on startup to build the trampoline stubs in +// generateOptoStub. Registers not mentioned will be killed by the VM +// call in the trampoline, and arguments in those registers not be +// available to the callee. 
+bool Matcher::can_be_java_arg(int reg) +{ + return + reg == R10_num || reg == R10_H_num || + reg == R11_num || reg == R11_H_num || + reg == R12_num || reg == R12_H_num || + reg == R13_num || reg == R13_H_num || + reg == R14_num || reg == R14_H_num || + reg == R15_num || reg == R15_H_num || + reg == R16_num || reg == R16_H_num || + reg == R17_num || reg == R17_H_num || + reg == F10_num || reg == F10_H_num || + reg == F11_num || reg == F11_H_num || + reg == F12_num || reg == F12_H_num || + reg == F13_num || reg == F13_H_num || + reg == F14_num || reg == F14_H_num || + reg == F15_num || reg == F15_H_num || + reg == F16_num || reg == F16_H_num || + reg == F17_num || reg == F17_H_num; +} + +bool Matcher::is_spillable_arg(int reg) +{ + return can_be_java_arg(reg); +} + +bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { + return false; +} + +RegMask Matcher::divI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for MODI projection of divmodI. +RegMask Matcher::modI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for DIVL projection of divmodL. +RegMask Matcher::divL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for MODL projection of divmodL. +RegMask Matcher::modL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +const RegMask Matcher::method_handle_invoke_SP_save_mask() { + return FP_REG_mask(); +} + +bool size_fits_all_mem_uses(AddPNode* addp, int shift) { // True iff every memory user of addp accesses exactly (1 << shift) bytes. + assert_cond(addp != NULL); + for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) { + Node* u = addp->fast_out(i); + if (u != NULL && u->is_Mem()) { + int opsize = u->as_Mem()->memory_size(); + assert(opsize > 0, "unexpected memory operand size"); + if (opsize != (1 << shift)) { // reuse the size checked by the assert instead of querying the node again + return false; + } + } + } + return true; +} + +// Should the Matcher clone input 'm' of node 'n'? 
+bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { + assert_cond(m != NULL); + if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) + mstack.push(m, Visit); // m = ShiftCntV + return true; + } + return false; +} + +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? +bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + return clone_base_plus_offset_address(m, mstack, address_visited); +} + +%} + + + +//----------ENCODING BLOCK----------------------------------------------------- +// This block specifies the encoding classes used by the compiler to +// output byte streams. Encoding classes are parameterized macros +// used by Machine Instruction Nodes in order to generate the bit +// encoding of the instruction. Operands specify their base encoding +// interface with the interface keyword. There are currently +// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & +// COND_INTER. REG_INTER causes an operand to generate a function +// which returns its register number when queried. CONST_INTER causes +// an operand to generate a function which returns the value of the +// constant when queried. MEMORY_INTER causes an operand to generate +// four functions which return the Base Register, the Index Register, +// the Scale Value, and the Offset Value of the operand when queried. +// COND_INTER causes an operand to generate six functions which return +// the encoding code (ie - encoding bits for the instruction) +// associated with each basic boolean condition for a conditional +// instruction. +// +// Instructions specify two basic values for encoding. Again, a +// function is available to check if the constant displacement is an +// oop. 
They use the ins_encode keyword to specify their encoding +// classes (which must be a sequence of enc_class names, and their +// parameters, specified in the encoding block), and they use the +// opcode keyword to specify, in order, their primary, secondary, and +// tertiary opcode. Only the opcode sections which a particular +// instruction needs for encoding need to be specified. +encode %{ + // BEGIN Non-volatile memory access + + enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ + C2_MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + int64_t con = (int64_t)$src$$constant; + Register dst_reg = as_Register($dst$$reg); + __ li(dst_reg, con); + %} + + enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ + C2_MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL || con == (address)1) { + ShouldNotReachHere(); + } else { + relocInfo::relocType rtype = $src->constant_reloc(); + if (rtype == relocInfo::oop_type) { + __ movoop(dst_reg, (jobject)con, /*immediate*/true); + } else if (rtype == relocInfo::metadata_type) { + __ mov_metadata(dst_reg, (Metadata*)con); + } else { + assert(rtype == relocInfo::none, "unexpected reloc type"); + __ li(dst_reg, $src$$constant); + } + } + %} + + enc_class riscv_enc_mov_p1(iRegP dst) %{ + C2_MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + Register dst_reg = as_Register($dst$$reg); + __ li(dst_reg, 1); + %} + + enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ + C2_MacroAssembler _masm(&cbuf); + __ load_byte_map_base($dst$$Register); + %} + + enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ + C2_MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL) { + ShouldNotReachHere(); + } else { + relocInfo::relocType rtype = $src->constant_reloc(); + assert(rtype == relocInfo::oop_type, "unexpected reloc type"); + __ 
set_narrow_oop(dst_reg, (jobject)con); + } + %} + + enc_class riscv_enc_mov_zero(iRegNorP dst) %{ + C2_MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + __ mv(dst_reg, zr); + %} + + enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ + C2_MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL) { + ShouldNotReachHere(); + } else { + relocInfo::relocType rtype = $src->constant_reloc(); + assert(rtype == relocInfo::metadata_type, "unexpected reloc type"); + __ set_narrow_klass(dst_reg, (Klass *)con); + } + %} + + enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ + C2_MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ + C2_MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ + C2_MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ + C2_MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class 
riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ + C2_MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ + C2_MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + // compare and branch instruction encodings + + enc_class riscv_enc_j(label lbl) %{ + C2_MacroAssembler _masm(&cbuf); + Label* L = $lbl$$label; + __ j(*L); + %} + + enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ + C2_MacroAssembler _masm(&cbuf); + Label* L = $lbl$$label; + switch ($cmp$$cmpcode) { + case(BoolTest::ge): + __ j(*L); + break; + case(BoolTest::lt): + break; + default: + Unimplemented(); + } + %} + + // call instruction encodings + + enc_class riscv_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) %{ + Register sub_reg = as_Register($sub$$reg); + Register super_reg = as_Register($super$$reg); + Register temp_reg = as_Register($temp$$reg); + Register result_reg = as_Register($result$$reg); + Register cr_reg = t1; + + Label miss; + Label done; + C2_MacroAssembler _masm(&cbuf); + __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, + NULL, &miss); + if ($primary) { + __ mv(result_reg, zr); + } else { + __ mv(cr_reg, zr); + __ j(done); + } + + __ bind(miss); + if (!$primary) { + __ li(cr_reg, 1); + } + + __ bind(done); + %} + + enc_class riscv_enc_java_static_call(method meth) %{ + C2_MacroAssembler _masm(&cbuf); + + address addr = (address)$meth$$method; + address call = NULL; + assert_cond(addr != NULL); + if 
(!_method) { + // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. + call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf); + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } else { + int method_index = resolved_method_index(cbuf); + RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) + : static_call_Relocation::spec(method_index); + call = __ trampoline_call(Address(addr, rspec), &cbuf); + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + + // Emit stub for static call + address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); + if (stub == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } + %} + + enc_class riscv_enc_java_dynamic_call(method meth) %{ + C2_MacroAssembler _masm(&cbuf); + int method_index = resolved_method_index(cbuf); + address call = __ ic_call((address)$meth$$method, method_index); + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + %} + + enc_class riscv_enc_call_epilog() %{ + C2_MacroAssembler _masm(&cbuf); + if (VerifyStackAtCalls) { + // Check that stack depth is unchanged: find majik cookie on stack + __ call_Unimplemented(); + } + %} + + enc_class riscv_enc_java_to_runtime(method meth) %{ + C2_MacroAssembler _masm(&cbuf); + + // some calls to generated routines (arraycopy code) are scheduled + // by C2 as runtime calls. if so we can call them using a jr (they + // will be in a reachable segment) otherwise we have to use a jalr + // which loads the absolute address into a register. 
+ address entry = (address)$meth$$method; + CodeBlob *cb = CodeCache::find_blob(entry); + if (cb != NULL) { + address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type)); + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } else { + Label retaddr; + __ la(t1, retaddr); + __ la(t0, RuntimeAddress(entry)); + // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc() + __ addi(sp, sp, -2 * wordSize); + __ sd(t1, Address(sp, wordSize)); + __ jalr(t0); + __ bind(retaddr); + __ addi(sp, sp, 2 * wordSize); + } + %} + + // using the cr register as the bool result: 0 for success; others failed. + enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ + C2_MacroAssembler _masm(&cbuf); + Register flag = t1; + Register oop = as_Register($object$$reg); + Register box = as_Register($box$$reg); + Register disp_hdr = as_Register($tmp1$$reg); + Register tmp = as_Register($tmp2$$reg); + Label cont; + Label object_has_monitor; + + assert_different_registers(oop, box, tmp, disp_hdr, t0); + + // Load markWord from object into displaced_header. + __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + + if (DiagnoseSyncOnValueBasedClasses != 0) { + __ load_klass(flag, oop); + __ lwu(flag, Address(flag, Klass::access_flags_offset())); + __ andi(flag, flag, JVM_ACC_IS_VALUE_BASED_CLASS, tmp /* tmp */); + __ bnez(flag, cont, true /* is_far */); + } + + if (UseBiasedLocking && !UseOptoBiasInlining) { + __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont); + } + + // Check for existing monitor + __ andi(t0, disp_hdr, markWord::monitor_value); + __ bnez(t0, object_has_monitor); + + // Set tmp to be (markWord of object | UNLOCK_VALUE). + __ ori(tmp, disp_hdr, markWord::unlocked_value); + + // Initialize the box. (Must happen before we update the object mark!) 
+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + + // Compare object markWord with an unlocked value (tmp) and if + // equal exchange the stack address of our box with object markWord. + // On failure disp_hdr contains the possibly locked markWord. + __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, + Assembler::rl, /*result*/disp_hdr); + __ mv(flag, zr); + __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + // If the compare-and-exchange succeeded, then we found an unlocked + // object, will have now locked it will continue at label cont + // We did not see an unlocked object so try the fast recursive case. + + // Check if the owner is self by comparing the value in the + // markWord of object (disp_hdr) with the stack pointer. + __ sub(disp_hdr, disp_hdr, sp); + __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place)); + // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, + // hence we can store 0 as the displaced header in the box, which indicates that it is a + // recursive lock. + __ andr(tmp/*==0?*/, disp_hdr, tmp); + __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); + __ mv(flag, tmp); // we can use the value of tmp as the result here + + __ j(cont); + + // Handle existing monitor. + __ bind(object_has_monitor); + // The object's monitor m is unlocked iff m->owner == NULL, + // otherwise m->owner may contain a thread or a stack address. + // + // Try to CAS m->owner from NULL to current thread. 
+ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markWord::monitor_value)); + __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, + Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) + + // Store a non-null value into the box to avoid looking like a re-entrant + // lock. The fast-path monitor unlock code checks for + // markWord::monitor_value so use markWord::unused_mark which has the + // relevant bit set, and also matches ObjectSynchronizer::slow_enter. + __ mv(tmp, (address)markWord::unused_mark().value()); + __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + + __ beqz(flag, cont); // CAS success means locking succeeded + + __ bne(flag, xthread, cont); // Check for recursive locking + + // Recursive lock case + __ mv(flag, zr); + __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); + __ add(tmp, tmp, 1u); + __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); + + __ bind(cont); + %} + + // using cr flag to indicate the fast_unlock result: 0 for success; others failed. + enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ + C2_MacroAssembler _masm(&cbuf); + Register flag = t1; + Register oop = as_Register($object$$reg); + Register box = as_Register($box$$reg); + Register disp_hdr = as_Register($tmp1$$reg); + Register tmp = as_Register($tmp2$$reg); + Label cont; + Label object_has_monitor; + + assert_different_registers(oop, box, tmp, disp_hdr, flag); + + if (UseBiasedLocking && !UseOptoBiasInlining) { + __ biased_locking_exit(oop, tmp, cont); + } + + // Find the lock address and load the displaced header from the stack. + __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + + // If the displaced header is 0, we have a recursive unlock. 
+ __ mv(flag, disp_hdr); + __ beqz(disp_hdr, cont); + + // Handle existing monitor. + __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); + __ andi(t0, disp_hdr, markWord::monitor_value); + __ bnez(t0, object_has_monitor); + + // Check if it is still a light weight lock, this is true if we + // see the stack address of the basicLock in the markWord of the + // object. + + __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, + Assembler::rl, /*result*/tmp); + __ xorr(flag, box, tmp); // box == tmp if cas succeeds + __ j(cont); + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + // Handle existing monitor. + __ bind(object_has_monitor); + STATIC_ASSERT(markWord::monitor_value <= INT_MAX); + __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor + __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); + __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); + + Label notRecursive; + __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. + __ bnez(flag, cont); + __ beqz(disp_hdr, notRecursive); // Will be 0 if not recursive. + + // Recursive lock + __ addi(disp_hdr, disp_hdr, -1); + __ sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); + // flag == 0 was set in the ownership check above + __ j(cont); + + __ bind(notRecursive); + __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); + __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); + __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. 
+ __ bnez(flag, cont); + // need a release store here + __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); + __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + __ sd(zr, Address(tmp)); // set unowned + + __ bind(cont); + %} + + // arithmetic encodings + + enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ + C2_MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + __ corrected_idivl(dst_reg, src1_reg, src2_reg, false); + %} + + enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ + C2_MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + __ corrected_idivq(dst_reg, src1_reg, src2_reg, false); + %} + + enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ + C2_MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + __ corrected_idivl(dst_reg, src1_reg, src2_reg, true); + %} + + enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ + C2_MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + __ corrected_idivq(dst_reg, src1_reg, src2_reg, true); + %} + + enc_class riscv_enc_tail_call(iRegP jump_target) %{ + C2_MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + Register target_reg = as_Register($jump_target$$reg); + __ jr(target_reg); + %} + + enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ + C2_MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + Register target_reg = as_Register($jump_target$$reg); + // exception oop should be in x10 + // ret addr has been popped into ra + // callee expects it in 
x13 + __ mv(x13, ra); + __ jr(target_reg); + %} + + enc_class riscv_enc_rethrow() %{ + C2_MacroAssembler _masm(&cbuf); + __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); + %} + + enc_class riscv_enc_ret() %{ + C2_MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + __ ret(); + %} + +%} + +//----------FRAME-------------------------------------------------------------- +// Definition of frame structure and management information. +// +// S T A C K L A Y O U T Allocators stack-slot number +// | (to get allocators register number +// G Owned by | | v add OptoReg::stack0()) +// r CALLER | | +// o | +--------+ pad to even-align allocators stack-slot +// w V | pad0 | numbers; owned by CALLER +// t -----------+--------+----> Matcher::_in_arg_limit, unaligned +// h ^ | in | 5 +// | | args | 4 Holes in incoming args owned by SELF +// | | | | 3 +// | | +--------+ +// V | | old out| Empty on Intel, window on Sparc +// | old |preserve| Must be even aligned. +// | SP-+--------+----> Matcher::_old_SP, even aligned +// | | in | 3 area for Intel ret address +// Owned by |preserve| Empty on Sparc. +// SELF +--------+ +// | | pad2 | 2 pad to align old SP +// | +--------+ 1 +// | | locks | 0 +// | +--------+----> OptoReg::stack0(), even aligned +// | | pad1 | 11 pad to align new SP +// | +--------+ +// | | | 10 +// | | spills | 9 spills +// V | | 8 (pad0 slot for callee) +// -----------+--------+----> Matcher::_out_arg_limit, unaligned +// ^ | out | 7 +// | | args | 6 Holes in outgoing args owned by CALLEE +// Owned by +--------+ +// CALLEE | new out| 6 Empty on Intel, window on Sparc +// | new |preserve| Must be even-aligned. +// | SP-+--------+----> Matcher::_new_SP, even aligned +// | | | +// +// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is +// known from SELF's arguments and the Java calling convention. +// Region 6-7 is determined per call site. 
+// Note 2: If the calling convention leaves holes in the incoming argument +// area, those holes are owned by SELF. Holes in the outgoing area +// are owned by the CALLEE. Holes should not be necessary in the +// incoming area, as the Java calling convention is completely under +// the control of the AD file. Doubles can be sorted and packed to +// avoid holes. Holes in the outgoing arguments may be necessary for +// varargs C calling conventions. +// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is +// even aligned with pad0 as needed. +// Region 6 is even aligned. Region 6-7 is NOT even aligned; +// (the latter is true on Intel but is it false on RISCV?) +// region 6-11 is even aligned; it may be padded out more so that +// the region from SP to FP meets the minimum stack alignment. +// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack +// alignment. Region 11, pad1, may be dynamically extended so that +// SP meets the minimum alignment. + +frame %{ + // These three registers define part of the calling convention + // between compiled code and the interpreter. + + // Inline Cache Register or methodOop for I2C. + inline_cache_reg(R31); + + // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] + cisc_spilling_operand_name(indOffset); + + // Number of stack slots consumed by locking an object + // generate Compile::sync_stack_slots + // VMRegImpl::slots_per_word = wordSize / stack_slot_size = 8 / 4 = 2 + sync_stack_slots(1 * VMRegImpl::slots_per_word); + + // Compiled code's Frame Pointer + frame_pointer(R2); + + // Interpreter stores its frame pointer in a register which is + // stored to the stack by I2CAdaptors. + // I2CAdaptors convert from interpreted java to compiled java. 
+ interpreter_frame_pointer(R8); + + // Stack alignment requirement + stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) + + // Number of outgoing stack slots killed above the out_preserve_stack_slots + // for calls to C. Supports the var-args backing area for register parms. + varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); + + // The after-PROLOG location of the return address. Location of + // return address specifies a type (REG or STACK) and a number + // representing the register number (i.e. - use a register name) or + // stack slot. + // Ret Addr is on stack in slot 0 if no locks or verification or alignment. + // Otherwise, it is above the locks and verification slot and alignment word + // TODO this may well be correct but need to check why that - 2 is there + // ppc port uses 0 but we definitely need to allow for fixed_slots + // which folds in the space used for monitors + return_addr(STACK - 2 + + align_up((Compile::current()->in_preserve_stack_slots() + + Compile::current()->fixed_slots()), + stack_alignment_in_slots())); + + // Location of compiled Java return values. Same as C for now. 
+ return_value + %{ + assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, + "only return normal values"); + + static const int lo[Op_RegL + 1] = { // enum name + 0, // Op_Node + 0, // Op_Set + R10_num, // Op_RegN + R10_num, // Op_RegI + R10_num, // Op_RegP + F10_num, // Op_RegF + F10_num, // Op_RegD + R10_num // Op_RegL + }; + + static const int hi[Op_RegL + 1] = { // enum name + 0, // Op_Node + 0, // Op_Set + OptoReg::Bad, // Op_RegN + OptoReg::Bad, // Op_RegI + R10_H_num, // Op_RegP + OptoReg::Bad, // Op_RegF + F10_H_num, // Op_RegD + R10_H_num // Op_RegL + }; + + return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); + %} +%} + +//----------ATTRIBUTES--------------------------------------------------------- +//----------Operand Attributes------------------------------------------------- +op_attrib op_cost(1); // Required cost attribute + +//----------Instruction Attributes--------------------------------------------- +ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute +ins_attrib ins_size(32); // Required size attribute (in bits) +ins_attrib ins_short_branch(0); // Required flag: is this instruction + // a non-matching short branch variant + // of some long branch? +ins_attrib ins_alignment(4); // Required alignment attribute (must + // be a power of 2) specifies the + // alignment that some part of the + // instruction (not necessarily the + // start) requires. If > 1, a + // compute_padding() function must be + // provided for the instruction + +//----------OPERANDS----------------------------------------------------------- +// Operand definitions must precede instruction definitions for correct parsing +// in the ADLC because operands constitute user defined types which are used in +// instruction definitions. 
+ +//----------Simple Operands---------------------------------------------------- + +// Integer operands 32 bit +// 32 bit immediate +operand immI() +%{ + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 32 bit zero +operand immI0() +%{ + predicate(n->get_int() == 0); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 32 bit unit increment +operand immI_1() +%{ + predicate(n->get_int() == 1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 32 bit unit decrement +operand immI_M1() +%{ + predicate(n->get_int() == -1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Unsigned Integer Immediate: 6-bit int in the range [32, 63] +operand uimmI6_ge32() %{ + predicate(((unsigned int)(n->get_int()) < 64) && (n->get_int() >= 32)); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_le_4() +%{ + predicate(n->get_int() <= 4); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_16() +%{ + predicate(n->get_int() == 16); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_24() +%{ + predicate(n->get_int() == 24); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_31() +%{ + predicate(n->get_int() == 31); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_63() +%{ + predicate(n->get_int() == 63); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 32 bit integer valid for add immediate +operand immIAdd() +%{ + predicate(Assembler::operand_valid_for_add_immediate((int64_t)n->get_int())); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 32 bit integer valid for sub immediate +operand immISub() +%{ + predicate(Assembler::operand_valid_for_add_immediate(-(int64_t)n->get_int())); + match(ConI); + op_cost(0); + 
format %{ %} + interface(CONST_INTER); +%} + +// 5 bit signed value. +operand immI5() +%{ + predicate(n->get_int() <= 15 && n->get_int() >= -16); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 5 bit signed value (simm5) +operand immL5() +%{ + predicate(n->get_long() <= 15 && n->get_long() >= -16); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Integer operands 64 bit +// 64 bit immediate +operand immL() +%{ + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 64 bit zero +operand immL0() +%{ + predicate(n->get_long() == 0); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Pointer operands +// Pointer Immediate +operand immP() +%{ + match(ConP); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// NULL Pointer Immediate +operand immP0() +%{ + predicate(n->get_ptr() == 0); + match(ConP); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate One +// this is used in object initialization (initial object header) +operand immP_1() +%{ + predicate(n->get_ptr() == 1); + match(ConP); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Card Table Byte Map Base +operand immByteMapBase() +%{ + // Get base of card map + predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && + (CardTable::CardValue*)n->get_ptr() == + ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); + match(ConP); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Int Immediate: low 16-bit mask +operand immI_16bits() +%{ + predicate(n->get_int() == 0xFFFF); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: low 32-bit mask +operand immL_32bits() +%{ + predicate(n->get_long() == 0xFFFFFFFFL); + match(ConL); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 64 bit unit decrement +operand 
immL_M1() +%{ + predicate(n->get_long() == -1); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + + +// 32 bit offset of pc in thread anchor + +operand immL_pc_off() +%{ + predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) + + in_bytes(JavaFrameAnchor::last_Java_pc_offset())); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 64 bit integer valid for add immediate +operand immLAdd() +%{ + predicate(Assembler::operand_valid_for_add_immediate(n->get_long())); + match(ConL); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 64 bit integer whose negation is valid for add immediate (negated in unsigned arithmetic so the minimum long cannot overflow) +operand immLSub() +%{ + predicate(Assembler::operand_valid_for_add_immediate((int64_t)(0 - (uint64_t)n->get_long()))); + match(ConL); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Narrow pointer operands +// Narrow Pointer Immediate +operand immN() +%{ + match(ConN); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Narrow NULL Pointer Immediate +operand immN0() +%{ + predicate(n->get_narrowcon() == 0); + match(ConN); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immNKlass() +%{ + match(ConNKlass); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Float and Double operands +// Double Immediate +operand immD() +%{ + match(ConD); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Double Immediate: +0.0d +operand immD0() +%{ + predicate(jlong_cast(n->getd()) == 0); + match(ConD); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Float Immediate +operand immF() +%{ + match(ConF); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Float Immediate: +0.0f. 
+operand immF0() +%{ + predicate(jint_cast(n->getf()) == 0); + match(ConF); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immIOffset() +%{ + predicate(is_imm_in_range(n->get_int(), 12, 0)); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immLOffset() +%{ + predicate(is_imm_in_range(n->get_long(), 12, 0)); + match(ConL); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Scale values: int immediate in the range [1, 3] +operand immIScale() +%{ + predicate(1 <= n->get_int() && (n->get_int() <= 3)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Integer 32 bit Register Operands +operand iRegI() +%{ + constraint(ALLOC_IN_RC(any_reg32)); + match(RegI); + match(iRegINoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Integer 32 bit Register not Special +operand iRegINoSp() +%{ + constraint(ALLOC_IN_RC(no_special_reg32)); + match(RegI); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Register R10 only +operand iRegI_R10() +%{ + constraint(ALLOC_IN_RC(int_r10_reg)); + match(RegI); + match(iRegINoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Register R12 only +operand iRegI_R12() +%{ + constraint(ALLOC_IN_RC(int_r12_reg)); + match(RegI); + match(iRegINoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Register R13 only +operand iRegI_R13() +%{ + constraint(ALLOC_IN_RC(int_r13_reg)); + match(RegI); + match(iRegINoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Register R14 only +operand iRegI_R14() +%{ + constraint(ALLOC_IN_RC(int_r14_reg)); + match(RegI); + match(iRegINoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Integer 64 bit Register Operands +operand iRegL() +%{ + constraint(ALLOC_IN_RC(any_reg)); + match(RegL); + match(iRegLNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Integer 64 bit Register not Special +operand iRegLNoSp() +%{ +
constraint(ALLOC_IN_RC(no_special_reg)); + match(RegL); + match(iRegL_R10); + format %{ %} + interface(REG_INTER); +%} + +// Long 64 bit Register R28 only +operand iRegL_R28() +%{ + constraint(ALLOC_IN_RC(r28_reg)); + match(RegL); + match(iRegLNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Long 64 bit Register R29 only +operand iRegL_R29() +%{ + constraint(ALLOC_IN_RC(r29_reg)); + match(RegL); + match(iRegLNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Long 64 bit Register R30 only +operand iRegL_R30() +%{ + constraint(ALLOC_IN_RC(r30_reg)); + match(RegL); + match(iRegLNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Pointer Register Operands +// Pointer Register +operand iRegP() +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(RegP); + match(iRegPNoSp); + match(iRegP_R10); + match(javaThread_RegP); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Pointer 64 bit Register not Special +operand iRegPNoSp() +%{ + constraint(ALLOC_IN_RC(no_special_ptr_reg)); + match(RegP); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand iRegP_R10() +%{ + constraint(ALLOC_IN_RC(r10_reg)); + match(RegP); + // match(iRegP); + match(iRegPNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Pointer 64 bit Register R11 only +operand iRegP_R11() +%{ + constraint(ALLOC_IN_RC(r11_reg)); + match(RegP); + match(iRegPNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand iRegP_R12() +%{ + constraint(ALLOC_IN_RC(r12_reg)); + match(RegP); + // match(iRegP); + match(iRegPNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Pointer 64 bit Register R13 only +operand iRegP_R13() +%{ + constraint(ALLOC_IN_RC(r13_reg)); + match(RegP); + match(iRegPNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand iRegP_R14() +%{ + constraint(ALLOC_IN_RC(r14_reg)); + match(RegP); + // match(iRegP); + match(iRegPNoSp); + op_cost(0); + format %{ %} +
interface(REG_INTER); +%} + +operand iRegP_R15() +%{ + constraint(ALLOC_IN_RC(r15_reg)); + match(RegP); + // match(iRegP); + match(iRegPNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand iRegP_R16() +%{ + constraint(ALLOC_IN_RC(r16_reg)); + match(RegP); + // match(iRegP); + match(iRegPNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Pointer 64 bit Register R28 only +operand iRegP_R28() +%{ + constraint(ALLOC_IN_RC(r28_reg)); + match(RegP); + match(iRegPNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Narrow Pointer Register Operands +// Narrow Pointer Register +operand iRegN() +%{ + constraint(ALLOC_IN_RC(any_reg32)); + match(RegN); + match(iRegNNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Narrow Pointer Register not Special +operand iRegNNoSp() +%{ + constraint(ALLOC_IN_RC(no_special_reg32)); + match(RegN); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// heap base register -- used for encoding immN0 +operand iRegIHeapbase() +%{ + constraint(ALLOC_IN_RC(heapbase_reg)); + match(RegI); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Long 64 bit Register R10 only +operand iRegL_R10() +%{ + constraint(ALLOC_IN_RC(r10_reg)); + match(RegL); + match(iRegLNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Float Register +// Float register operands +operand fRegF() +%{ + constraint(ALLOC_IN_RC(float_reg)); + match(RegF); + + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Double Register +// Double register operands +operand fRegD() +%{ + constraint(ALLOC_IN_RC(double_reg)); + match(RegD); + + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Generic vector class. This will be used for +// all vector operands.
+operand vReg() +%{ + constraint(ALLOC_IN_RC(vectora_reg)); + match(VecA); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand vReg_V1() +%{ + constraint(ALLOC_IN_RC(v1_reg)); + match(VecA); + match(vReg); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand vReg_V2() +%{ + constraint(ALLOC_IN_RC(v2_reg)); + match(VecA); + match(vReg); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand vReg_V3() +%{ + constraint(ALLOC_IN_RC(v3_reg)); + match(VecA); + match(vReg); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand vReg_V4() +%{ + constraint(ALLOC_IN_RC(v4_reg)); + match(VecA); + match(vReg); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand vReg_V5() +%{ + constraint(ALLOC_IN_RC(v5_reg)); + match(VecA); + match(vReg); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Java Thread Register +operand javaThread_RegP(iRegP reg) +%{ + constraint(ALLOC_IN_RC(java_thread_reg)); // java_thread_reg + match(reg); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +//----------Memory Operands---------------------------------------------------- +// RISC-V has only base-plus-offset and literal addressing modes, so there is no need to use +// index and scale. Here the index is set to 0xffffffff and the scale to 0x0.
+operand indirect(iRegP reg) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(reg); + op_cost(0); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp(0x0); + %} +%} + +operand indOffI(iRegP reg, immIOffset off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indOffL(iRegP reg, immLOffset off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indirectN(iRegN reg) +%{ + predicate(CompressedOops::shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(DecodeN reg); + op_cost(0); + format %{ "[$reg]\t# narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp(0x0); + %} +%} + +operand indOffIN(iRegN reg, immIOffset off) +%{ + predicate(CompressedOops::shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) off); + op_cost(0); + format %{ "[$reg, $off]\t# narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indOffLN(iRegN reg, immLOffset off) +%{ + predicate(CompressedOops::shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) off); + op_cost(0); + format %{ "[$reg, $off]\t# narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +// RISC-V opto stubs need to write to the pc slot in the thread anchor +operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +
+//----------Special Memory Operands-------------------------------------------- +// Stack Slot Operand - This operand is used for loading and storing temporary +// values on the stack where a match requires a value to +// flow through memory. +operand stackSlotI(sRegI reg) +%{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + // match(RegI); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x02); // sp (x2) + index(0xffffffff); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotF(sRegF reg) +%{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + // match(RegF); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x02); // sp (x2) + index(0xffffffff); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotD(sRegD reg) +%{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + // match(RegD); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x02); // sp (x2) + index(0xffffffff); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotL(sRegL reg) +%{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + // match(RegL); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x02); // sp (x2) + index(0xffffffff); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +// Special operand allowing long args to int ops to be truncated for free + +operand iRegL2I(iRegL reg) %{ + + op_cost(0); + + match(ConvL2I reg); + + format %{ "l2i($reg)" %} + + interface(REG_INTER) +%} + + +// Comparison Operands +// NOTE: Label is a predefined operand which should not be redefined in +// the AD file. It is generically handled within the ADLC.
+ +//----------Conditional Branch Operands---------------------------------------- +// Comparison Op - This is the operation of the comparison, and is limited to +// the following set of codes: +// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) +// +// Other attributes of the comparison, such as unsignedness, are specified +// by the comparison instruction that sets a condition code flags register. +// That result is represented by a flags operand whose subtype is appropriate +// to the unsignedness (etc.) of the comparison. +// +// Later, the instruction which matches both the Comparison Op (a Bool) and +// the flags (produced by the Cmp) specifies the coding of the comparison op +// by matching a specific subtype of Bool operand below, such as cmpOpU. + + +// used for signed integral comparisons and fp comparisons +operand cmpOp() +%{ + match(Bool); + + format %{ "" %} + + // the values in this interface derive from struct BoolTest::mask + interface(COND_INTER) %{ + equal(0x0, "eq"); + greater(0x1, "gt"); + overflow(0x2, "overflow"); + less(0x3, "lt"); + not_equal(0x4, "ne"); + less_equal(0x5, "le"); + no_overflow(0x6, "no_overflow"); + greater_equal(0x7, "ge"); + %} +%} + +// used for unsigned integral comparisons +operand cmpOpU() +%{ + match(Bool); + + format %{ "" %} + // the values in this interface derive from struct BoolTest::mask + interface(COND_INTER) %{ + equal(0x0, "eq"); + greater(0x1, "gtu"); + overflow(0x2, "overflow"); + less(0x3, "ltu"); + not_equal(0x4, "ne"); + less_equal(0x5, "leu"); + no_overflow(0x6, "no_overflow"); + greater_equal(0x7, "geu"); + %} +%} + +// used for certain integral comparisons which can be +// converted to bxx instructions +operand cmpOpEqNe() +%{ + match(Bool); + op_cost(0); + predicate(n->as_Bool()->_test._test == BoolTest::ne || + n->as_Bool()->_test._test == BoolTest::eq); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x0, "eq"); + greater(0x1, "gt"); + overflow(0x2, "overflow"); + less(0x3, "lt"); + not_equal(0x4,
"ne"); + less_equal(0x5, "le"); + no_overflow(0x6, "no_overflow"); + greater_equal(0x7, "ge"); + %} +%} + +operand cmpOpULtGe() +%{ + match(Bool); + op_cost(0); + predicate(n->as_Bool()->_test._test == BoolTest::lt || + n->as_Bool()->_test._test == BoolTest::ge); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x0, "eq"); + greater(0x1, "gt"); + overflow(0x2, "overflow"); + less(0x3, "lt"); + not_equal(0x4, "ne"); + less_equal(0x5, "le"); + no_overflow(0x6, "no_overflow"); + greater_equal(0x7, "ge"); + %} +%} + +operand cmpOpUEqNeLeGt() +%{ + match(Bool); + op_cost(0); + predicate(n->as_Bool()->_test._test == BoolTest::ne || + n->as_Bool()->_test._test == BoolTest::eq || + n->as_Bool()->_test._test == BoolTest::le || + n->as_Bool()->_test._test == BoolTest::gt); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x0, "eq"); + greater(0x1, "gt"); + overflow(0x2, "overflow"); + less(0x3, "lt"); + not_equal(0x4, "ne"); + less_equal(0x5, "le"); + no_overflow(0x6, "no_overflow"); + greater_equal(0x7, "ge"); + %} +%} + + +// Flags register, used as output of compare logic +operand rFlagsReg() +%{ + constraint(ALLOC_IN_RC(reg_flags)); + match(RegFlags); + + op_cost(0); + format %{ "RFLAGS" %} + interface(REG_INTER); +%} + +// Special Registers + +// Method Register +operand inline_cache_RegP(iRegP reg) +%{ + constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg + match(reg); + match(iRegPNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +//----------OPERAND CLASSES---------------------------------------------------- +// Operand Classes are groups of operands that are used to simplify +// instruction definitions by not requiring the AD writer to specify +// separate instructions for every form of operand when the +// instruction accepts multiple operand types with the same basic +// encoding and format. The classic case of this is memory operands. + +// memory is used to define read/write location for load/store +// instruction defs.
we can turn a memory op into an Address + +opclass memory(indirect, indOffI, indOffL, indirectN, indOffIN, indOffLN); + +// iRegIorL2I is used for src inputs in rules for 32 bit int (I) +// operations. it allows the src to be either an iRegI or a (ConvL2I +// iRegL). in the latter case the l2i normally planted for a ConvL2I +// can be elided because the 32-bit instruction will just employ the +// lower 32 bits anyway. +// +// n.b. this does not elide all L2I conversions. if the truncated +// value is consumed by more than one operation then the ConvL2I +// cannot be bundled into the consuming nodes so an l2i gets planted +// (actually a mvw $dst $src) and the downstream instructions consume +// the result of the l2i as an iRegI input. That's a shame since the +// mvw is actually redundant but it's not too costly. + +opclass iRegIorL2I(iRegI, iRegL2I); +opclass iRegIorL(iRegI, iRegL); +opclass iRegNorP(iRegN, iRegP); +opclass iRegILNP(iRegI, iRegL, iRegN, iRegP); +opclass iRegILNPNoSp(iRegINoSp, iRegLNoSp, iRegNNoSp, iRegPNoSp); +opclass immIorL(immI, immL); + +//----------PIPELINE----------------------------------------------------------- +// Rules which define the behavior of the target architecture's pipeline. + +// For specific pipelines, e.g.
generic RISC-V, define the stages of that pipeline +//pipe_desc(ID, EX, MEM, WR); +#define ID S0 +#define EX S1 +#define MEM S2 +#define WR S3 + +// Pipeline definition +pipeline %{ + +attributes %{ + // RISC-V instructions are of fixed length + fixed_size_instructions; // Fixed size instructions (TODO: confirm) + max_instructions_per_bundle = 2; // Generic RISC-V: 1, SiFive Series 7: 2 + // RISC-V instructions come in 32-bit word units + instruction_unit_size = 4; // An instruction is 4 bytes long + instruction_fetch_unit_size = 64; // The processor fetches one line + instruction_fetch_units = 1; // of 64 bytes + + // List of nop instructions + nops( MachNop ); +%} + +// We don't use an actual pipeline model so don't care about resources +// or description. We do use pipeline classes to introduce fixed +// latencies + +//----------RESOURCES---------------------------------------------------------- +// Resources are the functional units available to the machine + +// Generic RISC-V pipeline +// 1 decoder +// 1 instruction decoded per cycle +// 1 load/store ops per cycle, 1 branch, 1 FPU +// 1 mul, 1 div + +resources ( DECODE, + ALU, + MUL, + DIV, + BRANCH, + LDST, + FPU); + +//----------PIPELINE DESCRIPTION----------------------------------------------- +// Pipeline Description specifies the stages in the machine's pipeline + +// Define the pipeline as a generic 6 stage pipeline +pipe_desc(S0, S1, S2, S3, S4, S5); + +//----------PIPELINE CLASSES--------------------------------------------------- +// Pipeline Classes describe the stages in which input and output are +// referenced by the hardware pipeline.
+ +pipe_class fp_dop_reg_reg_s(fRegF dst, fRegF src1, fRegF src2) +%{ + single_instruction; + src1 : S1(read); + src2 : S2(read); + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_dop_reg_reg_d(fRegD dst, fRegD src1, fRegD src2) +%{ + src1 : S1(read); + src2 : S2(read); + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_uop_s(fRegF dst, fRegF src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_uop_d(fRegD dst, fRegD src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_d2f(fRegF dst, fRegD src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_f2d(fRegD dst, fRegF src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_f2i(iRegINoSp dst, fRegF src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_f2l(iRegLNoSp dst, fRegF src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_i2f(fRegF dst, iRegIorL2I src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_l2f(fRegF dst, iRegL src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_d2i(iRegINoSp dst, fRegD src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_d2l(iRegLNoSp dst, fRegD src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_i2d(fRegD dst, iRegIorL2I src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_l2d(fRegD dst, iRegIorL2I src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + DECODE :
ID; + FPU : S5; +%} + +pipe_class fp_div_s(fRegF dst, fRegF src1, fRegF src2) +%{ + single_instruction; + src1 : S1(read); + src2 : S2(read); + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_div_d(fRegD dst, fRegD src1, fRegD src2) +%{ + single_instruction; + src1 : S1(read); + src2 : S2(read); + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_sqrt_s(fRegF dst, fRegF src1, fRegF src2) +%{ + single_instruction; + src1 : S1(read); + src2 : S2(read); + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_sqrt_d(fRegD dst, fRegD src1, fRegD src2) +%{ + single_instruction; + src1 : S1(read); + src2 : S2(read); + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_load_constant_s(fRegF dst) +%{ + single_instruction; + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_load_constant_d(fRegD dst) +%{ + single_instruction; + dst : S5(write); + DECODE : ID; + FPU : S5; +%} + +pipe_class fp_load_mem_s(fRegF dst, memory mem) +%{ + single_instruction; + mem : S1(read); + dst : S5(write); + DECODE : ID; + LDST : MEM; +%} + +pipe_class fp_load_mem_d(fRegD dst, memory mem) +%{ + single_instruction; + mem : S1(read); + dst : S5(write); + DECODE : ID; + LDST : MEM; +%} + +pipe_class fp_store_reg_s(fRegF src, memory mem) +%{ + single_instruction; + src : S1(read); + mem : S5(write); + DECODE : ID; + LDST : MEM; +%} + +pipe_class fp_store_reg_d(fRegD src, memory mem) +%{ + single_instruction; + src : S1(read); + mem : S5(write); + DECODE : ID; + LDST : MEM; +%} + +//------- Integer ALU operations -------------------------- + +// Integer ALU reg-reg operation +// Operands needed in ID, result generated in EX +// E.g. ADD Rd, Rs1, Rs2 +pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + dst : EX(write); + src1 : ID(read); + src2 : ID(read); + DECODE : ID; + ALU : EX; +%} + +// Integer ALU reg operation with constant shift +// E.g.
SLLI Rd, Rs1, #shift +pipe_class ialu_reg_shift(iRegI dst, iRegI src1) +%{ + single_instruction; + dst : EX(write); + src1 : ID(read); + DECODE : ID; + ALU : EX; +%} + +// Integer ALU reg-reg operation with variable shift +// both operands must be available in ID +// E.g. SLL Rd, Rs1, Rs2 +pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + dst : EX(write); + src1 : ID(read); + src2 : ID(read); + DECODE : ID; + ALU : EX; +%} + +// Integer ALU reg operation +// E.g. NEG Rd, Rs2 +pipe_class ialu_reg(iRegI dst, iRegI src) +%{ + single_instruction; + dst : EX(write); + src : ID(read); + DECODE : ID; + ALU : EX; +%} + +// Integer ALU reg immediate operation +// E.g. ADDI Rd, Rs1, #imm +pipe_class ialu_reg_imm(iRegI dst, iRegI src1) +%{ + single_instruction; + dst : EX(write); + src1 : ID(read); + DECODE : ID; + ALU : EX; +%} + +// Integer ALU immediate operation (no source operands) +// E.g. LI Rd, #imm +pipe_class ialu_imm(iRegI dst) +%{ + single_instruction; + dst : EX(write); + DECODE : ID; + ALU : EX; +%} + +//------- Multiply pipeline operations -------------------- + +// Multiply reg-reg +// E.g. MULW Rd, Rs1, Rs2 +pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + dst : WR(write); + src1 : ID(read); + src2 : ID(read); + DECODE : ID; + MUL : WR; +%} + +// E.g. MUL RD, Rs1, Rs2 +pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + fixed_latency(3); // Maximum latency for 64 bit mul + dst : WR(write); + src1 : ID(read); + src2 : ID(read); + DECODE : ID; + MUL : WR; +%} + +//------- Divide pipeline operations -------------------- + +// E.g. DIVW Rd, Rs1, Rs2 +pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + fixed_latency(8); // Maximum latency for 32 bit divide + dst : WR(write); + src1 : ID(read); + src2 : ID(read); + DECODE : ID; + DIV : WR; +%} + +// E.g.
DIV RD, Rs1, Rs2 +pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + fixed_latency(16); // Maximum latency for 64 bit divide + dst : WR(write); + src1 : ID(read); + src2 : ID(read); + DECODE : ID; + DIV : WR; +%} + +//------- Load pipeline operations ------------------------ + +// Load - reg, mem +// E.g. LA Rd, mem +pipe_class iload_reg_mem(iRegI dst, memory mem) +%{ + single_instruction; + dst : WR(write); + mem : ID(read); + DECODE : ID; + LDST : MEM; +%} + +// Load - reg, reg +// E.g. LD Rd, Rs +pipe_class iload_reg_reg(iRegI dst, iRegI src) +%{ + single_instruction; + dst : WR(write); + src : ID(read); + DECODE : ID; + LDST : MEM; +%} + +//------- Store pipeline operations ----------------------- + +// Store - zr, mem +// E.g. SD zr, mem +pipe_class istore_mem(memory mem) +%{ + single_instruction; + mem : ID(read); + DECODE : ID; + LDST : MEM; +%} + +// Store - reg, mem +// E.g. SD Rs, mem +pipe_class istore_reg_mem(iRegI src, memory mem) +%{ + single_instruction; + mem : ID(read); + src : EX(read); + DECODE : ID; + LDST : MEM; +%} + +// Store - reg, reg +// E.g. SD Rs2, Rs1 +pipe_class istore_reg_reg(iRegI dst, iRegI src) +%{ + single_instruction; + dst : ID(read); + src : EX(read); + DECODE : ID; + LDST : MEM; +%} + +//------- Branch pipeline operations ---------------------- + +// Branch +pipe_class pipe_branch() +%{ + single_instruction; + DECODE : ID; + BRANCH : EX; +%} + +// Branch +pipe_class pipe_branch_reg(iRegI src) +%{ + single_instruction; + src : ID(read); + DECODE : ID; + BRANCH : EX; +%} + +// Compare & Branch +// E.g. BEQ Rs1, Rs2, L +pipe_class pipe_cmp_branch(iRegI src1, iRegI src2) +%{ + single_instruction; + src1 : ID(read); + src2 : ID(read); + DECODE : ID; + BRANCH : EX; +%} + +// E.g.
BEQZ Rs, L +pipe_class pipe_cmpz_branch(iRegI src) +%{ + single_instruction; + src : ID(read); + DECODE : ID; + BRANCH : EX; +%} + +//------- Synchronisation operations ---------------------- +// Any operation requiring serialization +// E.g. FENCE/Atomic Ops/Load Acquire/Store Release +pipe_class pipe_serial() +%{ + single_instruction; + force_serialization; + fixed_latency(16); + DECODE : ID; + LDST : MEM; +%} + +pipe_class pipe_slow() +%{ + instruction_count(10); + multiple_bundles; + force_serialization; + fixed_latency(16); + DECODE : ID; + LDST : MEM; +%} + +// Empty pipeline class +pipe_class pipe_class_empty() +%{ + single_instruction; + fixed_latency(0); +%} + +// Default pipeline class. +pipe_class pipe_class_default() +%{ + single_instruction; + fixed_latency(2); +%} + +// Pipeline class for compares. +pipe_class pipe_class_compare() +%{ + single_instruction; + fixed_latency(16); +%} + +// Pipeline class for memory operations. +pipe_class pipe_class_memory() +%{ + single_instruction; + fixed_latency(16); +%} + +// Pipeline class for call. +pipe_class pipe_class_call() +%{ + single_instruction; + fixed_latency(100); +%} + +// Define the class for the Nop node. +define %{ + MachNop = pipe_class_empty; +%} +%} +//----------INSTRUCTIONS------------------------------------------------------- +// +// match -- States which machine-independent subtree may be replaced +// by this instruction. +// ins_cost -- The estimated cost of this instruction is used by instruction +// selection to identify a minimum cost tree of machine +// instructions that matches a tree of machine-independent +// instructions. +// format -- A string providing the disassembly for this instruction. +// The value of an instruction's operand may be inserted +// by referring to it with a '$' prefix. +// opcode -- Three instruction opcodes may be provided. These are referred +// to within an encode class as $primary, $secondary, and $tertiary +// respectively.
The primary opcode is commonly used to +// indicate the type of machine instruction, while secondary +// and tertiary are often used for prefix options or addressing +// modes. +// ins_encode -- A list of encode classes with parameters. The encode class +// name must have been defined in an 'enc_class' specification +// in the encode section of the architecture description. + +// ============================================================================ +// Memory (Load/Store) Instructions + +// Load Instructions + +// Load Byte (8 bit signed) +instruct loadB(iRegINoSp dst, memory mem) +%{ + match(Set dst (LoadB mem)); + + ins_cost(LOAD_COST); + format %{ "lb $dst, $mem\t# byte, #@loadB" %} + + ins_encode %{ + __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(iload_reg_mem); +%} + +// Load Byte (8 bit signed) into long +instruct loadB2L(iRegLNoSp dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadB mem))); + + ins_cost(LOAD_COST); + format %{ "lb $dst, $mem\t# byte, #@loadB2L" %} + + ins_encode %{ + __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(iload_reg_mem); +%} + +// Load Byte (8 bit unsigned) +instruct loadUB(iRegINoSp dst, memory mem) +%{ + match(Set dst (LoadUB mem)); + + ins_cost(LOAD_COST); + format %{ "lbu $dst, $mem\t# byte, #@loadUB" %} + + ins_encode %{ + __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(iload_reg_mem); +%} + +// Load Byte (8 bit unsigned) into long +instruct loadUB2L(iRegLNoSp dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadUB mem))); + + ins_cost(LOAD_COST); + format %{ "lbu $dst, $mem\t# byte, #@loadUB2L" %} + + ins_encode %{ + __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(iload_reg_mem); +%} + +// Load Short (16 bit signed) +instruct loadS(iRegINoSp dst, memory mem) +%{ + match(Set dst (LoadS mem)); + + ins_cost(LOAD_COST); + format %{ "lh
$dst, $mem\t# short, #@loadS" %} + + ins_encode %{ + __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(iload_reg_mem); +%} + +// Load Short (16 bit signed) into long +instruct loadS2L(iRegLNoSp dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadS mem))); + + ins_cost(LOAD_COST); + format %{ "lh $dst, $mem\t# short, #@loadS2L" %} + + ins_encode %{ + __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(iload_reg_mem); +%} + +// Load Char (16 bit unsigned) +instruct loadUS(iRegINoSp dst, memory mem) +%{ + match(Set dst (LoadUS mem)); + + ins_cost(LOAD_COST); + format %{ "lhu $dst, $mem\t# short, #@loadUS" %} + + ins_encode %{ + __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(iload_reg_mem); +%} + +// Load Short/Char (16 bit unsigned) into long +instruct loadUS2L(iRegLNoSp dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadUS mem))); + + ins_cost(LOAD_COST); + format %{ "lhu $dst, $mem\t# short, #@loadUS2L" %} + + ins_encode %{ + __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(iload_reg_mem); +%} + +// Load Integer (32 bit signed) +instruct loadI(iRegINoSp dst, memory mem) +%{ + match(Set dst (LoadI mem)); + + ins_cost(LOAD_COST); + format %{ "lw $dst, $mem\t# int, #@loadI" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(iload_reg_mem); +%} + +// Load Integer (32 bit signed) into long +instruct loadI2L(iRegLNoSp dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadI mem))); + + ins_cost(LOAD_COST); + format %{ "lw $dst, $mem\t# int, #@loadI2L" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(iload_reg_mem); +%} + +// Load Integer (32 bit unsigned) into long +instruct
loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) +%{ + match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); + + ins_cost(LOAD_COST); + format %{ "lwu $dst, $mem\t# int, #@loadUI2L" %} + + ins_encode %{ + __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(iload_reg_mem); +%} + +// Load Long (64 bit signed) +instruct loadL(iRegLNoSp dst, memory mem) +%{ + match(Set dst (LoadL mem)); + + ins_cost(LOAD_COST); + format %{ "ld $dst, $mem\t# long, #@loadL" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(iload_reg_mem); +%} + +// Load Range +instruct loadRange(iRegINoSp dst, memory mem) +%{ + match(Set dst (LoadRange mem)); + + ins_cost(LOAD_COST); + format %{ "lwu $dst, $mem\t# range, #@loadRange" %} + + ins_encode %{ + __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(iload_reg_mem); +%} + +// Load Pointer +instruct loadP(iRegPNoSp dst, memory mem) +%{ + match(Set dst (LoadP mem)); + predicate(n->as_Load()->barrier_data() == 0); + + ins_cost(LOAD_COST); + format %{ "ld $dst, $mem\t# ptr, #@loadP" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(iload_reg_mem); +%} + +// Load Compressed Pointer +instruct loadN(iRegNNoSp dst, memory mem) +%{ + match(Set dst (LoadN mem)); + + ins_cost(LOAD_COST); + format %{ "lwu $dst, $mem\t# loadN, compressed ptr, #@loadN" %} + + ins_encode %{ + __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(iload_reg_mem); +%} + +// Load Klass Pointer +instruct loadKlass(iRegPNoSp dst, memory mem) +%{ + match(Set dst (LoadKlass mem)); + + ins_cost(LOAD_COST); + format %{ "ld $dst, $mem\t# class, #@loadKlass" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __
ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(iload_reg_mem); +%} + +// Load Narrow Klass Pointer +instruct loadNKlass(iRegNNoSp dst, memory mem) +%{ + match(Set dst (LoadNKlass mem)); + + ins_cost(LOAD_COST); + format %{ "lwu $dst, $mem\t# loadNKlass, compressed class ptr, #@loadNKlass" %} + + ins_encode %{ + __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(iload_reg_mem); +%} + +// Load Float +instruct loadF(fRegF dst, memory mem) +%{ + match(Set dst (LoadF mem)); + + ins_cost(LOAD_COST); + format %{ "flw $dst, $mem\t# float, #@loadF" %} + + ins_encode %{ + __ flw(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(fp_load_mem_s); +%} + +// Load Double +instruct loadD(fRegD dst, memory mem) +%{ + match(Set dst (LoadD mem)); + + ins_cost(LOAD_COST); + format %{ "fld $dst, $mem\t# double, #@loadD" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(fp_load_mem_d); +%} + +// Load Int Constant +instruct loadConI(iRegINoSp dst, immI src) +%{ + match(Set dst src); + + ins_cost(ALU_COST); + format %{ "li $dst, $src\t# int, #@loadConI" %} + + ins_encode(riscv_enc_li_imm(dst, src)); + + ins_pipe(ialu_imm); +%} + +// Load Long Constant +instruct loadConL(iRegLNoSp dst, immL src) +%{ + match(Set dst src); + + ins_cost(ALU_COST); + format %{ "li $dst, $src\t# long, #@loadConL" %} + + ins_encode(riscv_enc_li_imm(dst, src)); + + ins_pipe(ialu_imm); +%} + +// Load Pointer Constant +instruct loadConP(iRegPNoSp dst, immP con) +%{ + match(Set dst con); + + ins_cost(ALU_COST); + format %{ "mv $dst, $con\t# ptr, #@loadConP" %} + + ins_encode(riscv_enc_mov_p(dst, con)); + + ins_pipe(ialu_imm); +%} + +// Load Null Pointer Constant +instruct loadConP0(iRegPNoSp dst, immP0 con) +%{ + match(Set dst con); + + ins_cost(ALU_COST); + format %{
"mv $dst, $con\t# NULL ptr, #@loadConP0" %} + + ins_encode(riscv_enc_mov_zero(dst)); + + ins_pipe(ialu_imm); +%} + +// Load Pointer Constant One +instruct loadConP1(iRegPNoSp dst, immP_1 con) +%{ + match(Set dst con); + + ins_cost(ALU_COST); + format %{ "mv $dst, $con\t# load ptr constant one, #@loadConP1" %} + + ins_encode(riscv_enc_mov_p1(dst)); + + ins_pipe(ialu_imm); +%} + +// Load Byte Map Base Constant +instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) +%{ + match(Set dst con); + ins_cost(ALU_COST); + format %{ "mv $dst, $con\t# Byte Map Base, #@loadByteMapBase" %} + + ins_encode(riscv_enc_mov_byte_map_base(dst)); + + ins_pipe(ialu_imm); +%} + +// Load Narrow Pointer Constant +instruct loadConN(iRegNNoSp dst, immN con) +%{ + match(Set dst con); + + ins_cost(ALU_COST * 4); + format %{ "mv $dst, $con\t# compressed ptr, #@loadConN" %} + + ins_encode(riscv_enc_mov_n(dst, con)); + + ins_pipe(ialu_imm); +%} + +// Load Narrow Null Pointer Constant +instruct loadConN0(iRegNNoSp dst, immN0 con) +%{ + match(Set dst con); + + ins_cost(ALU_COST); + format %{ "mv $dst, $con\t# compressed NULL ptr, #@loadConN0" %} + + ins_encode(riscv_enc_mov_zero(dst)); + + ins_pipe(ialu_imm); +%} + +// Load Narrow Klass Constant +instruct loadConNKlass(iRegNNoSp dst, immNKlass con) +%{ + match(Set dst con); + + ins_cost(ALU_COST * 6); + format %{ "mv $dst, $con\t# compressed klass ptr, #@loadConNKlass" %} + + ins_encode(riscv_enc_mov_nk(dst, con)); + + ins_pipe(ialu_imm); +%} + +// Load Float Constant +instruct loadConF(fRegF dst, immF con) %{ + match(Set dst con); + + ins_cost(LOAD_COST); + format %{ + "flw $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConF" + %} + + ins_encode %{ + __ flw(as_FloatRegister($dst$$reg), $constantaddress($con)); + %} + + ins_pipe(fp_load_constant_s); +%} + +instruct loadConF0(fRegF dst, immF0 con) %{ + match(Set dst con); + + ins_cost(XFER_COST); + + format %{ "fmv.w.x $dst, zr\t# float, #@loadConF0" %} + + ins_encode %{
+ __ fmv_w_x(as_FloatRegister($dst$$reg), zr); + %} + + ins_pipe(fp_load_constant_s); +%} + +// Load Double Constant +instruct loadConD(fRegD dst, immD con) %{ + match(Set dst con); + + ins_cost(LOAD_COST); + format %{ + "fld $dst, [$constantaddress]\t# load from constant table: double=$con, #@loadConD" + %} + + ins_encode %{ + __ fld(as_FloatRegister($dst$$reg), $constantaddress($con)); + %} + + ins_pipe(fp_load_constant_d); +%} + +instruct loadConD0(fRegD dst, immD0 con) %{ + match(Set dst con); + + ins_cost(XFER_COST); + + format %{ "fmv.d.x $dst, zr\t# double, #@loadConD0" %} + + ins_encode %{ + __ fmv_d_x(as_FloatRegister($dst$$reg), zr); + %} + + ins_pipe(fp_load_constant_d); +%} + +// Store Instructions +// Store CMS card-mark Immediate +instruct storeimmCM0(immI0 zero, memory mem) +%{ + match(Set mem (StoreCM mem zero)); + + ins_cost(STORE_COST); + format %{ "storestore (elided)\n\t" + "sb zr, $mem\t# byte, #@storeimmCM0" %} + + ins_encode %{ + __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(istore_mem); +%} + +// Store CMS card-mark Immediate with intervening StoreStore +// needed when using CMS with no conditional card marking +instruct storeimmCM0_ordered(immI0 zero, memory mem) +%{ + match(Set mem (StoreCM mem zero)); + + ins_cost(ALU_COST + STORE_COST); + format %{ "membar(StoreStore)\n\t" + "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %} + + ins_encode %{ + __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(istore_mem); +%} + +// Store Byte +instruct storeB(iRegIorL2I src, memory mem) +%{ + match(Set mem (StoreB mem src)); + + ins_cost(STORE_COST); + format %{ "sb $src, $mem\t# byte, #@storeB" %} + + ins_encode %{ + __ sb(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(istore_reg_mem); +%} + +instruct storeimmB0(immI0 zero, memory mem) +%{ + match(Set mem (StoreB mem zero)); + + 
ins_cost(STORE_COST); + format %{ "sb zr, $mem\t# byte, #@storeimmB0" %} + + ins_encode %{ + __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(istore_mem); +%} + +// Store Char/Short +instruct storeC(iRegIorL2I src, memory mem) +%{ + match(Set mem (StoreC mem src)); + + ins_cost(STORE_COST); + format %{ "sh $src, $mem\t# short, #@storeC" %} + + ins_encode %{ + __ sh(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(istore_reg_mem); +%} + +instruct storeimmC0(immI0 zero, memory mem) +%{ + match(Set mem (StoreC mem zero)); + + ins_cost(STORE_COST); + format %{ "sh zr, $mem\t# short, #@storeimmC0" %} + + ins_encode %{ + __ sh(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(istore_mem); +%} + +// Store Integer +instruct storeI(iRegIorL2I src, memory mem) +%{ + match(Set mem(StoreI mem src)); + + ins_cost(STORE_COST); + format %{ "sw $src, $mem\t# int, #@storeI" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(istore_reg_mem); +%} + +instruct storeimmI0(immI0 zero, memory mem) +%{ + match(Set mem(StoreI mem zero)); + + ins_cost(STORE_COST); + format %{ "sw zr, $mem\t# int, #@storeimmI0" %} + + ins_encode %{ + __ sw(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(istore_mem); +%} + +// Store Long (64 bit signed) +instruct storeL(iRegL src, memory mem) +%{ + match(Set mem (StoreL mem src)); + + ins_cost(STORE_COST); + format %{ "sd $src, $mem\t# long, #@storeL" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(istore_reg_mem); +%} + +// Store Long (64 bit signed) +instruct storeimmL0(immL0 zero, memory mem) +%{ + match(Set mem (StoreL mem zero)); + + ins_cost(STORE_COST); + format %{ "sd zr, $mem\t# long, #@storeimmL0" %} + + ins_encode %{ + __ 
sd(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(istore_mem); +%} + +// Store Pointer +instruct storeP(iRegP src, memory mem) +%{ + match(Set mem (StoreP mem src)); + + ins_cost(STORE_COST); + format %{ "sd $src, $mem\t# ptr, #@storeP" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(istore_reg_mem); +%} + +// Store Pointer +instruct storeimmP0(immP0 zero, memory mem) +%{ + match(Set mem (StoreP mem zero)); + + ins_cost(STORE_COST); + format %{ "sd zr, $mem\t# ptr, #@storeimmP0" %} + + ins_encode %{ + __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(istore_mem); +%} + +// Store Compressed Pointer +instruct storeN(iRegN src, memory mem) +%{ + match(Set mem (StoreN mem src)); + + ins_cost(STORE_COST); + format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(istore_reg_mem); +%} + +instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) +%{ + match(Set mem (StoreN mem zero)); + + ins_cost(STORE_COST); + format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} + + ins_encode %{ + __ sw(as_Register($heapbase$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(istore_reg_mem); +%} + +// Store Float +instruct storeF(fRegF src, memory mem) +%{ + match(Set mem (StoreF mem src)); + + ins_cost(STORE_COST); + format %{ "fsw $src, $mem\t# float, #@storeF" %} + + ins_encode %{ + __ fsw(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(fp_store_reg_s); +%} + +// Store Double +instruct storeD(fRegD src, memory mem) +%{ + match(Set mem (StoreD mem src)); + + ins_cost(STORE_COST); + format %{ "fsd $src, $mem\t# double, #@storeD" %} + + ins_encode %{ + 
Assembler::CompressibleRegion cr(&_masm); + __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(fp_store_reg_d); +%} + +// Store Compressed Klass Pointer +instruct storeNKlass(iRegN src, memory mem) +%{ + match(Set mem (StoreNKlass mem src)); + + ins_cost(STORE_COST); + format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + + ins_pipe(istore_reg_mem); +%} + +// ============================================================================ +// Atomic operation instructions +// +// Intel and SPARC both implement Ideal Node LoadPLocked and +// Store{PIL}Conditional instructions using a normal load for the +// LoadPLocked and a CAS for the Store{PIL}Conditional. +// +// The ideal code appears only to use LoadPLocked/storePConditional as a +// pair to lock object allocations from Eden space when not using +// TLABs. +// +// There does not appear to be a Load{IL}Locked Ideal Node and the +// Ideal code appears to use Store{IL}Conditional as an alias for CAS +// and to use StoreIConditional only for 32-bit and StoreLConditional +// only for 64-bit. +// +// We implement LoadPLocked and storePConditional instructions using, +// respectively the RISCV hw load-reserve and store-conditional +// instructions. Whereas we must implement each of +// Store{IL}Conditional using a CAS which employs a pair of +// instructions comprising a load-reserve followed by a +// store-conditional. 
+ + +// Locked-load (load reserved) of the current heap-top +// used when updating the eden heap top +// implemented using lr_d on RISCV64 +instruct loadPLocked(iRegPNoSp dst, indirect mem) +%{ + match(Set dst (LoadPLocked mem)); + + ins_cost(ALU_COST * 2 + LOAD_COST); + + format %{ "lr.d $dst, $mem\t# ptr load reserved, #@loadPLocked" %} + + ins_encode %{ + __ la(t0, Address(as_Register($mem$$base), $mem$$disp)); + __ lr_d($dst$$Register, t0, Assembler::aq); + %} + + ins_pipe(pipe_serial); +%} + +// Conditional-store of the updated heap-top. +// Used during allocation of the shared heap. +// implemented using sc_d on RISCV64. +instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) +%{ + match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); + + ins_cost(ALU_COST * 2 + STORE_COST); + + format %{ + "sc_d t1, $newval, $heap_top_ptr\t# ptr store conditional, #@storePConditional" + %} + + ins_encode %{ + __ la(t0, Address(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp)); + __ sc_d($cr$$Register, $newval$$Register, t0, Assembler::rl); + %} + + ins_pipe(pipe_serial); +%} + +instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) +%{ + match(Set cr (StoreLConditional mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + 2 * BRANCH_COST); + + format %{ + "cmpxchg t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval\n\t" + "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeLConditional" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); + __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); + %} + + ins_pipe(pipe_slow); +%} + +// storeIConditional also has acquire semantics, for no better reason +// than matching storeLConditional. 
+instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) +%{ + match(Set cr (StoreIConditional mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2); + + format %{ + "cmpxchgw t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval\n\t" + "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeIConditional" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); + __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); + %} + + ins_pipe(pipe_slow); +%} + +// standard CompareAndSwapX when we are using barriers +// these have higher priority than the rules selected by a predicate +instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ + match(Set res (CompareAndSwapB mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); + + effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapB" + %} + + ins_encode %{ + __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, + true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ + match(Set res (CompareAndSwapS mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); + + effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS" + %} + + ins_encode %{ + __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, + true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) +%{ + match(Set res (CompareAndSwapI mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapI" + %} + + ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval)); + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) +%{ + match(Set res (CompareAndSwapL mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapL" + %} + + ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ + predicate(n->as_LoadStore()->barrier_data() == 0); + + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapP" + %} + + ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) +%{ + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapN" + %} + + ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval)); + + ins_pipe(pipe_slow); +%} + +// alternative CompareAndSwapX when we are eliding barriers +instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapB mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ + "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq" + %} + + ins_encode %{ + __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, + Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, + true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapS mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ + "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapSAcq" + %} + + ins_encode %{ + __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, + Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, + true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapI mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + + format %{ + "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapIAcq" + %} + + ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval)); + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapL mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + + format %{ + "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq" + %} + + ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ + predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); + + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + + format %{ + "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapPAcq" + %} + + ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); + + format %{ + "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq" + %} + + ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval)); + + ins_pipe(pipe_slow); +%} + +// Sundry CAS operations. Note that release is always true, +// regardless of the memory ordering of the CAS. This is because we +// need the volatile case to be sequentially consistent but there is +// no trailing StoreLoad barrier emitted by C2. Unfortunately we +// can't check the type of memory ordering here, so we always emit a +// sc_d(w) with rl bit set. 
+instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ + match(Set res (CompareAndExchangeB mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ + "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB" + %} + + ins_encode %{ + __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ + match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ + "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS" + %} + + ins_encode %{ + __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ + match(Set res (CompareAndExchangeI mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + + effect(TEMP_DEF res); + + format %{ + "cmpxchg $res = $mem, $oldval, 
$newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ + match(Set res (CompareAndExchangeL mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + + effect(TEMP_DEF res); + + format %{ + "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3); + + effect(TEMP_DEF res); + + format %{ + "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ + predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + + effect(TEMP_DEF res); + + format %{ + "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, 
#@compareAndExchangeP" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeB mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ + "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq" + %} + + ins_encode %{ + __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ + "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq" + %} + + ins_encode %{ + __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result_as_bool*/ false, $tmp1$$Register, 
$tmp2$$Register, $tmp3$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeI mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + + effect(TEMP_DEF res); + + format %{ + "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeL mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + + effect(TEMP_DEF res); + + format %{ + "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + + effect(TEMP_DEF res); + + format %{ + "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ 
Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ + predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); + + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + + effect(TEMP_DEF res); + + format %{ + "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ + match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ + "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapB" + %} + + ins_encode %{ + __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ + match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + 
BRANCH_COST * 2 + ALU_COST * 7); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ + "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapS" + %} + + ins_encode %{ + __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ + match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ + "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapI" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ + match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ + "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapL" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + __ 
xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); + + format %{ + "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapN" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ + predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ + "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapP" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ + 
"cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapBAcq" + %} + + ins_encode %{ + __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ + "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapSAcq" + %} + + ins_encode %{ + __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ + "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapIAcq" + %} + + ins_encode %{ + 
__ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ + "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapLAcq" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); + + format %{ + "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapNAcq" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ + predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); + + match(Set res (WeakCompareAndSwapP mem (Binary 
oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ + "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapPAcq" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + +instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) +%{ + match(Set prev (GetAndSetI mem newv)); + + ins_cost(ALU_COST); + + format %{ "atomic_xchgw $prev, $newv, [$mem]\t#@get_and_setI" %} + + ins_encode %{ + __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) +%{ + match(Set prev (GetAndSetL mem newv)); + + ins_cost(ALU_COST); + + format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setL" %} + + ins_encode %{ + __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) +%{ + match(Set prev (GetAndSetN mem newv)); + + ins_cost(ALU_COST); + + format %{ "atomic_xchgwu $prev, $newv, [$mem]\t#@get_and_setN" %} + + ins_encode %{ + __ atomic_xchgwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) +%{ + predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set prev (GetAndSetP mem newv)); + + ins_cost(ALU_COST); + + format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setP" %} + + ins_encode %{ + __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_setIAcq(indirect mem, 
iRegI newv, iRegINoSp prev) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set prev (GetAndSetI mem newv)); + + ins_cost(ALU_COST); + + format %{ "atomic_xchgw_acq $prev, $newv, [$mem]\t#@get_and_setIAcq" %} + + ins_encode %{ + __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set prev (GetAndSetL mem newv)); + + ins_cost(ALU_COST); + + format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setLAcq" %} + + ins_encode %{ + __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set prev (GetAndSetN mem newv)); + + ins_cost(ALU_COST); + + format %{ "atomic_xchgwu_acq $prev, $newv, [$mem]\t#@get_and_setNAcq" %} + + ins_encode %{ + __ atomic_xchgalwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) +%{ + predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); + + match(Set prev (GetAndSetP mem newv)); + + ins_cost(ALU_COST); + + format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setPAcq" %} + + ins_encode %{ + __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) +%{ + match(Set newval (GetAndAddL mem incr)); + + ins_cost(ALU_COST); + + format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addL" %} + + ins_encode %{ + __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_addL_no_res(indirect mem, Universe dummy, 
iRegL incr) +%{ + predicate(n->as_LoadStore()->result_not_used()); + + match(Set dummy (GetAndAddL mem incr)); + + ins_cost(ALU_COST); + + format %{ "get_and_addL [$mem], $incr\t#@get_and_addL_no_res" %} + + ins_encode %{ + __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAdd incr) +%{ + match(Set newval (GetAndAddL mem incr)); + + ins_cost(ALU_COST); + + format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addLi" %} + + ins_encode %{ + __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAdd incr) +%{ + predicate(n->as_LoadStore()->result_not_used()); + + match(Set dummy (GetAndAddL mem incr)); + + ins_cost(ALU_COST); + + format %{ "get_and_addL [$mem], $incr\t#@get_and_addLi_no_res" %} + + ins_encode %{ + __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) +%{ + match(Set newval (GetAndAddI mem incr)); + + ins_cost(ALU_COST); + + format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addI" %} + + ins_encode %{ + __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) +%{ + predicate(n->as_LoadStore()->result_not_used()); + + match(Set dummy (GetAndAddI mem incr)); + + ins_cost(ALU_COST); + + format %{ "get_and_addI [$mem], $incr\t#@get_and_addI_no_res" %} + + ins_encode %{ + __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAdd incr) +%{ + match(Set newval (GetAndAddI mem incr)); + + ins_cost(ALU_COST); + + format %{ "get_and_addI $newval, [$mem], 
$incr\t#@get_and_addIi" %} + + ins_encode %{ + __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr) +%{ + predicate(n->as_LoadStore()->result_not_used()); + + match(Set dummy (GetAndAddI mem incr)); + + ins_cost(ALU_COST); + + format %{ "get_and_addI [$mem], $incr\t#@get_and_addIi_no_res" %} + + ins_encode %{ + __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set newval (GetAndAddL mem incr)); + + ins_cost(ALU_COST); + + format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLAcq" %} + + ins_encode %{ + __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ + predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + + match(Set dummy (GetAndAddL mem incr)); + + ins_cost(ALU_COST); + + format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addL_no_resAcq" %} + + ins_encode %{ + __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set newval (GetAndAddL mem incr)); + + ins_cost(ALU_COST); + + format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLiAcq" %} + + ins_encode %{ + __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) +%{ + predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + + match(Set dummy (GetAndAddL 
mem incr)); + + ins_cost(ALU_COST); + + format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addLi_no_resAcq" %} + + ins_encode %{ + __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set newval (GetAndAddI mem incr)); + + ins_cost(ALU_COST); + + format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIAcq" %} + + ins_encode %{ + __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) +%{ + predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + + match(Set dummy (GetAndAddI mem incr)); + + ins_cost(ALU_COST); + + format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addI_no_resAcq" %} + + ins_encode %{ + __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) +%{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set newval (GetAndAddI mem incr)); + + ins_cost(ALU_COST); + + format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIiAcq" %} + + ins_encode %{ + __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr) +%{ + predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + + match(Set dummy (GetAndAddI mem incr)); + + ins_cost(ALU_COST); + + format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addIi_no_resAcq" %} + + ins_encode %{ + __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base)); + %} + + ins_pipe(pipe_serial); +%} + +// 
============================================================================ +// Arithmetic Instructions +// + +// Integer Addition + +// TODO +// these currently employ operations which do not set CR and hence are +// not flagged as killing CR but we would like to isolate the cases +// where we want to set flags from those where we don't. need to work +// out how to do that. +instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ + match(Set dst (AddI src1 src2)); + + ins_cost(ALU_COST); + format %{ "addw $dst, $src1, $src2\t#@addI_reg_reg" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __ addw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ + match(Set dst (AddI src1 src2)); + + ins_cost(ALU_COST); + format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + int32_t con = (int32_t)$src2$$constant; + __ addiw(as_Register($dst$$reg), + as_Register($src1$$reg), + con); + %} + + ins_pipe(ialu_reg_imm); +%} + +instruct addI_reg_imm_l2i(iRegINoSp dst, iRegL src1, immIAdd src2) %{ + match(Set dst (AddI (ConvL2I src1) src2)); + + ins_cost(ALU_COST); + format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm_l2i" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __ addiw(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant); + %} + + ins_pipe(ialu_reg_imm); +%} + +// Pointer Addition +instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{ + match(Set dst (AddP src1 src2)); + + ins_cost(ALU_COST); + format %{ "add $dst, $src1, $src2\t# ptr, #@addP_reg_reg" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __ add(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +// If we shift more than 32 bits, we need
not convert I2L. +instruct lShiftL_regI_immGE32(iRegLNoSp dst, iRegI src, uimmI6_ge32 scale) %{ + match(Set dst (LShiftL (ConvI2L src) scale)); + ins_cost(ALU_COST); + format %{ "slli $dst, $src, $scale & 63\t#@lShiftL_regI_immGE32" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __ slli(as_Register($dst$$reg), as_Register($src$$reg), $scale$$constant & 63); + %} + + ins_pipe(ialu_reg_shift); +%} + +// Pointer Immediate Addition +// n.b. this needs to be more expensive than using an indirect memory +// operand +instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAdd src2) %{ + match(Set dst (AddP src1 src2)); + ins_cost(ALU_COST); + format %{ "addi $dst, $src1, $src2\t# ptr, #@addP_reg_imm" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + // src2 is imm, so actually call the addi + __ add(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant); + %} + + ins_pipe(ialu_reg_imm); +%} + +// Long Addition +instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ + match(Set dst (AddL src1 src2)); + ins_cost(ALU_COST); + format %{ "add $dst, $src1, $src2\t#@addL_reg_reg" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __ add(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +// Long Immediate Addition. No constant pool entries required.
+instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ + match(Set dst (AddL src1 src2)); + ins_cost(ALU_COST); + format %{ "addi $dst, $src1, $src2\t#@addL_reg_imm" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + // src2 is imm, so actually call the addi + __ add(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant); + %} + + ins_pipe(ialu_reg_imm); +%} + +// Integer Subtraction +instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ + match(Set dst (SubI src1 src2)); + + ins_cost(ALU_COST); + format %{ "subw $dst, $src1, $src2\t#@subI_reg_reg" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __ subw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +// Integer Immediate Subtraction +instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immISub src2) %{ + match(Set dst (SubI src1 src2)); + + ins_cost(ALU_COST); + format %{ "addiw $dst, $src1, -$src2\t#@subI_reg_imm" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + // src2 is imm, so actually call the addiw + __ subw(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant); + %} + + ins_pipe(ialu_reg_imm); +%} + +// Long Subtraction +instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ + match(Set dst (SubL src1 src2)); + ins_cost(ALU_COST); + format %{ "sub $dst, $src1, $src2\t#@subL_reg_reg" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __ sub(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +// Long Immediate Subtraction. No constant pool entries required.
+instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLSub src2) %{ + match(Set dst (SubL src1 src2)); + ins_cost(ALU_COST); + format %{ "addi $dst, $src1, -$src2\t#@subL_reg_imm" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + // src2 is imm, so actually call the addi + __ sub(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant); + %} + + ins_pipe(ialu_reg_imm); +%} + +// Integer Negation (special case for sub) + +instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero) %{ + match(Set dst (SubI zero src)); + ins_cost(ALU_COST); + format %{ "subw $dst, x0, $src\t# int, #@negI_reg" %} + + ins_encode %{ + // actually call the subw + __ negw(as_Register($dst$$reg), + as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// Long Negation + +instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero) %{ + match(Set dst (SubL zero src)); + ins_cost(ALU_COST); + format %{ "sub $dst, x0, $src\t# long, #@negL_reg" %} + + ins_encode %{ + // actually call the sub + __ neg(as_Register($dst$$reg), + as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// Integer Multiply + +instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ + match(Set dst (MulI src1 src2)); + ins_cost(IMUL_COST); + format %{ "mulw $dst, $src1, $src2\t#@mulI" %} + + // 32-bit (word) multiply of the low halves of src1 and src2 + ins_encode %{ + // riscv64 mulw sign-extends the 32-bit product into the high 32 bits of dst + __ mulw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(imul_reg_reg); +%} + +// Long Multiply + +instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ + match(Set dst (MulL src1 src2)); + ins_cost(IMUL_COST); + format %{ "mul $dst, $src1, $src2\t#@mulL" %} + + ins_encode %{ + __ mul(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(lmul_reg_reg); +%} + +instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2) +%{ + match(Set dst (MulHiL src1
src2)); + ins_cost(IMUL_COST); + format %{ "mulh $dst, $src1, $src2\t# mulhi, #@mulHiL_rReg" %} + + ins_encode %{ + __ mulh(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(lmul_reg_reg); +%} + +// Integer Divide + +instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ + match(Set dst (DivI src1 src2)); + ins_cost(IDIVSI_COST); + format %{ "divw $dst, $src1, $src2\t#@divI"%} + + ins_encode(riscv_enc_divw(dst, src1, src2)); + ins_pipe(idiv_reg_reg); +%} + +instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{ + match(Set dst (URShiftI (RShiftI src1 div1) div2)); + ins_cost(ALU_COST); + format %{ "srliw $dst, $src1, $div1\t# int signExtract, #@signExtract" %} + + ins_encode %{ + __ srliw(as_Register($dst$$reg), as_Register($src1$$reg), 31); + %} + ins_pipe(ialu_reg_shift); +%} + +// Long Divide + +instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ + match(Set dst (DivL src1 src2)); + ins_cost(IDIVDI_COST); + format %{ "div $dst, $src1, $src2\t#@divL" %} + + ins_encode(riscv_enc_div(dst, src1, src2)); + ins_pipe(ldiv_reg_reg); +%} + +instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{ + match(Set dst (URShiftL (RShiftL src1 div1) div2)); + ins_cost(ALU_COST); + format %{ "srli $dst, $src1, $div1\t# long signExtract, #@signExtractL" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + __ srli(as_Register($dst$$reg), as_Register($src1$$reg), 63); + %} + ins_pipe(ialu_reg_shift); +%} + +// Integer Remainder + +instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ + match(Set dst (ModI src1 src2)); + ins_cost(IDIVSI_COST); + format %{ "remw $dst, $src1, $src2\t#@modI" %} + + ins_encode(riscv_enc_modw(dst, src1, src2)); + ins_pipe(ialu_reg_reg); +%} + +// Long Remainder + +instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ + match(Set dst (ModL src1 src2)); + ins_cost(IDIVDI_COST); + format %{ "rem $dst, $src1, 
$src2\t#@modL" %} + + ins_encode(riscv_enc_mod(dst, src1, src2)); + ins_pipe(ialu_reg_reg); +%} + +// Integer Shifts + +// Shift Left Register +// In RV64I, only the low 5 bits of src2 are considered for the shift amount +instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ + match(Set dst (LShiftI src1 src2)); + ins_cost(ALU_COST); + format %{ "sllw $dst, $src1, $src2\t#@lShiftI_reg_reg" %} + + ins_encode %{ + __ sllw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg_vshift); +%} + +// Shift Left Immediate +instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ + match(Set dst (LShiftI src1 src2)); + ins_cost(ALU_COST); + format %{ "slliw $dst, $src1, ($src2 & 0x1f)\t#@lShiftI_reg_imm" %} + + ins_encode %{ + // the shift amount is encoded in the lower + // 5 bits of the I-immediate field for the 32-bit slliw + __ slliw(as_Register($dst$$reg), + as_Register($src1$$reg), + (unsigned) $src2$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_shift); +%} + +// Shift Right Logical Register +// In RV64I, only the low 5 bits of src2 are considered for the shift amount +instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ + match(Set dst (URShiftI src1 src2)); + ins_cost(ALU_COST); + format %{ "srlw $dst, $src1, $src2\t#@urShiftI_reg_reg" %} + + ins_encode %{ + __ srlw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg_vshift); +%} + +// Shift Right Logical Immediate +instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ + match(Set dst (URShiftI src1 src2)); + ins_cost(ALU_COST); + format %{ "srliw $dst, $src1, ($src2 & 0x1f)\t#@urShiftI_reg_imm" %} + + ins_encode %{ + // the shift amount is encoded in the lower + // 5 bits of the I-immediate field for the 32-bit srliw + __ srliw(as_Register($dst$$reg), + as_Register($src1$$reg), + (unsigned) $src2$$constant & 0x1f); + %} + +
ins_pipe(ialu_reg_shift); +%} + +// Shift Right Arithmetic Register +// In RV64I, only the low 5 bits of src2 are considered for the shift amount +instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ + match(Set dst (RShiftI src1 src2)); + ins_cost(ALU_COST); + format %{ "sraw $dst, $src1, $src2\t#@rShiftI_reg_reg" %} + + ins_encode %{ + // riscv will sign-ext dst high 32 bits + __ sraw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg_vshift); +%} + +// Shift Right Arithmetic Immediate +instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ + match(Set dst (RShiftI src1 src2)); + ins_cost(ALU_COST); + format %{ "sraiw $dst, $src1, ($src2 & 0x1f)\t#@rShiftI_reg_imm" %} + + ins_encode %{ + // riscv will sign-ext dst high 32 bits + __ sraiw(as_Register($dst$$reg), + as_Register($src1$$reg), + (unsigned) $src2$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_shift); +%} + +// Long Shifts + +// Shift Left Register +// In RV64I, only the low 6 bits of src2 are considered for the shift amount +instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ + match(Set dst (LShiftL src1 src2)); + + ins_cost(ALU_COST); + format %{ "sll $dst, $src1, $src2\t#@lShiftL_reg_reg" %} + + ins_encode %{ + __ sll(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg_vshift); +%} + +// Shift Left Immediate +instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ + match(Set dst (LShiftL src1 src2)); + + ins_cost(ALU_COST); + format %{ "slli $dst, $src1, ($src2 & 0x3f)\t#@lShiftL_reg_imm" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + // the shift amount is encoded in the lower + // 6 bits of the I-immediate field for RV64I + __ slli(as_Register($dst$$reg), + as_Register($src1$$reg), + (unsigned) $src2$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_shift); +%} + +// Shift Right Logical Register +// 
In RV64I, only the low 6 bits of src2 are considered for the shift amount
+// Matches URShiftL with a register shift count; encoded with srl.
+instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
+  match(Set dst (URShiftL src1 src2));
+
+  ins_cost(ALU_COST);
+  format %{ "srl $dst, $src1, $src2\t#@urShiftL_reg_reg" %}
+
+  ins_encode %{
+    __ srl(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Right Logical Immediate
+// The int constant is masked with 0x3f before being passed to srli.
+instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
+  match(Set dst (URShiftL src1 src2));
+
+  ins_cost(ALU_COST);
+  format %{ "srli $dst, $src1, ($src2 & 0x3f)\t#@urShiftL_reg_imm" %}
+
+  ins_encode %{
+    // NOTE(review): scoped CompressibleRegion object; presumably allows
+    // compressed encodings inside this encoding block -- confirm in Assembler.
+    Assembler::CompressibleRegion cr(&_masm);
+    // the shift amount is encoded in the lower
+    // 6 bits of the I-immediate field for RV64I
+    __ srli(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (unsigned) $src2$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// A special-case pattern for card table stores.
+// Matches URShiftL applied to a CastP2X of a pointer operand, so the pointer
+// register is shifted directly with srli.
+instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
+  match(Set dst (URShiftL (CastP2X src1) src2));
+
+  ins_cost(ALU_COST);
+  format %{ "srli $dst, p2x($src1), ($src2 & 0x3f)\t#@urShiftP_reg_imm" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    // the shift amount is encoded in the lower
+    // 6 bits of the I-immediate field for RV64I
+    __ srli(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (unsigned) $src2$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Right Arithmetic Register
+// In RV64I, only the low 6 bits of src2 are considered for the shift amount
+instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
+  match(Set dst (RShiftL src1 src2));
+
+  ins_cost(ALU_COST);
+  format %{ "sra $dst, $src1, $src2\t#@rShiftL_reg_reg" %}
+
+  ins_encode %{
+    __ sra(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Right Arithmetic Immediate
+// The int constant is masked with 0x3f before being passed to srai.
+instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
+  match(Set dst (RShiftL src1 src2));
+
+  ins_cost(ALU_COST);
+  format %{ "srai $dst, $src1, ($src2 & 0x3f)\t#@rShiftL_reg_imm" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    // the shift amount is encoded in the lower
+    // 6 bits of the I-immediate field for RV64I
+    __ srai(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (unsigned) $src2$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Bitwise NOT of an int: matches XorI with the immI_M1 operand and encodes
+// it as xori with the literal -1.
+instruct regI_not_reg(iRegINoSp dst, iRegI src1, immI_M1 m1) %{
+  match(Set dst (XorI src1 m1));
+  ins_cost(ALU_COST);
+  format %{ "xori $dst, $src1, -1\t#@regI_not_reg" %}
+
+  ins_encode %{
+    __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Bitwise NOT of a long: matches XorL with the immL_M1 operand and encodes
+// it as xori with the literal -1.
+instruct regL_not_reg(iRegLNoSp dst, iRegL src1, immL_M1 m1) %{
+  match(Set dst (XorL src1 m1));
+  ins_cost(ALU_COST);
+  format %{ "xori $dst, $src1, -1\t#@regL_not_reg" %}
+
+  ins_encode %{
+    __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+
+// ============================================================================
+// Floating Point Arithmetic Instructions
+// Each rule below matches one ideal FP node on register operands and encodes
+// it with the single assembler call named in its format string.
+
+instruct addF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
+  match(Set dst (AddF src1 src2));
+
+  ins_cost(FMUL_SINGLE_COST);
+  format %{ "fadd.s $dst, $src1, $src2\t#@addF_reg_reg" %}
+
+  ins_encode %{
+    __ fadd_s(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(fp_dop_reg_reg_s);
+%}
+
+instruct addD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
+  match(Set dst (AddD src1 src2));
+
+  ins_cost(FMUL_DOUBLE_COST);
+  format %{ "fadd.d $dst, $src1, $src2\t#@addD_reg_reg" %}
+
+  ins_encode %{
+    __ fadd_d(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(fp_dop_reg_reg_d);
+%}
+
+instruct subF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
+  match(Set dst (SubF src1 src2));
+
+  ins_cost(FMUL_SINGLE_COST);
+  format %{ "fsub.s $dst, $src1, $src2\t#@subF_reg_reg" %}
+
+  ins_encode %{
+    __ fsub_s(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(fp_dop_reg_reg_s);
+%}
+
+instruct subD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
+  match(Set dst (SubD src1 src2));
+
+  ins_cost(FMUL_DOUBLE_COST);
+  format %{ "fsub.d $dst, $src1, $src2\t#@subD_reg_reg" %}
+
+  ins_encode %{
+    __ fsub_d(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(fp_dop_reg_reg_d);
+%}
+
+instruct mulF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
+  match(Set dst (MulF src1 src2));
+
+  ins_cost(FMUL_SINGLE_COST);
+  format %{ "fmul.s $dst, $src1, $src2\t#@mulF_reg_reg" %}
+
+  ins_encode %{
+    __ fmul_s(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(fp_dop_reg_reg_s);
+%}
+
+// Double-precision multiply (MulD), encoded with fmul_d.
+instruct mulD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
+  match(Set dst (MulD src1 src2));
+
+  ins_cost(FMUL_DOUBLE_COST);
+  format %{ "fmul.d $dst, $src1, $src2\t#@mulD_reg_reg" %}
+
+  ins_encode %{
+    __ fmul_d(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(fp_dop_reg_reg_d);
+%}
+
+// Fused multiply-add family. Every rule is guarded by predicate(UseFMA).
+// In the ideal FmaF/FmaD node the addend (src3) is the first input and the
+// two multiplicands are wrapped in a Binary node; negated inputs select the
+// fmsub/fnmsub/fnmadd variants.
+
+// src1 * src2 + src3
+instruct maddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF src3 (Binary src1 src2)));
+
+  ins_cost(FMUL_SINGLE_COST);
+  format %{ "fmadd.s $dst, $src1, $src2, $src3\t#@maddF_reg_reg" %}
+
+  ins_encode %{
+    __ fmadd_s(as_FloatRegister($dst$$reg),
+               as_FloatRegister($src1$$reg),
+               as_FloatRegister($src2$$reg),
+               as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// src1 * src2 + src3
+instruct maddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD src3 (Binary src1 src2)));
+
+  ins_cost(FMUL_DOUBLE_COST);
+  format %{ "fmadd.d $dst, $src1, $src2, $src3\t#@maddD_reg_reg" %}
+
+  ins_encode %{
+    __ fmadd_d(as_FloatRegister($dst$$reg),
+               as_FloatRegister($src1$$reg),
+               as_FloatRegister($src2$$reg),
+               as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// src1 * src2 - src3
+instruct msubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));
+
+  ins_cost(FMUL_SINGLE_COST);
+  format %{ "fmsub.s $dst, $src1, $src2, $src3\t#@msubF_reg_reg" %}
+
+  ins_encode %{
+    __ fmsub_s(as_FloatRegister($dst$$reg),
+               as_FloatRegister($src1$$reg),
+               as_FloatRegister($src2$$reg),
+               as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// src1 * src2 - src3
+instruct msubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));
+
+  ins_cost(FMUL_DOUBLE_COST);
+  format %{ "fmsub.d $dst, $src1, $src2, $src3\t#@msubD_reg_reg" %}
+
+  ins_encode %{
+    __ fmsub_d(as_FloatRegister($dst$$reg),
+               as_FloatRegister($src1$$reg),
+               as_FloatRegister($src2$$reg),
+               as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// -src1 * src2 + src3
+// Two match rules: the negation may sit on either multiplicand.
+instruct nmsubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
+  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));
+
+  ins_cost(FMUL_SINGLE_COST);
+  format %{ "fnmsub.s $dst, $src1, $src2, $src3\t#@nmsubF_reg_reg" %}
+
+  ins_encode %{
+    __ fnmsub_s(as_FloatRegister($dst$$reg),
+                as_FloatRegister($src1$$reg),
+                as_FloatRegister($src2$$reg),
+                as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// -src1 * src2 + src3
+// Two match rules: the negation may sit on either multiplicand.
+instruct nmsubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
+  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));
+
+  ins_cost(FMUL_DOUBLE_COST);
+  format %{ "fnmsub.d $dst, $src1, $src2, $src3\t#@nmsubD_reg_reg" %}
+
+  ins_encode %{
+    __ fnmsub_d(as_FloatRegister($dst$$reg),
+                as_FloatRegister($src1$$reg),
+                as_FloatRegister($src2$$reg),
+                as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// -src1 * src2 - src3
+// Two match rules: the negation may sit on either multiplicand.
+instruct nmaddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
+  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));
+
+  ins_cost(FMUL_SINGLE_COST);
+  format %{ "fnmadd.s $dst, $src1, $src2, $src3\t#@nmaddF_reg_reg" %}
+
+  ins_encode %{
+    __ fnmadd_s(as_FloatRegister($dst$$reg),
+                as_FloatRegister($src1$$reg),
+                as_FloatRegister($src2$$reg),
+                as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// -src1 * src2 - src3
+// Two match rules: the negation may sit on either multiplicand.
+instruct nmaddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
+  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));
+
+  ins_cost(FMUL_DOUBLE_COST);
+  format %{ "fnmadd.d $dst, $src1, $src2, $src3\t#@nmaddD_reg_reg" %}
+
+  ins_encode %{
+    __ fnmadd_d(as_FloatRegister($dst$$reg),
+                as_FloatRegister($src1$$reg),
+                as_FloatRegister($src2$$reg),
+                as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// The four min/max rules below all delegate to the minmax_FD macro-assembler
+// helper, selecting the variant with the is_double / is_min flags. dst is
+// declared TEMP_DEF.
+
+// Math.max(FF)F
+instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
+  match(Set dst (MaxF src1 src2));
+  effect(TEMP_DEF dst);
+
+  format %{ "maxF $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ minmax_FD(as_FloatRegister($dst$$reg),
+                 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
+                 false /* is_double */, false /* is_min */);
+  %}
+
+  ins_pipe(fp_dop_reg_reg_s);
+%}
+
+// Math.min(FF)F
+instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
+  match(Set dst (MinF src1 src2));
+  effect(TEMP_DEF dst);
+
+  format %{ "minF $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ minmax_FD(as_FloatRegister($dst$$reg),
+                 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
+                 false /* is_double */, true /* is_min */);
+  %}
+
+  ins_pipe(fp_dop_reg_reg_s);
+%}
+
+// Math.max(DD)D
+instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
+  match(Set dst (MaxD src1 src2));
+  effect(TEMP_DEF dst);
+
+  format %{ "maxD $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ minmax_FD(as_FloatRegister($dst$$reg),
+                 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
+                 true /* is_double */, false /* is_min */);
+  %}
+
+  ins_pipe(fp_dop_reg_reg_d);
+%}
+
+// Math.min(DD)D
+instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
+  match(Set dst (MinD src1 src2));
+  effect(TEMP_DEF dst);
+
+  format %{ "minD $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ minmax_FD(as_FloatRegister($dst$$reg),
+                 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
+                 true /* is_double */, true /* is_min */);
+  %}
+
+  ins_pipe(fp_dop_reg_reg_d);
+%}
+
+// Single-precision divide (DivF), encoded with fdiv_s.
+instruct divF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
+  match(Set dst (DivF src1 src2));
+
+  ins_cost(FDIV_COST);
+  format %{ "fdiv.s $dst, $src1, $src2\t#@divF_reg_reg" %}
+
+  ins_encode %{
+    __ fdiv_s(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(fp_div_s);
+%}
+
+// Double-precision divide (DivD), encoded with fdiv_d.
+instruct divD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
+  match(Set dst (DivD src1 src2));
+
+  ins_cost(FDIV_COST);
+  format %{ "fdiv.d $dst, $src1, $src2\t#@divD_reg_reg" %}
+
+  ins_encode %{
+    __ fdiv_d(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(fp_div_d);
+%}
+
+// Float negate (NegF); encoded with the fneg_s assembler helper, which the
+// format string prints as fsgnjn.s with both sources equal.
+instruct negF_reg_reg(fRegF dst, fRegF src) %{
+  match(Set dst (NegF src));
+
+  ins_cost(XFER_COST);
+  format %{ "fsgnjn.s $dst, $src, $src\t#@negF_reg_reg" %}
+
+  ins_encode %{
+    __ fneg_s(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(fp_uop_s);
+%}
+
+// Double negate (NegD); encoded with fneg_d.
+instruct negD_reg_reg(fRegD dst, fRegD src) %{
+  match(Set dst (NegD src));
+
+  ins_cost(XFER_COST);
+  format %{ "fsgnjn.d $dst, $src, $src\t#@negD_reg_reg" %}
+
+  ins_encode %{
+    __ fneg_d(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(fp_uop_d);
+%}
+
+// Integer absolute value in three instructions: t0 = src >> 31 (arithmetic),
+// dst = src + t0, dst = dst ^ t0. Uses t0 as scratch.
+instruct absI_reg(iRegINoSp dst, iRegIorL2I src) %{
+  match(Set dst (AbsI src));
+
+  ins_cost(ALU_COST * 3);
+  format %{
+    "sraiw t0, $src, 0x1f\n\t"
+    "addw $dst, $src, t0\n\t"
+    "xorr $dst, $dst, t0\t#@absI_reg"
+  %}
+
+  ins_encode %{
+    __ sraiw(t0, as_Register($src$$reg), 0x1f);
+    __ addw(as_Register($dst$$reg), as_Register($src$$reg), t0);
+    __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Long absolute value; same three-step sequence as absI_reg with a 63-bit
+// arithmetic shift. Uses t0 as scratch.
+instruct absL_reg(iRegLNoSp dst, iRegL src) %{
+  match(Set dst (AbsL src));
+
+  ins_cost(ALU_COST * 3);
+  format %{
+    "srai t0, $src, 0x3f\n\t"
+    "add $dst, $src, t0\n\t"
+    "xorr $dst, $dst, t0\t#@absL_reg"
+  %}
+
+  ins_encode %{
+    __ srai(t0, as_Register($src$$reg), 0x3f);
+    __ add(as_Register($dst$$reg), as_Register($src$$reg), t0);
+    __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Float absolute value (AbsF); encoded with fabs_s.
+instruct absF_reg(fRegF dst, fRegF src) %{
+  match(Set dst (AbsF src));
+
+  ins_cost(XFER_COST);
+  format %{ "fsgnjx.s $dst, $src, $src\t#@absF_reg" %}
+  ins_encode %{
+    __ fabs_s(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(fp_uop_s);
+%}
+
+// Double absolute value (AbsD); encoded with fabs_d.
+instruct absD_reg(fRegD dst, fRegD src) %{
+  match(Set dst (AbsD src));
+
+  ins_cost(XFER_COST);
+  format %{ "fsgnjx.d $dst, $src, $src\t#@absD_reg" %}
+  ins_encode %{
+    __ fabs_d(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(fp_uop_d);
+%}
+
+// Float square root: matches the ConvD2F(SqrtD(ConvF2D src)) shape and
+// encodes it with a single fsqrt_s on the float operand.
+instruct sqrtF_reg(fRegF dst, fRegF src) %{
+  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
+
+  ins_cost(FSQRT_COST);
+  format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %}
+  ins_encode %{
+    __ fsqrt_s(as_FloatRegister($dst$$reg),
+               as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(fp_sqrt_s);
+%}
+
+// Double square root (SqrtD); encoded with fsqrt_d.
+instruct sqrtD_reg(fRegD dst, fRegD src) %{
+  match(Set dst (SqrtD src));
+
+  ins_cost(FSQRT_COST);
+  format %{ "fsqrt.d $dst, $src\t#@sqrtD_reg" %}
+  ins_encode %{
+    __ fsqrt_d(as_FloatRegister($dst$$reg),
+               as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(fp_sqrt_d);
+%}
+
+// Arithmetic Instructions End
+
+// ============================================================================
+// Logical Instructions
+// Register forms take two register operands; immediate forms take an
+// immIAdd/immLAdd constant that is cast to int32_t for the assembler call.
+// NOTE(review): only some rules open an Assembler::CompressibleRegion;
+// presumably those are the ones with a compressed encoding -- confirm.
+
+// Register And
+instruct andI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{
+  match(Set dst (AndI src1 src2));
+
+  format %{ "andr $dst, $src1, $src2\t#@andI_reg_reg" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ andr(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate And
+instruct andI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{
+  match(Set dst (AndI src1 src2));
+
+  format %{ "andi $dst, $src1, $src2\t#@andI_reg_imm" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ andi(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (int32_t)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Register Or
+instruct orI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{
+  match(Set dst (OrI src1 src2));
+
+  format %{ "orr $dst, $src1, $src2\t#@orI_reg_reg" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ orr(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Or
+instruct orI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{
+  match(Set dst (OrI src1 src2));
+
+  format %{ "ori $dst, $src1, $src2\t#@orI_reg_imm" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    __ ori(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           (int32_t)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Register Xor
+instruct xorI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{
+  match(Set dst (XorI src1 src2));
+
+  format %{ "xorr $dst, $src1, $src2\t#@xorI_reg_reg" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ xorr(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Xor
+instruct xorI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{
+  match(Set dst (XorI src1 src2));
+
+  format %{ "xori $dst, $src1, $src2\t#@xorI_reg_imm" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    __ xori(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (int32_t)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Register And Long
+instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (AndL src1 src2));
+
+  format %{ "andr $dst, $src1, $src2\t#@andL_reg_reg" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ andr(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate And Long
+instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
+  match(Set dst (AndL src1 src2));
+
+  format %{ "andi $dst, $src1, $src2\t#@andL_reg_imm" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ andi(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (int32_t)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Register Or Long
+instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (OrL src1 src2));
+
+  format %{ "orr $dst, $src1, $src2\t#@orL_reg_reg" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ orr(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Or Long
+instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
+  match(Set dst (OrL src1 src2));
+
+  format %{ "ori $dst, $src1, $src2\t#@orL_reg_imm" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    __ ori(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           (int32_t)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Register Xor Long
+instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (XorL src1 src2));
+
+  format %{ "xorr $dst, $src1, $src2\t#@xorL_reg_reg" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ xorr(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Xor Long
+instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
+  match(Set dst (XorL src1 src2));
+
+  ins_cost(ALU_COST);
+  format %{ "xori $dst, $src1, $src2\t#@xorL_reg_imm" %}
+
+  ins_encode %{
+    __ xori(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (int32_t)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// 
============================================================================
+// BSWAP Instructions
+// Each rule delegates to the matching revb* macro-assembler helper.
+// bytes_reverse_int and bytes_reverse_long declare the flags register
+// operand as TEMP.
+
+instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr) %{
+  match(Set dst (ReverseBytesI src));
+  effect(TEMP cr);
+
+  ins_cost(ALU_COST * 13);
+  format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int" %}
+
+  ins_encode %{
+    __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct bytes_reverse_long(iRegLNoSp dst, iRegL src, rFlagsReg cr) %{
+  match(Set dst (ReverseBytesL src));
+  effect(TEMP cr);
+
+  ins_cost(ALU_COST * 29);
+  format %{ "revb $dst, $src\t#@bytes_reverse_long" %}
+
+  ins_encode %{
+    __ revb(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
+  match(Set dst (ReverseBytesUS src));
+
+  ins_cost(ALU_COST * 5);
+  format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short" %}
+
+  ins_encode %{
+    __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
+  match(Set dst (ReverseBytesS src));
+
+  ins_cost(ALU_COST * 5);
+  format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short" %}
+
+  ins_encode %{
+    __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// ============================================================================
+// MemBar Instruction
+// Each rule passes a mask of MacroAssembler ordering constraints to membar().
+// The *_lock variants emit only a block comment and have cost 0.
+
+instruct load_fence() %{
+  match(LoadFence);
+  ins_cost(ALU_COST);
+
+  format %{ "#@load_fence" %}
+
+  ins_encode %{
+    __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct membar_acquire() %{
+  match(MemBarAcquire);
+  ins_cost(ALU_COST);
+
+  format %{ "#@membar_acquire\n\t"
+            "fence ir iorw" %}
+
+  ins_encode %{
+    __ block_comment("membar_acquire");
+    __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct membar_acquire_lock() %{
+  match(MemBarAcquireLock);
+  ins_cost(0);
+
+  format %{ "#@membar_acquire_lock (elided)" %}
+
+  ins_encode %{
+    __ block_comment("membar_acquire_lock (elided)");
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct store_fence() %{
+  match(StoreFence);
+  ins_cost(ALU_COST);
+
+  format %{ "#@store_fence" %}
+
+  ins_encode %{
+    __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct membar_release() %{
+  match(MemBarRelease);
+  ins_cost(ALU_COST);
+
+  format %{ "#@membar_release\n\t"
+            "fence iorw ow" %}
+
+  ins_encode %{
+    __ block_comment("membar_release");
+    __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct membar_storestore() %{
+  match(MemBarStoreStore);
+  ins_cost(ALU_COST);
+
+  format %{ "MEMBAR-store-store\t#@membar_storestore" %}
+
+  ins_encode %{
+    __ membar(MacroAssembler::StoreStore);
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct membar_release_lock() %{
+  match(MemBarReleaseLock);
+  ins_cost(0);
+
+  format %{ "#@membar_release_lock (elided)" %}
+
+  ins_encode %{
+    __ block_comment("membar_release_lock (elided)");
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+// NOTE(review): the format text prints "fence iorw iorw" while the encoding
+// requests only StoreLoad ordering; confirm what MacroAssembler::membar
+// emits for that mask.
+instruct membar_volatile() %{
+  match(MemBarVolatile);
+  ins_cost(ALU_COST);
+
+  format %{ "#@membar_volatile\n\t"
+            "fence iorw iorw"%}
+
+  ins_encode %{
+    __ block_comment("membar_volatile");
+    __ membar(MacroAssembler::StoreLoad);
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+// ============================================================================
+// Cast Instructions (Java-level type cast)
+// castX2P/castP2X emit a register move only when dst and src differ; the
+// remaining cast rules match a node whose input and output are the same
+// operand and declare size(0) with an empty encoding.
+
+instruct castX2P(iRegPNoSp dst, iRegL src) %{
+  match(Set dst (CastX2P src));
+
+  ins_cost(ALU_COST);
+  format %{ "mv $dst, $src\t# long -> ptr, #@castX2P" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    if ($dst$$reg != $src$$reg) {
+      __ mv(as_Register($dst$$reg), as_Register($src$$reg));
+    }
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct castP2X(iRegLNoSp dst, iRegP src) %{
+  match(Set dst (CastP2X src));
+
+  ins_cost(ALU_COST);
+  format %{ "mv $dst, $src\t# ptr -> long, #@castP2X" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    if ($dst$$reg != $src$$reg) {
+      __ mv(as_Register($dst$$reg), as_Register($src$$reg));
+    }
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct castPP(iRegPNoSp dst)
+%{
+  match(Set dst (CastPP dst));
+  ins_cost(0);
+
+  size(0);
+  format %{ "# castPP of $dst, #@castPP" %}
+  ins_encode(/* empty encoding */);
+  ins_pipe(pipe_class_empty);
+%}
+
+instruct castLL(iRegL dst)
+%{
+  match(Set dst (CastLL dst));
+
+  size(0);
+  format %{ "# castLL of $dst, #@castLL" %}
+  ins_encode(/* empty encoding */);
+  ins_cost(0);
+  ins_pipe(pipe_class_empty);
+%}
+
+instruct castII(iRegI dst)
+%{
+  match(Set dst (CastII dst));
+
+  size(0);
+  format %{ "# castII of $dst, #@castII" %}
+  ins_encode(/* empty encoding */);
+  ins_cost(0);
+  ins_pipe(pipe_class_empty);
+%}
+
+instruct checkCastPP(iRegPNoSp dst)
+%{
+  match(Set dst (CheckCastPP dst));
+
+  size(0);
+  ins_cost(0);
+  format %{ "# checkcastPP of $dst, #@checkCastPP" %}
+  ins_encode(/* empty encoding */);
+  ins_pipe(pipe_class_empty);
+%}
+
+instruct castFF(fRegF dst)
+%{
+  match(Set dst (CastFF dst));
+
+  size(0);
+  format %{ "# castFF of $dst" %}
+  ins_encode(/* empty encoding */);
+  ins_cost(0);
+  ins_pipe(pipe_class_empty);
+%}
+
+instruct castDD(fRegD dst)
+%{
+  match(Set dst (CastDD dst));
+
+  size(0);
+  format %{ "# castDD of $dst" %}
+  ins_encode(/* empty encoding */);
+  ins_cost(0);
+  ins_pipe(pipe_class_empty);
+%}
+
+instruct castVV(vReg dst)
+%{
+  match(Set dst (CastVV dst));
+
+  size(0);
+  format %{ "# castVV of $dst" %}
+  ins_encode(/* empty encoding */);
+  ins_cost(0);
+  ins_pipe(pipe_class_empty);
+%}
+
+// ============================================================================
+// Convert Instructions
+
+// int to bool
+instruct convI2Bool(iRegINoSp dst, iRegI src)
+%{
+  match(Set dst (Conv2B src));
+
+  ins_cost(ALU_COST);
+  format %{ "snez $dst, $src\t#@convI2Bool" %}
+
+  ins_encode %{
+    __ snez(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// pointer to bool
+instruct convP2Bool(iRegINoSp dst, iRegP src)
+%{
+  match(Set dst (Conv2B src));
+
+  ins_cost(ALU_COST);
+  format %{ "snez $dst, $src\t#@convP2Bool" %}
+
+  ins_encode %{
+    __ snez(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// int <-> long
+// Both directions are encoded as addw with the zero register.
+
+instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
+%{
+  match(Set dst (ConvI2L src));
+
+  ins_cost(ALU_COST);
+  format %{ "addw $dst, $src, zr\t#@convI2L_reg_reg" %}
+  ins_encode %{
+    __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
+  match(Set dst (ConvL2I src));
+
+  ins_cost(ALU_COST);
+  format %{ "addw $dst, $src, zr\t#@convL2I_reg" %}
+
+  ins_encode %{
+    __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// int to unsigned long (Zero-extend)
+// Matches ConvI2L followed by AndL with the immL_32bits mask operand.
+instruct convI2UL_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
+%{
+  match(Set dst (AndL (ConvI2L src) mask));
+
+  ins_cost(ALU_COST * 2);
+  format %{ "zero_extend $dst, $src, 32\t# i2ul, #@convI2UL_reg_reg" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ zero_extend(as_Register($dst$$reg), as_Register($src$$reg), 32);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// float <-> double
+
+instruct convF2D_reg(fRegD dst, fRegF src) %{
+  match(Set dst (ConvF2D src));
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.d.s $dst, $src\t#@convF2D_reg" %}
+
+  ins_encode %{
+    __ fcvt_d_s(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(fp_f2d);
+%}
+
+instruct convD2F_reg(fRegF dst, fRegD src) %{
+  match(Set dst (ConvD2F src));
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.s.d $dst, $src\t#@convD2F_reg" %}
+
+  ins_encode %{
+    __ fcvt_s_d(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(fp_d2f);
+%}
+
+// float <-> int
+// FP-to-integer conversions go through the fcvt_*_safe macro-assembler
+// helpers; integer-to-FP conversions call the plain fcvt_* instruction.
+
+instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{
+  match(Set dst (ConvF2I src));
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.w.s $dst, $src\t#@convF2I_reg_reg" %}
+
+  ins_encode %{
+    __ fcvt_w_s_safe($dst$$Register, $src$$FloatRegister);
+  %}
+
+  ins_pipe(fp_f2i);
+%}
+
+instruct convI2F_reg_reg(fRegF dst, iRegIorL2I src) %{
+  match(Set dst (ConvI2F src));
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.s.w $dst, $src\t#@convI2F_reg_reg" %}
+
+  ins_encode %{
+    __ fcvt_s_w(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(fp_i2f);
+%}
+
+// float <-> long
+
+instruct convF2L_reg_reg(iRegLNoSp dst, fRegF src) %{
+  match(Set dst (ConvF2L src));
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.l.s $dst, $src\t#@convF2L_reg_reg" %}
+
+  ins_encode %{
+    __ fcvt_l_s_safe($dst$$Register, $src$$FloatRegister);
+  %}
+
+  ins_pipe(fp_f2l);
+%}
+
+instruct convL2F_reg_reg(fRegF dst, iRegL src) %{
+  match(Set dst (ConvL2F src));
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.s.l $dst, $src\t#@convL2F_reg_reg" %}
+
+  ins_encode %{
+    __ fcvt_s_l(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(fp_l2f);
+%}
+
+// double <-> int
+
+instruct convD2I_reg_reg(iRegINoSp dst, fRegD src) %{
+  match(Set dst (ConvD2I src));
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.w.d $dst, $src\t#@convD2I_reg_reg" %}
+
+  ins_encode %{
+    __ fcvt_w_d_safe($dst$$Register, $src$$FloatRegister);
+  %}
+
+  ins_pipe(fp_d2i);
+%}
+
+instruct convI2D_reg_reg(fRegD dst, iRegIorL2I src) %{
+  match(Set dst (ConvI2D src));
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.d.w $dst, $src\t#@convI2D_reg_reg" %}
+
+  ins_encode %{
+    __ fcvt_d_w(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(fp_i2d);
+%}
+
+// double <-> long
+
+instruct convD2L_reg_reg(iRegLNoSp dst, fRegD src) %{
+  match(Set dst (ConvD2L src));
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.l.d $dst, $src\t#@convD2L_reg_reg" %}
+
+  ins_encode %{
+    __ fcvt_l_d_safe($dst$$Register, $src$$FloatRegister);
+  %}
+
+  ins_pipe(fp_d2l);
+%}
+
+instruct convL2D_reg_reg(fRegD dst, iRegL src) %{
+  match(Set dst (ConvL2D src));
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.d.l $dst, $src\t#@convL2D_reg_reg" %}
+
+  ins_encode %{
+    __ fcvt_d_l(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(fp_l2d);
+%}
+
+// Convert oop into int for vectors alignment masking
+instruct convP2I(iRegINoSp dst, iRegP src) %{
+  match(Set dst (ConvL2I (CastP2X src)));
+
+  ins_cost(ALU_COST * 2);
+  format %{ "zero_extend $dst, $src, 32\t# ptr -> int, #@convP2I" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ zero_extend($dst$$Register, $src$$Register, 32);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Convert compressed oop into int for vectors alignment masking
+// in case of 32bit oops (heap < 4Gb).
+instruct convN2I(iRegINoSp dst, iRegN src)
+%{
+  predicate(CompressedOops::shift() == 0);
+  match(Set dst (ConvL2I (CastP2X (DecodeN src))));
+
+  ins_cost(ALU_COST);
+  format %{ "mv $dst, $src\t# compressed ptr -> int, #@convN2I" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ mv($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Convert oop pointer into compressed form
+instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{
+  match(Set dst (EncodeP src));
+  ins_cost(ALU_COST);
+  format %{ "encode_heap_oop $dst, $src\t#@encodeHeapOop" %}
+  ins_encode %{
+    Register s = $src$$Register;
+    Register d = $dst$$Register;
+    __ encode_heap_oop(d, s);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// DecodeN when the node's pointer type is neither NotNull nor Constant;
+// uses the general decode_heap_oop helper.
+instruct decodeHeapOop(iRegPNoSp dst, iRegN src) %{
+  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
+            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
+  match(Set dst (DecodeN src));
+
+  ins_cost(0);
+  format %{ "decode_heap_oop $dst, $src\t#@decodeHeapOop" %}
+  ins_encode %{
+    Register s = $src$$Register;
+    Register d = $dst$$Register;
+    __ decode_heap_oop(d, s);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// DecodeN when the node's pointer type is NotNull or Constant; uses the
+// decode_heap_oop_not_null helper.
+instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src) %{
+  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
+            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
+  match(Set dst (DecodeN src));
+
+  ins_cost(0);
+  format %{ "decode_heap_oop_not_null $dst, $src\t#@decodeHeapOop_not_null" %}
+  ins_encode %{
+    Register s = $src$$Register;
+    Register d = $dst$$Register;
+    __ decode_heap_oop_not_null(d, s);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Convert klass pointer into compressed form.
+// Passes t0 to the helper as its third (scratch) argument.
+instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
+  match(Set dst (EncodePKlass src));
+
+  ins_cost(ALU_COST);
+  format %{ "encode_klass_not_null $dst, $src\t#@encodeKlass_not_null" %}
+
+  ins_encode %{
+    Register src_reg = as_Register($src$$reg);
+    Register dst_reg = as_Register($dst$$reg);
+    __ encode_klass_not_null(dst_reg, src_reg, t0);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Convert compressed klass pointer back to a full pointer; tmp is an
+// allocator-provided TEMP register handed to the helper.
+instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src, iRegPNoSp tmp) %{
+  match(Set dst (DecodeNKlass src));
+
+  effect(TEMP tmp);
+
+  ins_cost(ALU_COST);
+  format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %}
+
+  ins_encode %{
+    Register src_reg = as_Register($src$$reg);
+    Register dst_reg = as_Register($dst$$reg);
+    Register tmp_reg = as_Register($tmp$$reg);
+    __ decode_klass_not_null(dst_reg, src_reg, tmp_reg);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// stack <-> reg and reg <-> reg shuffles with no conversion
+// Stack operands are addressed as Address(sp, disp). Load formats print
+// "$dst, $src"; store formats print the stored register first ("$src, $dst").
+
+instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{
+
+  match(Set dst (MoveF2I src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(LOAD_COST);
+
+  format %{ "lw $dst, $src\t#@MoveF2I_stack_reg" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ lw(as_Register($dst$$reg), Address(sp, $src$$disp));
+  %}
+
+  ins_pipe(iload_reg_reg);
+
+%}
+
+instruct MoveI2F_stack_reg(fRegF dst, stackSlotI src) %{
+
+  match(Set dst (MoveI2F src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(LOAD_COST);
+
+  format %{ "flw $dst, $src\t#@MoveI2F_stack_reg" %}
+
+  ins_encode %{
+    __ flw(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{
+
+  match(Set dst (MoveD2L src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(LOAD_COST);
+
+  format %{ "ld $dst, $src\t#@MoveD2L_stack_reg" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ ld(as_Register($dst$$reg), Address(sp, $src$$disp));
+  %}
+
+  ins_pipe(iload_reg_reg);
+
+%}
+
+instruct MoveL2D_stack_reg(fRegD dst, stackSlotL src) %{
+
+  match(Set dst (MoveL2D src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(LOAD_COST);
+
+  format %{ "fld $dst, $src\t#@MoveL2D_stack_reg" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ fld(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveF2I_reg_stack(stackSlotI dst, fRegF src) %{
+
+  match(Set dst (MoveF2I src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(STORE_COST);
+
+  format %{ "fsw $src, $dst\t#@MoveF2I_reg_stack" %}
+
+  ins_encode %{
+    __ fsw(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
+
+  match(Set dst (MoveI2F src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(STORE_COST);
+
+  format %{ "sw $src, $dst\t#@MoveI2F_reg_stack" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ sw(as_Register($src$$reg), Address(sp, $dst$$disp));
+  %}
+
+  ins_pipe(istore_reg_reg);
+
+%}
+
+instruct MoveD2L_reg_stack(stackSlotL dst, fRegD src) %{
+
+  match(Set dst (MoveD2L src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(STORE_COST);
+
+  // Store: the register being stored ($src) is printed first, matching the
+  // operand order of the fsd call below and of the other store formats.
+  format %{ "fsd $src, $dst\t#@MoveD2L_reg_stack" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ fsd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
+
+  match(Set dst (MoveL2D src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(STORE_COST);
+
+  format %{ "sd $src, $dst\t#@MoveL2D_reg_stack" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ sd(as_Register($src$$reg), Address(sp, $dst$$disp));
+  %}
+
+  ins_pipe(istore_reg_reg);
+
+%}
+
+// Register-to-register bit moves between the FP and integer register files.
+// The "#@" tag in each format names the rule that produced the instruction.
+
+instruct MoveF2I_reg_reg(iRegINoSp dst, fRegF src) %{
+
+  match(Set dst (MoveF2I src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(XFER_COST);
+
+  format %{ "fmv.x.w $dst, $src\t#@MoveF2I_reg_reg" %}
+
+  ins_encode %{
+    __ fmv_x_w(as_Register($dst$$reg), as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(fp_f2i);
+
+%}
+
+instruct MoveI2F_reg_reg(fRegF dst, iRegI src) %{
+
+  match(Set dst (MoveI2F src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(XFER_COST);
+
+  format %{ "fmv.w.x $dst, $src\t#@MoveI2F_reg_reg" %}
+
+  ins_encode %{
+    __ fmv_w_x(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(fp_i2f);
+
+%}
+
+instruct MoveD2L_reg_reg(iRegLNoSp dst, fRegD src) %{
+
+  match(Set dst (MoveD2L src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(XFER_COST);
+
+  format %{ "fmv.x.d $dst, $src\t#@MoveD2L_reg_reg" %}
+
+  ins_encode %{
+    __ fmv_x_d(as_Register($dst$$reg), as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(fp_d2l);
+
+%}
+
+instruct MoveL2D_reg_reg(fRegD dst, iRegL src) %{
+
+  match(Set dst (MoveL2D src));
+
+  effect(DEF dst, USE src);
+
+  ins_cost(XFER_COST);
+
+  format %{ "fmv.d.x $dst, $src\t#@MoveL2D_reg_reg" %}
+
+  ins_encode %{
+    __ fmv_d_x(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(fp_l2d);
+%}
+
+// ============================================================================
+// Compare Instructions which set the result float comparisons in dest register.
+
+instruct cmpF3_reg_reg(iRegINoSp dst, fRegF op1, fRegF op2)
+%{
+  match(Set dst (CmpF3 op1 op2));
+  // Three-way float compare into $dst: -1 / 0 / 1, with unordered mapped to -1 (the -1 argument below).
+  ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST);
+  format %{ "flt.s $dst, $op2, $op1\t#@cmpF3_reg_reg\n\t"
+            "bgtz $dst, done\n\t"
+            "feq.s $dst, $op1, $op2\n\t"
+            "addi $dst, $dst, -1\t#@cmpF3_reg_reg"
+  %}
+
+  ins_encode %{
+    // we want -1 for unordered or less than, 0 for equal and 1 for greater than.
+    __ float_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg),
+                     as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct cmpD3_reg_reg(iRegINoSp dst, fRegD op1, fRegD op2)
+%{
+  match(Set dst (CmpD3 op1 op2));
+  // Three-way double compare into $dst: -1 / 0 / 1, with unordered mapped to -1 (the -1 argument below).
+  ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST);
+  format %{ "flt.d $dst, $op2, $op1\t#@cmpD3_reg_reg\n\t"
+            "bgtz $dst, done\n\t"
+            "feq.d $dst, $op1, $op2\n\t"
+            "addi $dst, $dst, -1\t#@cmpD3_reg_reg"
+  %}
+
+  ins_encode %{
+    // we want -1 for unordered or less than, 0 for equal and 1 for greater than.
+    __ double_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct cmpL3_reg_reg(iRegINoSp dst, iRegL op1, iRegL op2)
+%{
+  match(Set dst (CmpL3 op1 op2));
+  // NOTE(review): the format text prints $dst as the working register, but the encoding computes into t0 and then moves t0 to $dst.
+  ins_cost(ALU_COST * 3 + BRANCH_COST);
+  format %{ "slt $dst, $op2, $op1\t#@cmpL3_reg_reg\n\t"
+            "bnez $dst, done\n\t"
+            "slt $dst, $op1, $op2\n\t"
+            "neg $dst, $dst\t#@cmpL3_reg_reg"
+  %}
+  ins_encode %{
+    __ cmp_l2i(t0, as_Register($op1$$reg), as_Register($op2$$reg));
+    __ mv(as_Register($dst$$reg), t0);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q)
+%{
+  match(Set dst (CmpLTMask p q));
+  // $dst = ($p < $q) ? -1 : 0: `slt` leaves 0/1 in $dst, `subw` from zr negates it.
+  ins_cost(2 * ALU_COST);
+
+  format %{ "slt $dst, $p, $q\t#@cmpLTMask_reg_reg\n\t"
+            "subw $dst, zr, $dst\t#@cmpLTMask_reg_reg"
+  %}
+
+  ins_encode %{
+    __ slt(as_Register($dst$$reg), as_Register($p$$reg), as_Register($q$$reg));
+    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I op, immI0 zero)
+%{
+  match(Set dst (CmpLTMask op zero));
+  // Compare against zero: `sraiw ..., 31` copies the sign bit of $op into every bit of $dst.
+  ins_cost(ALU_COST);
+
+  format %{ "sraiw $dst, $op, 31\t#@cmpLTMask_reg_zero" %}
+
+  ins_encode %{
+    __ sraiw(as_Register($dst$$reg), as_Register($op$$reg), 31);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+
+// ============================================================================
+// Max and Min
+
+instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2)
+%{
+  match(Set dst (MinI src1 src2));
+  // $dst = $src1 when $src1 <= $src2 (signed `ble`), otherwise $src2.
+  effect(DEF dst, USE src1, USE src2);
+
+  ins_cost(BRANCH_COST + ALU_COST * 2);
+  format %{
+    "ble $src1, $src2, Lsrc1\t#@minI_rReg\n\t"
+    "mv $dst, $src2\n\t"
+    "j Ldone\n\t"
+    "bind Lsrc1\n\t"
+    "mv $dst, $src1\n\t"
+    "bind Ldone\t#@minI_rReg"
+  %}
+
+  ins_encode %{
+    Label Lsrc1, Ldone;
+    __ ble(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1);
+    __ mv(as_Register($dst$$reg), as_Register($src2$$reg));
+    __ j(Ldone);
+    __ bind(Lsrc1);
+    __ mv(as_Register($dst$$reg), as_Register($src1$$reg));
+    __ bind(Ldone);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2)
+%{
+  match(Set dst (MaxI src1 src2));
+  // $dst = $src1 when $src1 >= $src2 (signed `bge`), otherwise $src2.
+  effect(DEF dst, USE src1, USE src2);
+
+  ins_cost(BRANCH_COST + ALU_COST * 2);
+  format %{
+    "bge $src1, $src2, Lsrc1\t#@maxI_rReg\n\t"
+    "mv $dst, $src2\n\t"
+    "j Ldone\n\t"
+    "bind Lsrc1\n\t"
+    "mv $dst, $src1\n\t"
+    "bind Ldone\t#@maxI_rReg"
+  %}
+
+  ins_encode %{
+    Label Lsrc1, Ldone;
+    __ bge(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1);
+    __ mv(as_Register($dst$$reg), as_Register($src2$$reg));
+    __ j(Ldone);
+    __ bind(Lsrc1);
+    __ mv(as_Register($dst$$reg), as_Register($src1$$reg));
+    __ bind(Ldone);
+
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// ============================================================================
+// Branch Instructions
+// Direct Branch.
+instruct branch(label lbl)
+%{
+  match(Goto);
+  // Unconditional jump for the ideal Goto node, encoded through riscv_enc_j.
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "j $lbl\t#@branch" %}
+
+  ins_encode(riscv_enc_j(lbl));
+
+  ins_pipe(pipe_branch);
+%}
+
+// ============================================================================
+// Compare and Branch Instructions
+
+// Patterns for short (< 12KiB) variants: each sets ins_short_branch(1) and shares its match rule with a far_* instruct.
+
+// Compare flags and branch near instructions.
+instruct cmpFlag_branch(cmpOpEqNe cmp, rFlagsReg cr, label lbl) %{
+  match(If cmp cr);
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "b$cmp $cr, zr, $lbl\t#@cmpFlag_branch" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label));
+  %}
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);
+%}
+
+// Compare signed int and branch near instructions
+instruct cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl)
+%{
+  // Same match rule as `far_cmpI_branch'.
+  match(If cmp (CmpI op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1);
+%}
+
+instruct cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl)
+%{
+  // Same match rule as `far_cmpI_loop'.
+  match(CountedLoopEnd cmp (CmpI op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1);
+%}
+
+// Compare unsigned int and branch near instructions (unsigned_branch_mask is OR-ed into the condition code)
+instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl)
+%{
+  // Same match rule as `far_cmpU_branch'.
+  match(If cmp (CmpU op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1);
+%}
+
+instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl)
+%{
+  // Same match rule as `far_cmpU_loop'.
+  match(CountedLoopEnd cmp (CmpU op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1);
+%}
+
+// Compare signed long and branch near instructions
+instruct cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl)
+%{
+  // Same match rule as `far_cmpL_branch'.
+  match(If cmp (CmpL op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1);
+%}
+
+instruct cmpL_loop(cmpOp cmp, iRegL op1, iRegL op2, label lbl)
+%{
+  // Same match rule as `far_cmpLloop' (the far instruct's name has no underscore before "loop").
+  match(CountedLoopEnd cmp (CmpL op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1);
+%}
+
+// Compare unsigned long and branch near instructions (unsigned_branch_mask is OR-ed into the condition code)
+instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl)
+%{
+  // Same match rule as `far_cmpUL_branch'.
+  match(If cmp (CmpUL op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1);
+%}
+
+instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl)
+%{
+  // Same match rule as `far_cmpUL_loop'.
+  match(CountedLoopEnd cmp (CmpUL op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1);
+%}
+
+// Compare pointer and branch near instructions (uses the same unsigned_branch_mask as the unsigned integer forms)
+instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl)
+%{
+  // Same match rule as `far_cmpP_branch'.
+  match(If cmp (CmpP op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1);
+%}
+
+instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl)
+%{
+  // Same match rule as `far_cmpP_loop'.
+  match(CountedLoopEnd cmp (CmpP op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1);
+%}
+
+// Compare narrow pointer and branch near instructions (uses the same unsigned_branch_mask as the unsigned integer forms)
+instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl)
+%{
+  // Same match rule as `far_cmpN_branch'.
+  match(If cmp (CmpN op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1);
+%}
+
+instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl)
+%{
+  // Same match rule as `far_cmpN_loop'.
+  match(CountedLoopEnd cmp (CmpN op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1);
+%}
+
+// Compare float and branch near instructions (encoded through float_cmp_branch)
+instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
+%{
+  // Same match rule as `far_cmpF_branch'.
+  match(If cmp (CmpF op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(XFER_COST + BRANCH_COST);
+  format %{ "float_b$cmp $op1, $op2 \t#@cmpF_branch"%}
+
+  ins_encode %{
+    __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_class_compare);
+  ins_short_branch(1);
+%}
+
+instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
+%{
+  // Same match rule as `far_cmpF_loop'.
+  match(CountedLoopEnd cmp (CmpF op1 op2));
+  effect(USE lbl);
+
+  ins_cost(XFER_COST + BRANCH_COST);
+  format %{ "float_b$cmp $op1, $op2\t#@cmpF_loop"%}
+
+  ins_encode %{
+    __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_class_compare);
+  ins_short_branch(1);
+%}
+
+// Compare double and branch near instructions (same float_cmp_branch helper, with double_branch_mask OR-ed in)
+instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
+%{
+  // Same match rule as `far_cmpD_branch'.
+  match(If cmp (CmpD op1 op2));
+  effect(USE lbl);
+
+  ins_cost(XFER_COST + BRANCH_COST);
+  format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%}
+
+  ins_encode %{
+    __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
+                        as_FloatRegister($op2$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_class_compare);
+  ins_short_branch(1);
+%}
+
+instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
+%{
+  // Same match rule as `far_cmpD_loop'.
+  match(CountedLoopEnd cmp (CmpD op1 op2));
+  effect(USE lbl);
+
+  ins_cost(XFER_COST + BRANCH_COST);
+  format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%}
+
+  ins_encode %{
+    __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
+                        as_FloatRegister($op2$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_class_compare);
+  ins_short_branch(1);
+%}
+
+// Compare signed int with zero and branch near instructions (zr is passed as the second operand)
+instruct cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl)
+%{
+  // Same match rule as `far_cmpI_reg_imm0_branch'.
+  match(If cmp (CmpI op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);
+%}
+
+instruct cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl)
+%{
+  // Same match rule as `far_cmpI_reg_imm0_loop'.
+  match(CountedLoopEnd cmp (CmpI op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);
+%}
+
+// Compare unsigned int with zero and branch near instructions
+instruct cmpUEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl)
+%{
+  // Same match rule as `far_cmpUEqNeLeGt_imm0_branch'.
+  match(If cmp (CmpU op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);
+%}
+
+instruct cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl)
+%{
+  // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_loop'.
+  match(CountedLoopEnd cmp (CmpU op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_loop" %}
+
+
+  ins_encode %{
+    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);
+%}
+
+// Compare signed long with zero and branch near instructions
+instruct cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl)
+%{
+  // Same match rule as `far_cmpL_reg_imm0_branch'.
+  match(If cmp (CmpL op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);
+%}
+
+instruct cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl)
+%{
+  // Same match rule as `far_cmpL_reg_imm0_loop'.
+  match(CountedLoopEnd cmp (CmpL op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);
+%}
+
+// Compare unsigned long with zero and branch near instructions
+instruct cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl)
+%{
+  // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_branch'.
+  match(If cmp (CmpUL op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);
+%}
+
+instruct cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl)
+%{
+  // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_loop'.
+  match(CountedLoopEnd cmp (CmpUL op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_loop" %}
+
+  ins_encode %{
+    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);
+%}
+
+// Compare pointer with zero and branch near instructions
+instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{
+  // Same match rule as `far_cmpP_imm0_branch'.
+  match(If cmp (CmpP op1 zero));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);
+%}
+
+instruct cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{
+  // Same match rule as `far_cmpP_imm0_loop'.
+  match(CountedLoopEnd cmp (CmpP op1 zero));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_loop" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);
+%}
+
+// Compare narrow pointer with zero and branch near instructions
+instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{
+  // Same match rule as `far_cmpN_imm0_branch'.
+  match(If cmp (CmpN op1 zero));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);
+%}
+
+instruct cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{
+  // Same match rule as `far_cmpN_imm0_loop'.
+  match(CountedLoopEnd cmp (CmpN op1 zero));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_loop" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);
+%}
+
+// Compare narrow pointer with pointer zero and branch near instructions (the match rule covers the DecodeN; the narrow register itself is tested)
+instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{
+  // Same match rule as `far_cmpP_narrowOop_imm0_branch'.
+  match(If cmp (CmpP (DecodeN op1) zero));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);
+%}
+
+instruct cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{
+  // Same match rule as `far_cmpP_narrowOop_imm0_loop'.
+  match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_loop" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);
+%}
+
+// Patterns for far (20KiB) variants: same match rules as the near forms, encoded with is_far = true and without ins_short_branch.
+
+instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{
+  match(If cmp cr);
+  effect(USE lbl);
+  // NOTE(review): takes cmpOp while the near form takes cmpOpEqNe, yet both call enc_cmpEqNe_imm0_branch - confirm this is intended.
+  ins_cost(BRANCH_COST);
+  format %{ "far_b$cmp $cr, zr, L\t#@far_cmpFlag_branch"%}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+// Compare signed int and branch far instructions
+instruct far_cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{
+  match(If cmp (CmpI op1 op2));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+
+  // The single [far_b$cmp] shown in the format text is emitted as two instructions
+  // in the macroassembler: b$not_cmp(op1, op2, done), j($lbl), bind(done)
+  format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{
+  match(CountedLoopEnd cmp (CmpI op1 op2));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+  format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{
+  match(If cmp (CmpU op1 op2));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+  format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{
+  match(CountedLoopEnd cmp (CmpU op1 op2));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+  format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{
+  match(If cmp (CmpL op1 op2));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+  format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpLloop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{
+  match(CountedLoopEnd cmp (CmpL op1 op2));
+  effect(USE lbl);
+  // NOTE(review): the instruct is named far_cmpLloop while its format tag reads far_cmpL_loop; the name is left untouched here.
+  ins_cost(BRANCH_COST * 2);
+  format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{
+  match(If cmp (CmpUL op1 op2));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+  format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{
+  match(CountedLoopEnd cmp (CmpUL op1 op2));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+  format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl)
+%{
+  match(If cmp (CmpP op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+
+  format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl)
+%{
+  match(CountedLoopEnd cmp (CmpP op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+
+  format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl)
+%{
+  match(If cmp (CmpN op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+
+  format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl)
+%{
+  match(CountedLoopEnd cmp (CmpN op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+
+  format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+%}
+
+// Float compare and branch instructions (far forms of cmpF_branch / cmpF_loop)
+instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
+%{
+  match(If cmp (CmpF op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(XFER_COST + BRANCH_COST * 2);
+  format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_branch"%}
+
+  ins_encode %{
+    __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
+                        *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
+%{
+  match(CountedLoopEnd cmp (CmpF op1 op2));
+  effect(USE lbl);
+
+  ins_cost(XFER_COST + BRANCH_COST * 2);
+  format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_loop"%}
+
+  ins_encode %{
+    __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
+                        *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_class_compare);
+%}
+
+// Double compare and branch instructions (far forms of cmpD_branch / cmpD_loop)
+instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
+%{
+  match(If cmp (CmpD op1 op2));
+  effect(USE lbl);
+
+  ins_cost(XFER_COST + BRANCH_COST * 2);
+  format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%}
+
+  ins_encode %{
+    __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
+                        as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
+%{
+  match(CountedLoopEnd cmp (CmpD op1 op2));
+  effect(USE lbl);
+
+  ins_cost(XFER_COST + BRANCH_COST * 2);
+  format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%}
+
+  ins_encode %{
+    __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
+                        as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct far_cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl)
+%{
+  match(If cmp (CmpI op1 zero));
+  // Far form of the signed int compare-with-zero branch: zr is the second operand and is_far is true.
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+
+  format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl)
+%{
+  match(CountedLoopEnd cmp (CmpI op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+
+  format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpUEqNeLeGt_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl)
+%{
+  match(If cmp (CmpU op1 zero));
+  // NOTE(review): named without "_reg", unlike its loop sibling far_cmpUEqNeLeGt_reg_imm0_loop; the name is left untouched here.
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+
+  format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl)
+%{
+  match(CountedLoopEnd cmp (CmpU op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+
+  format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_reg_imm0_loop" %}
+
+
+  ins_encode %{
+    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+// Unsigned lt/ge compares against zero have no short instruct with the same match rule.
+instruct far_cmpULtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl)
+%{
+  match(If cmp (CmpU op1 zero));
+  // Encoded through riscv_enc_far_cmpULtGe_imm0_branch; per the format text it jumps to $lbl when $cmp is ge.
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_branch" %}
+
+  ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpULtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl)
+%{
+  match(CountedLoopEnd cmp (CmpU op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_loop" %}
+
+  ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl)
+%{
+  match(If cmp (CmpL op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+
+  format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl)
+%{
+  match(CountedLoopEnd cmp (CmpL op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+
+  format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl)
+%{
+  match(If cmp (CmpUL op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+
+  format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl)
+%{
+  match(CountedLoopEnd cmp (CmpUL op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+
+  format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_loop" %}
+
+  ins_encode %{
+    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+// Unsigned lt/ge compares against zero have no short instruct with the same match rule (long operand forms).
+instruct far_cmpULLtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl)
+%{
+  match(If cmp (CmpUL op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_branch" %}
+
+  ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpULLtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl)
+%{
+  match(CountedLoopEnd cmp (CmpUL op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_loop" %}
+
+  ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{
+  match(If cmp (CmpP op1 zero));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+  format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{
+  match(CountedLoopEnd cmp (CmpP op1 zero));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+  format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_loop" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{
+  match(If cmp (CmpN op1 zero));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+
+  format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{
+  match(CountedLoopEnd cmp (CmpN op1 zero));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+
+  format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_loop" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{
+  match(If cmp (CmpP (DecodeN op1) zero));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+  format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{
+  match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+  format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_loop" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+// ============================================================================
+// Conditional Move Instructions (per the format text: a negated-condition branch to "skip" over a `mv $dst, $src`)
+instruct cmovI_cmpI(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOp cop) %{
+  match(Set dst (CMoveI (Binary cop (CmpI op1 op2)) (Binary dst src)));
+  ins_cost(ALU_COST + BRANCH_COST);
+
+  format %{
+             "bneg$cop $op1, $op2, skip\t#@cmovI_cmpI\n\t"
+             "mv $dst, $src\n\t"
+             "skip:"
+         %}
+
+  ins_encode %{
+    __ enc_cmove($cop$$cmpcode,
+                 as_Register($op1$$reg), as_Register($op2$$reg),
+                 as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) %{
+  match(Set dst (CMoveI (Binary cop (CmpU op1 op2)) (Binary dst src)));
+  ins_cost(ALU_COST + BRANCH_COST);
+
+  format %{
+             "bneg$cop $op1, $op2, skip\t#@cmovI_cmpU\n\t"
+             "mv $dst, $src\n\t"
+             "skip:"
+         %}
+
+  ins_encode %{
+    __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
+                 as_Register($op1$$reg), as_Register($op2$$reg),
+                 as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) %{
+  match(Set dst (CMoveI (Binary cop (CmpL op1 op2)) (Binary dst src)));
+  ins_cost(ALU_COST + BRANCH_COST);
+
+  format %{
+             "bneg$cop $op1, $op2, skip\t#@cmovI_cmpL\n\t"
+             "mv $dst, $src\n\t"
+             "skip:"
+         %}
+
+  ins_encode %{
+    __ enc_cmove($cop$$cmpcode,
+                 as_Register($op1$$reg), as_Register($op2$$reg),
+                 as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct cmovL_cmpL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOp cop) %{
+  match(Set dst (CMoveL (Binary cop (CmpL op1 op2)) (Binary dst src)));
+  ins_cost(ALU_COST + BRANCH_COST);
+
+  format %{
+             "bneg$cop $op1, $op2, skip\t#@cmovL_cmpL\n\t"
+             "mv $dst, $src\n\t"
+             "skip:"
+         %}
+
+  ins_encode %{
+    __ enc_cmove($cop$$cmpcode,
+                 as_Register($op1$$reg), as_Register($op2$$reg),
+                 as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) %{
+  match(Set dst (CMoveL (Binary cop (CmpUL op1 op2)) (Binary dst src)));
+  ins_cost(ALU_COST + BRANCH_COST);
+
+  format %{
+             "bneg$cop $op1, $op2, skip\t#@cmovL_cmpUL\n\t"
+             "mv $dst, $src\n\t"
+             "skip:"
+         %}
+
+  ins_encode %{
+    __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, +
as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(pipe_slow); +%} + +instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{ + match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src))); + ins_cost(ALU_COST + BRANCH_COST); + format %{ + "bneg$cop $op1, $op2, skip\t#@cmovI_cmpUL\n\t" + "mv $dst, $src\n\t" + "skip:" + %} + + ins_encode %{ + __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(pipe_slow); +%} + + +// ============================================================================ +// Procedure Call/Return Instructions + +// Call Java Static Instruction +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. +instruct CallStaticJavaDirect(method meth) +%{ + match(CallStaticJava); + + effect(USE meth); + + ins_cost(BRANCH_COST); + + format %{ "CALL,static $meth\t#@CallStaticJavaDirect" %} + + ins_encode(riscv_enc_java_static_call(meth), + riscv_enc_call_epilog); + + ins_pipe(pipe_class_call); + ins_alignment(4); +%} + +// TO HERE + +// Call Java Dynamic Instruction +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. 
+instruct CallDynamicJavaDirect(method meth, rFlagsReg cr) +%{ + match(CallDynamicJava); + + effect(USE meth, KILL cr); + + ins_cost(BRANCH_COST + ALU_COST * 6); + + format %{ "CALL,dynamic $meth\t#@CallDynamicJavaDirect" %} + + ins_encode(riscv_enc_java_dynamic_call(meth), + riscv_enc_call_epilog); + + ins_pipe(pipe_class_call); + ins_alignment(4); +%} + +// Call Runtime Instruction + +instruct CallRuntimeDirect(method meth, rFlagsReg cr) +%{ + match(CallRuntime); + + effect(USE meth, KILL cr); + + ins_cost(BRANCH_COST); + + format %{ "CALL, runtime $meth\t#@CallRuntimeDirect" %} + + ins_encode(riscv_enc_java_to_runtime(meth)); + + ins_pipe(pipe_class_call); +%} + +// Call Runtime Leaf Instruction + +instruct CallLeafDirect(method meth, rFlagsReg cr) +%{ + match(CallLeaf); + + effect(USE meth, KILL cr); + + ins_cost(BRANCH_COST); + + format %{ "CALL, runtime leaf $meth\t#@CallLeafDirect" %} + + ins_encode(riscv_enc_java_to_runtime(meth)); + + ins_pipe(pipe_class_call); +%} + +// Call Runtime Leaf (no FP) Instruction + +instruct CallLeafNoFPDirect(method meth, rFlagsReg cr) +%{ + match(CallLeafNoFP); + + effect(USE meth, KILL cr); + + ins_cost(BRANCH_COST); + + format %{ "CALL, runtime leaf nofp $meth\t#@CallLeafNoFPDirect" %} + + ins_encode(riscv_enc_java_to_runtime(meth)); + + ins_pipe(pipe_class_call); +%} + +// ============================================================================ +// Partial Subtype Check +// Scan the subklass's secondary +// superklass array for an instance of the superklass. Set a hidden +// internal cache on a hit (cache is checked with exposed code in +// gen_subtype_check()). Return zero for a hit. The encoding +// ALSO sets flags. 
+ +instruct partialSubtypeCheck(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, rFlagsReg cr) +%{ + match(Set result (PartialSubtypeCheck sub super)); + effect(KILL tmp, KILL cr); + + ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); + format %{ "partialSubtypeCheck $result, $sub, $super\t#@partialSubtypeCheck" %} + + ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result)); + + opcode(0x1); // Force zero of result reg on hit + + ins_pipe(pipe_class_memory); +%} + +instruct partialSubtypeCheckVsZero(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, + immP0 zero, rFlagsReg cr) +%{ + match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); + effect(KILL tmp, KILL result); + + ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); + format %{ "partialSubtypeCheck $result, $sub, $super == 0\t#@partialSubtypeCheckVsZero" %} + + ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result)); + + opcode(0x0); // Don't zero result reg on hit + + ins_pipe(pipe_class_memory); +%} + +instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) +%{ + predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} + ins_encode %{ + // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
+ __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, + StrIntrinsicNode::UU); + %} + ins_pipe(pipe_class_memory); +%} + +instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) +%{ + predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} + ins_encode %{ + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, + StrIntrinsicNode::LL); + %} + ins_pipe(pipe_class_memory); +%} + +instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) +%{ + predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} + ins_encode %{ + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, + StrIntrinsicNode::UL); + %} + ins_pipe(pipe_class_memory); +%} + +instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, + rFlagsReg cr) +%{ + predicate(!UseRVV && ((StrCompNode 
*)n)->encoding() == StrIntrinsicNode::LU); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} + ins_encode %{ + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, + StrIntrinsicNode::LU); + %} + ins_pipe(pipe_class_memory); +%} + +instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, + iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) +%{ + predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); + + format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} + ins_encode %{ + __ string_indexof($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, + $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, + $tmp5$$Register, $tmp6$$Register, + $result$$Register, StrIntrinsicNode::UU); + %} + ins_pipe(pipe_class_memory); +%} + +instruct string_indexofLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, + iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) +%{ + predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL 
cr); + + format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} + ins_encode %{ + __ string_indexof($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, + $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, + $tmp5$$Register, $tmp6$$Register, + $result$$Register, StrIntrinsicNode::LL); + %} + ins_pipe(pipe_class_memory); +%} + +instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, + iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) +%{ + predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %} + + ins_encode %{ + __ string_indexof($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, + $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, + $tmp5$$Register, $tmp6$$Register, + $result$$Register, StrIntrinsicNode::UL); + %} + ins_pipe(pipe_class_memory); +%} + +instruct string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, + immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +%{ + predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + + format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %} + + ins_encode %{ + int icnt2 = (int)$int_cnt2$$constant; + __ string_indexof_linearscan($str1$$Register, $str2$$Register, + $cnt1$$Register, zr, + 
$tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, + icnt2, $result$$Register, StrIntrinsicNode::UU); + %} + ins_pipe(pipe_class_memory); +%} + +instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, + immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +%{ + predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + + format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} + ins_encode %{ + int icnt2 = (int)$int_cnt2$$constant; + __ string_indexof_linearscan($str1$$Register, $str2$$Register, + $cnt1$$Register, zr, + $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, + icnt2, $result$$Register, StrIntrinsicNode::LL); + %} + ins_pipe(pipe_class_memory); +%} + +instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, + immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +%{ + predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + + format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} + ins_encode %{ + int icnt2 = (int)$int_cnt2$$constant; + __ string_indexof_linearscan($str1$$Register, $str2$$Register, + $cnt1$$Register, zr, + $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, + icnt2, $result$$Register, StrIntrinsicNode::UL); + %} + ins_pipe(pipe_class_memory); +%} + +instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + iRegI_R10 result, iRegINoSp 
tmp1, iRegINoSp tmp2, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +%{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); + predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + + format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} + ins_encode %{ + __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, + $result$$Register, $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, false /* isU */); + %} + ins_pipe(pipe_class_memory); +%} + + +instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +%{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); + predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + + format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result" %} + ins_encode %{ + __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, + $result$$Register, $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, true /* isL */); + %} + ins_pipe(pipe_class_memory); +%} + +// clearing of an array +instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) +%{ + predicate(!UseRVV); + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL cnt, USE_KILL base); + + ins_cost(4 * DEFAULT_COST); + format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} + + ins_encode %{ + address tpc = __ zero_words($base$$Register, $cnt$$Register); + if (tpc == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + %} + + ins_pipe(pipe_class_memory); +%} + +instruct clearArray_imm_reg(immL cnt, iRegP_R28 
base, Universe dummy, rFlagsReg cr) +%{ + predicate(!UseRVV && (uint64_t)n->in(2)->get_long() + < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL base, KILL cr); + + ins_cost(4 * DEFAULT_COST); + format %{ "ClearArray $cnt, $base\t#@clearArray_imm_reg" %} + + ins_encode %{ + __ zero_words($base$$Register, (uint64_t)$cnt$$constant); + %} + + ins_pipe(pipe_class_memory); +%} + +instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + iRegI_R10 result, rFlagsReg cr) +%{ + predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + + format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} + ins_encode %{ + // Count is in 8-bit bytes; non-Compact chars are 16 bits. + __ string_equals($str1$$Register, $str2$$Register, + $result$$Register, $cnt$$Register, 1); + %} + ins_pipe(pipe_class_memory); +%} + +instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + iRegI_R10 result, rFlagsReg cr) +%{ + predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + + format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} + ins_encode %{ + // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
+ __ string_equals($str1$$Register, $str2$$Register, + $result$$Register, $cnt$$Register, 2); + %} + ins_pipe(pipe_class_memory); +%} + +instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, + iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, + iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) +%{ + predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); + + format %{ "Array Equals $ary1, $ary2 -> $result\t#@array_equalsB // KILL $tmp5" %} + ins_encode %{ + __ arrays_equals($ary1$$Register, $ary2$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, + $result$$Register, $tmp5$$Register, 1); + %} + ins_pipe(pipe_class_memory); +%} + +instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, + iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, + iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) +%{ + predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); + + format %{ "Array Equals $ary1, $ary2 -> $result\t#@array_equalsC // KILL $tmp5" %} + ins_encode %{ + __ arrays_equals($ary1$$Register, $ary2$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, + $result$$Register, $tmp5$$Register, 2); + %} + ins_pipe(pipe_class_memory); +%} + +// ============================================================================ +// Safepoint Instructions + +instruct safePoint(iRegP poll) +%{ + match(SafePoint poll); + + ins_cost(2 * LOAD_COST); + format %{ + "lwu zr, [$poll]\t# Safepoint: poll for GC, #@safePoint" + %} + ins_encode %{ + __ read_polling_page(as_Register($poll$$reg), 0, relocInfo::poll_type); + %} + ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem); +%} + +// 
============================================================================ +// This name is KNOWN by the ADLC and cannot be changed. +// The ADLC forces a 'TypeRawPtr::BOTTOM' output type +// for this guy. +instruct tlsLoadP(javaThread_RegP dst) +%{ + match(Set dst (ThreadLocal)); + + ins_cost(0); + + format %{ " -- \t// $dst=Thread::current(), empty, #@tlsLoadP" %} + + size(0); + + ins_encode( /*empty*/ ); + + ins_pipe(pipe_class_empty); +%} + +// inlined locking and unlocking +// using t1 as the 'flag' register to bridge the BoolNode producers and consumers +instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) +%{ + match(Set cr (FastLock object box)); + effect(TEMP tmp1, TEMP tmp2); + + ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3); + format %{ "fastlock $object,$box\t! kills $tmp1,$tmp2, #@cmpFastLock" %} + + ins_encode(riscv_enc_fast_lock(object, box, tmp1, tmp2)); + + ins_pipe(pipe_serial); +%} + +// using t1 as the 'flag' register to bridge the BoolNode producers and consumers +instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) +%{ + match(Set cr (FastUnlock object box)); + effect(TEMP tmp1, TEMP tmp2); + + ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4); + format %{ "fastunlock $object,$box\t! kills $tmp1, $tmp2, #@cmpFastUnlock" %} + + ins_encode(riscv_enc_fast_unlock(object, box, tmp1, tmp2)); + + ins_pipe(pipe_serial); +%} + +// Tail Call; Jump from runtime stub to Java code. +// Also known as an 'interprocedural jump'. +// Target of jump will eventually return to caller. +// TailJump below removes the return address. +instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop) +%{ + match(TailCall jump_target method_oop); + + ins_cost(BRANCH_COST); + + format %{ "jalr $jump_target\t# $method_oop holds method oop, #@TailCalljmpInd." 
%} + + ins_encode(riscv_enc_tail_call(jump_target)); + + ins_pipe(pipe_class_call); +%} + +instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R10 ex_oop) +%{ + match(TailJump jump_target ex_oop); + + ins_cost(ALU_COST + BRANCH_COST); + + format %{ "jalr $jump_target\t# $ex_oop holds exception oop, #@TailjmpInd." %} + + ins_encode(riscv_enc_tail_jmp(jump_target)); + + ins_pipe(pipe_class_call); +%} + +// Create exception oop: created by stack-crawling runtime code. +// Created exception is now available to this handler, and is setup +// just prior to jumping to this handler. No code emitted. +instruct CreateException(iRegP_R10 ex_oop) +%{ + match(Set ex_oop (CreateEx)); + + ins_cost(0); + format %{ " -- \t// exception oop; no code emitted, #@CreateException" %} + + size(0); + + ins_encode( /*empty*/ ); + + ins_pipe(pipe_class_empty); +%} + +// Rethrow exception: The exception oop will come in the first +// argument position. Then JUMP (not call) to the rethrow stub code. +instruct RethrowException() +%{ + match(Rethrow); + + ins_cost(BRANCH_COST); + + format %{ "j rethrow_stub\t#@RethrowException" %} + + ins_encode(riscv_enc_rethrow()); + + ins_pipe(pipe_class_call); +%} + +// Return Instruction +// epilog node loads ret address into ra as part of frame pop +instruct Ret() +%{ + match(Return); + + ins_cost(BRANCH_COST); + format %{ "ret\t// return register, #@Ret" %} + + ins_encode(riscv_enc_ret()); + + ins_pipe(pipe_branch); +%} + +// Die now. +instruct ShouldNotReachHere() %{ + match(Halt); + + ins_cost(BRANCH_COST); + + format %{ "#@ShouldNotReachHere" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); + if (is_reachable()) { + __ halt(); + } + %} + + ins_pipe(pipe_class_default); +%} + + +//----------PEEPHOLE RULES----------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. 
+// +// peepmatch ( root_instr_name [preceding_instruction]* ); +// +// peepconstraint %{ +// (instruction_number.operand_name relational_op instruction_number.operand_name +// [, ...] ); +// // instruction numbers are zero-based using left to right order in peepmatch +// +// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); +// // provide an instruction_number.operand_name for each operand that appears +// // in the replacement instruction's match rule +// +// ---------VM FLAGS--------------------------------------------------------- +// +// All peephole optimizations can be turned off using -XX:-OptoPeephole +// +// Each peephole rule is given an identifying number starting with zero and +// increasing by one in the order seen by the parser. An individual peephole +// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# +// on the command-line. +// +// ---------CURRENT LIMITATIONS---------------------------------------------- +// +// Only match adjacent instructions in same basic block +// Only equality constraints +// Only constraints between operands, not (0.dest_reg == RAX_enc) +// Only one replacement instruction +// +//----------SMARTSPILL RULES--------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. + +// Local Variables: +// mode: c++ +// End: Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/riscv_b.ad =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/riscv_b.ad @@ -0,0 +1,527 @@ +// +// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// RISCV Bit-Manipulation Extension Architecture Description File + +instruct rorI_imm_rvb(iRegINoSp dst, iRegI src, immI shift) %{ + predicate(UseRVB); + match(Set dst (RotateRight src shift)); + + format %{ "roriw $dst, $src, ($shift & 0x1f)\t#@rorI_imm_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ + __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_shift); +%} + +instruct rorL_imm_rvb(iRegLNoSp dst, iRegL src, immI shift) %{ + predicate(UseRVB); + match(Set dst (RotateRight src shift)); + + format %{ "rori $dst, $src, ($shift & 0x3f)\t#@rorL_imm_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ + __ rori(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_shift); +%} + +instruct rorI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ + predicate(UseRVB); + match(Set dst (RotateRight src shift)); + + format %{ "rorw $dst, $src, $shift\t#@rorI_reg_rvb" %} + ins_cost(ALU_COST); + ins_encode %{ + __ rorw(as_Register($dst$$reg), 
as_Register($src$$reg), as_Register($shift$$reg)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct rorL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ + predicate(UseRVB); + match(Set dst (RotateRight src shift)); + + format %{ "ror $dst, $src, $shift\t#@rorL_reg_rvb" %} + ins_cost(ALU_COST); + ins_encode %{ + __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct rolI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ + predicate(UseRVB); + match(Set dst (RotateLeft src shift)); + + format %{ "rolw $dst, $src, $shift\t#@rolI_reg_rvb" %} + ins_cost(ALU_COST); + ins_encode %{ + __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct rolL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ + predicate(UseRVB); + match(Set dst (RotateLeft src shift)); + + format %{ "rol $dst, $src, $shift\t#@rolL_reg_rvb" %} + ins_cost(ALU_COST); + ins_encode %{ + __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Convert oop into int for vectors alignment masking +instruct convP2I_rvb(iRegINoSp dst, iRegP src) %{ + predicate(UseRVB); + match(Set dst (ConvL2I (CastP2X src))); + + format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ + __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// byte to int +instruct convB2I_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ + predicate(UseRVB); + match(Set dst (RShiftI (LShiftI src lshift) rshift)); + + format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ + __ sext_b(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// int to short +instruct convI2S_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ + 
predicate(UseRVB); + match(Set dst (RShiftI (LShiftI src lshift) rshift)); + + format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ + __ sext_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// short to unsigned int +instruct convS2UI_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ + predicate(UseRVB); + match(Set dst (AndI src mask)); + + format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ + __ zext_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// int to unsigned long (zero extend) +instruct convI2UL_reg_reg_rvb(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ + predicate(UseRVB); + match(Set dst (AndL (ConvI2L src) mask)); + + format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ + __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg_shift); +%} + +// BSWAP instructions +instruct bytes_reverse_int_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UseRVB); + match(Set dst (ReverseBytesI src)); + + ins_cost(ALU_COST * 2); + format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_rvb" %} + + ins_encode %{ + __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct bytes_reverse_long_rvb(iRegLNoSp dst, iRegL src) %{ + predicate(UseRVB); + match(Set dst (ReverseBytesL src)); + + ins_cost(ALU_COST); + format %{ "rev8 $dst, $src\t#@bytes_reverse_long_rvb" %} + + ins_encode %{ + __ rev8(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct bytes_reverse_unsigned_short_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UseRVB); + match(Set dst (ReverseBytesUS src)); + + ins_cost(ALU_COST * 2); + format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_rvb" %} + + ins_encode %{ + __ 
revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct bytes_reverse_short_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UseRVB); + match(Set dst (ReverseBytesS src)); + + ins_cost(ALU_COST * 2); + format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_rvb" %} + + ins_encode %{ + __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// Shift Add Pointer +instruct shaddP_reg_reg_rvb(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ + predicate(UseRVB); + match(Set dst (AddP src1 (LShiftL src2 imm))); + + ins_cost(ALU_COST); + format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_rvb" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + t0, + $imm$$constant); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct shaddP_reg_reg_ext_rvb(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ + predicate(UseRVB); + match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm))); + + ins_cost(ALU_COST); + format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_rvb" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + t0, + $imm$$constant); + %} + + ins_pipe(ialu_reg_reg); +%} + +// Shift Add Long +instruct shaddL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ + predicate(UseRVB); + match(Set dst (AddL src1 (LShiftL src2 imm))); + + ins_cost(ALU_COST); + format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_rvb" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + t0, + $imm$$constant); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct shaddL_reg_reg_ext_rvb(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ + predicate(UseRVB); + match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm))); + + ins_cost(ALU_COST); + format %{ "shadd $dst, $src2, $src1, 
$imm\t#@shaddL_reg_reg_ext_rvb" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + t0, + $imm$$constant); + %} + + ins_pipe(ialu_reg_reg); +%} + +// Zeros Count instructions +instruct countLeadingZerosI_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UseRVB); + match(Set dst (CountLeadingZerosI src)); + + ins_cost(ALU_COST); + format %{ "clzw $dst, $src\t#@countLeadingZerosI_rvb" %} + + ins_encode %{ + __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosL_rvb(iRegINoSp dst, iRegL src) %{ + predicate(UseRVB); + match(Set dst (CountLeadingZerosL src)); + + ins_cost(ALU_COST); + format %{ "clz $dst, $src\t#@countLeadingZerosL_rvb" %} + + ins_encode %{ + __ clz(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosI_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UseRVB); + match(Set dst (CountTrailingZerosI src)); + + ins_cost(ALU_COST); + format %{ "ctzw $dst, $src\t#@countTrailingZerosI_rvb" %} + + ins_encode %{ + __ ctzw(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosL_rvb(iRegINoSp dst, iRegL src) %{ + predicate(UseRVB); + match(Set dst (CountTrailingZerosL src)); + + ins_cost(ALU_COST); + format %{ "ctz $dst, $src\t#@countTrailingZerosL_rvb" %} + + ins_encode %{ + __ ctz(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// Population Count instructions +instruct popCountI_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI src)); + + ins_cost(ALU_COST); + format %{ "cpopw $dst, $src\t#@popCountI_rvb" %} + + ins_encode %{ + __ cpopw(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// Note: Long/bitCount(long) returns an int. 
+instruct popCountL_rvb(iRegINoSp dst, iRegL src) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL src)); + + ins_cost(ALU_COST); + format %{ "cpop $dst, $src\t#@popCountL_rvb" %} + + ins_encode %{ + __ cpop(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// Max and Min +instruct minI_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2) %{ + predicate(UseRVB); + match(Set dst (MinI src1 src2)); + + ins_cost(ALU_COST); + format %{ "min $dst, $src1, $src2\t#@minI_reg_rvb" %} + + ins_encode %{ + __ min(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct maxI_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2) %{ + predicate(UseRVB); + match(Set dst (MaxI src1 src2)); + + ins_cost(ALU_COST); + format %{ "max $dst, $src1, $src2\t#@maxI_reg_rvb" %} + + ins_encode %{ + __ max(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +// Abs +instruct absI_reg_rvb(iRegINoSp dst, iRegI src) %{ + predicate(UseRVB); + match(Set dst (AbsI src)); + + ins_cost(ALU_COST * 2); + format %{ + "negw t0, $src\n\t" + "max $dst, $src, t0\t#@absI_reg_rvb" + %} + + ins_encode %{ + __ negw(t0, as_Register($src$$reg)); + __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct absL_reg_rvb(iRegLNoSp dst, iRegL src) %{ + predicate(UseRVB); + match(Set dst (AbsL src)); + + ins_cost(ALU_COST * 2); + format %{ + "neg t0, $src\n\t" + "max $dst, $src, t0\t#@absL_reg_rvb" + %} + + ins_encode %{ + __ neg(t0, as_Register($src$$reg)); + __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); + %} + + ins_pipe(ialu_reg); +%} + +// And Not +instruct andnI_reg_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ + predicate(UseRVB); + match(Set dst (AndI src1 (XorI src2 m1))); + + ins_cost(ALU_COST); + format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_rvb" %} + + 
ins_encode %{ + __ andn(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct andnL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ + predicate(UseRVB); + match(Set dst (AndL src1 (XorL src2 m1))); + + ins_cost(ALU_COST); + format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_rvb" %} + + ins_encode %{ + __ andn(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +// Or Not +instruct ornI_reg_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ + predicate(UseRVB); + match(Set dst (OrI src1 (XorI src2 m1))); + + ins_cost(ALU_COST); + format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_rvb" %} + + ins_encode %{ + __ orn(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct ornL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ + predicate(UseRVB); + match(Set dst (OrL src1 (XorL src2 m1))); + + ins_cost(ALU_COST); + format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_rvb" %} + + ins_encode %{ + __ orn(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} \ No newline at end of file Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/riscv_v.ad =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/riscv_v.ad @@ -0,0 +1,2077 @@ +// +// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2020, Arm Limited. All rights reserved. +// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// RISCV Vector Extension Architecture Description File + +opclass vmemA(indirect); + +source_hpp %{ + bool op_vec_supported(int opcode); +%} + +source %{ + static inline BasicType vector_element_basic_type(const MachNode* n) { + const TypeVect* vt = n->bottom_type()->is_vect(); + return vt->element_basic_type(); + } + + static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) { + int def_idx = use->operand_index(opnd); + Node* def = use->in(def_idx); + const TypeVect* vt = def->bottom_type()->is_vect(); + return vt->element_basic_type(); + } + + + static void loadStore(C2_MacroAssembler masm, bool is_store, + VectorRegister reg, BasicType bt, Register base) { + Assembler::SEW sew = Assembler::elemtype_to_sew(bt); + masm.vsetvli(t0, x0, sew); + if (is_store) { + masm.vsex_v(reg, base, sew); + } else { + masm.vlex_v(reg, base, sew); + } + } + + bool op_vec_supported(int opcode) { + switch (opcode) { + // No multiply reduction instructions + case Op_MulReductionVD: + case Op_MulReductionVF: + case Op_MulReductionVI: 
+ case Op_MulReductionVL: + // Others + case Op_Extract: + case Op_ExtractB: + case Op_ExtractC: + case Op_ExtractD: + case Op_ExtractF: + case Op_ExtractI: + case Op_ExtractL: + case Op_ExtractS: + case Op_ExtractUB: + // Vector API specific + case Op_AndReductionV: + case Op_OrReductionV: + case Op_XorReductionV: + case Op_LoadVectorGather: + case Op_StoreVectorScatter: + case Op_VectorBlend: + case Op_VectorCast: + case Op_VectorCastB2X: + case Op_VectorCastD2X: + case Op_VectorCastF2X: + case Op_VectorCastI2X: + case Op_VectorCastL2X: + case Op_VectorCastS2X: + case Op_VectorInsert: + case Op_VectorLoadConst: + case Op_VectorLoadMask: + case Op_VectorLoadShuffle: + case Op_VectorMaskCmp: + case Op_VectorRearrange: + case Op_VectorReinterpret: + case Op_VectorStoreMask: + case Op_VectorTest: + return false; + default: + return UseRVV; + } + } + +%} + +definitions %{ + int_def VEC_COST (200, 200); +%} + +// All VEC instructions + +// vector load/store +instruct loadV(vReg dst, vmemA mem) %{ + match(Set dst (LoadVector mem)); + ins_cost(VEC_COST); + format %{ "vle $dst, $mem\t#@loadV" %} + ins_encode %{ + VectorRegister dst_reg = as_VectorRegister($dst$$reg); + loadStore(C2_MacroAssembler(&cbuf), false, dst_reg, + vector_element_basic_type(this), as_Register($mem$$base)); + %} + ins_pipe(pipe_slow); +%} + +instruct storeV(vReg src, vmemA mem) %{ + match(Set mem (StoreVector mem src)); + ins_cost(VEC_COST); + format %{ "vse $src, $mem\t#@storeV" %} + ins_encode %{ + VectorRegister src_reg = as_VectorRegister($src$$reg); + loadStore(C2_MacroAssembler(&cbuf), true, src_reg, + vector_element_basic_type(this, $src), as_Register($mem$$base)); + %} + ins_pipe(pipe_slow); +%} + +// vector abs + +instruct vabsB(vReg dst, vReg src, vReg tmp) %{ + match(Set dst (AbsVB src)); + ins_cost(VEC_COST); + effect(TEMP tmp); + format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t" + "vmax.vv $dst, $tmp, $src" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e8); + __ 
vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); + __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsS(vReg dst, vReg src, vReg tmp) %{ + match(Set dst (AbsVS src)); + ins_cost(VEC_COST); + effect(TEMP tmp); + format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t" + "vmax.vv $dst, $tmp, $src" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e16); + __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); + __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsI(vReg dst, vReg src, vReg tmp) %{ + match(Set dst (AbsVI src)); + ins_cost(VEC_COST); + effect(TEMP tmp); + format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t" + "vmax.vv $dst, $tmp, $src" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); + __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsL(vReg dst, vReg src, vReg tmp) %{ + match(Set dst (AbsVL src)); + ins_cost(VEC_COST); + effect(TEMP tmp); + format %{ "vrsub.vi $tmp, 0, $src\t#@vabsL\n\t" + "vmax.vv $dst, $tmp, $src" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); + __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsF(vReg dst, vReg src) %{ + match(Set dst (AbsVF src)); + ins_cost(VEC_COST); + format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsD(vReg dst, 
vReg src) %{ + match(Set dst (AbsVD src)); + ins_cost(VEC_COST); + format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector add + +instruct vaddB(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (AddVB src1 src2)); + ins_cost(VEC_COST); + format %{ "vadd.vv $dst, $src1, $src2\t#@vaddB" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e8); + __ vadd_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddS(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (AddVS src1 src2)); + ins_cost(VEC_COST); + format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e16); + __ vadd_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddI(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (AddVI src1 src2)); + ins_cost(VEC_COST); + format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vadd_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddL(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (AddVL src1 src2)); + ins_cost(VEC_COST); + format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vadd_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddF(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (AddVF src1 src2)); + ins_cost(VEC_COST); + format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ 
vfadd_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddD(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (AddVD src1 src2)); + ins_cost(VEC_COST); + format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vfadd_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector and + +instruct vand(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (AndV src1 src2)); + ins_cost(VEC_COST); + format %{ "vand.vv $dst, $src1, $src2\t#@vand" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vand_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector or + +instruct vor(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (OrV src1 src2)); + ins_cost(VEC_COST); + format %{ "vor.vv $dst, $src1, $src2\t#@vor" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vor_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector xor + +instruct vxor(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (XorV src1 src2)); + ins_cost(VEC_COST); + format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vxor_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector float div + +instruct vdivF(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (DivVF src1 src2)); + ins_cost(VEC_COST); + format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vfdiv_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); 
+%} + +instruct vdivD(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (DivVD src1 src2)); + ins_cost(VEC_COST); + format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vfdiv_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector integer max/min + +instruct vmax(vReg dst, vReg src1, vReg src2) %{ + predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && + n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); + match(Set dst (MaxV src1 src2)); + ins_cost(VEC_COST); + format %{ "vmax.vv $dst, $src1, $src2\t#@vmax" %} + ins_encode %{ + BasicType bt = vector_element_basic_type(this); + Assembler::SEW sew = Assembler::elemtype_to_sew(bt); + __ vsetvli(t0, x0, sew); + __ vmax_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmin(vReg dst, vReg src1, vReg src2) %{ + predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && + n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); + match(Set dst (MinV src1 src2)); + ins_cost(VEC_COST); + format %{ "vmin.vv $dst, $src1, $src2\t#@vmin" %} + ins_encode %{ + BasicType bt = vector_element_basic_type(this); + Assembler::SEW sew = Assembler::elemtype_to_sew(bt); + __ vsetvli(t0, x0, sew); + __ vmin_vv(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector floating-point max/min + +instruct vmaxF(vReg dst, vReg src1, vReg src2) %{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MaxV src1 src2)); + effect(TEMP_DEF dst); + ins_cost(VEC_COST); + format %{ "vmaxF $dst, $src1, $src2\t#@vmaxF" %} + ins_encode %{ + __ minmax_FD_v(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), +
false /* is_double */, false /* is_min */); + %} + ins_pipe(pipe_slow); +%} + +instruct vmaxD(vReg dst, vReg src1, vReg src2) %{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (MaxV src1 src2)); + effect(TEMP_DEF dst); + ins_cost(VEC_COST); + format %{ "vmaxD $dst, $src1, $src2\t#@vmaxD" %} + ins_encode %{ + __ minmax_FD_v(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), + true /* is_double */, false /* is_min */); + %} + ins_pipe(pipe_slow); +%} + +instruct vminF(vReg dst, vReg src1, vReg src2) %{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MinV src1 src2)); + effect(TEMP_DEF dst); + ins_cost(VEC_COST); + format %{ "vminF $dst, $src1, $src2\t#@vminF" %} + ins_encode %{ + __ minmax_FD_v(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), + false /* is_double */, true /* is_min */); + %} + ins_pipe(pipe_slow); +%} + +instruct vminD(vReg dst, vReg src1, vReg src2) %{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (MinV src1 src2)); + effect(TEMP_DEF dst); + ins_cost(VEC_COST); + format %{ "vminD $dst, $src1, $src2\t#@vminD" %} + ins_encode %{ + __ minmax_FD_v(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), + true /* is_double */, true /* is_min */); + %} + ins_pipe(pipe_slow); +%} + +// vector fmla + +// dst_src1 = dst_src1 + src2 * src3 +instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA); + match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); + ins_cost(VEC_COST); + format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = dst_src1 + src2 * src3 +instruct 
vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA); + match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); + ins_cost(VEC_COST); + format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector fmls + +// dst_src1 = dst_src1 + -src2 * src3 +// dst_src1 = dst_src1 + src2 * -src3 +instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA); + match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); + match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); + ins_cost(VEC_COST); + format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = dst_src1 + -src2 * src3 +// dst_src1 = dst_src1 + src2 * -src3 +instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA); + match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); + match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); + ins_cost(VEC_COST); + format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector fnmla + +// dst_src1 = -dst_src1 + -src2 * src3 +// dst_src1 = -dst_src1 + src2 * -src3 +instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA); + match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); + match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); + ins_cost(VEC_COST); + format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %} 
+ ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = -dst_src1 + -src2 * src3 +// dst_src1 = -dst_src1 + src2 * -src3 +instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA); + match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); + match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); + ins_cost(VEC_COST); + format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector fnmls + +// dst_src1 = -dst_src1 + src2 * src3 +instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA); + match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); + ins_cost(VEC_COST); + format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = -dst_src1 + src2 * src3 +instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseFMA); + match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); + ins_cost(VEC_COST); + format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector mla + +// dst_src1 = dst_src1 + src2 * src3 +instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{ + match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); + ins_cost(VEC_COST); + format %{ "vmacc.vv $dst_src1, $src2,
$src3\t#@vmlaB" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e8); + __ vmacc_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = dst_src1 + src2 * src3 +instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{ + match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); + ins_cost(VEC_COST); + format %{ "vmacc.vv $dst_src1, $src2, $src3\t#@vmlaS" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e16); + __ vmacc_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = dst_src1 + src2 * src3 +instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{ + match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); + ins_cost(VEC_COST); + format %{ "vmacc.vv $dst_src1, $src2, $src3\t#@vmlaI" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vmacc_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = dst_src1 + src2 * src3 +instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ + match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); + ins_cost(VEC_COST); + format %{ "vmacc.vv $dst_src1, $src2, $src3\t#@vmlaL" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vmacc_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector mls + +// dst_src1 = dst_src1 - src2 * src3 +instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{ + match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); + ins_cost(VEC_COST); + format %{ "vnmsac.vv $dst_src1, $src2, $src3\t#@vmlsB" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e8); + __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = dst_src1 - src2
* src3 +instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{ + match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); + ins_cost(VEC_COST); + format %{ "vnmsac.vv $dst_src1, $src2, $src3\t#@vmlsS" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e16); + __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = dst_src1 - src2 * src3 +instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{ + match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); + ins_cost(VEC_COST); + format %{ "vnmsac.vv $dst_src1, $src2, $src3\t#@vmlsI" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// dst_src1 = dst_src1 - src2 * src3 +instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ + match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); + ins_cost(VEC_COST); + format %{ "vnmsac.vv $dst_src1, $src2, $src3\t#@vmlsL" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector mul + +instruct vmulB(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (MulVB src1 src2)); + ins_cost(VEC_COST); + format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e8); + __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulS(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (MulVS src1 src2)); + ins_cost(VEC_COST); + format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e16); + __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow);
+%} + +instruct vmulI(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (MulVI src1 src2)); + ins_cost(VEC_COST); + format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulL(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (MulVL src1 src2)); + ins_cost(VEC_COST); + format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulF(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (MulVF src1 src2)); + ins_cost(VEC_COST); + format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulD(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (MulVD src1 src2)); + ins_cost(VEC_COST); + format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector fneg + +instruct vnegF(vReg dst, vReg src) %{ + match(Set dst (NegVF src)); + ins_cost(VEC_COST); + format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vnegD(vReg dst, vReg src) %{ + match(Set dst (NegVD src)); + ins_cost(VEC_COST); + format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ 
vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// popcount vector + +instruct vpopcountI(iRegINoSp dst, vReg src) %{ + match(Set dst (PopCountVI src)); + format %{ "vpopc.m $dst, $src\t#@vpopcountI" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector add reduction + +instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (AddReductionVI src1 src2)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t" + "vredsum.vs $tmp, $src2, $tmp\n\t" + "vmv.x.s $dst, $tmp" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e8); + __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); + __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), + as_VectorRegister($tmp$$reg)); + __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (AddReductionVI src1 src2)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "vmv.s.x $tmp, $src1\t#@reduce_addS\n\t" + "vredsum.vs $tmp, $src2, $tmp\n\t" + "vmv.x.s $dst, $tmp" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e16); + __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); + __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), + as_VectorRegister($tmp$$reg)); + __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (AddReductionVI src1 
src2)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t" + "vredsum.vs $tmp, $src2, $tmp\n\t" + "vmv.x.s $dst, $tmp" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); + __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), + as_VectorRegister($tmp$$reg)); + __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* reduce_addL: AddReductionVL for T_LONG elements. Long scalar operands, SEW=e64, otherwise the vmv.s.x / vredsum.vs / vmv.x.s sequence. */ +instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (AddReductionVL src1 src2)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t" + "vredsum.vs $tmp, $src2, $tmp\n\t" + "vmv.x.s $dst, $tmp" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); + __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), + as_VectorRegister($tmp$$reg)); + __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* reduce_addF: AddReductionVF. src1_dst is both the scalar input and the result register. Under SEW=e32 it emits vfmv.s.f, vfredosum.vs and vfmv.f.s through the TEMP vector tmp. */ +instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{ + match(Set src1_dst (AddReductionVF src1_dst src2)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t" + "vfredosum.vs $tmp, $src2, $tmp\n\t" + "vfmv.f.s $src1_dst, $tmp" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); + __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), + as_VectorRegister($tmp$$reg)); + __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* reduce_addD: AddReductionVD; same shape as reduce_addF with a double register and SEW=e64. */ +instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{ + match(Set src1_dst (AddReductionVD src1_dst src2)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "vfmv.s.f $tmp,
$src1_dst\t#@reduce_addD\n\t" + "vfredosum.vs $tmp, $src2, $tmp\n\t" + "vfmv.f.s $src1_dst, $tmp" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); + __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), + as_VectorRegister($tmp$$reg)); + __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vreduce_maxB: MaxReductionV for T_BYTE elements. src2 is passed to vredmax.vs as both the vector and the seed operand, the result is moved to dst, and a scalar ble/mv pair then replaces dst with src1 unless src1 <= dst. */ +// vector integer max reduction +instruct vreduce_maxB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MaxReductionV src1 src2)); + ins_cost(VEC_COST); + effect(TEMP tmp); + format %{ "vreduce_maxB $dst, $src1, $src2, $tmp" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e8); + __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); + __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); + Label Ldone; + __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); + __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); + __ bind(Ldone); + %} + ins_pipe(pipe_slow); +%} +/* vreduce_maxS: T_SHORT variant of vreduce_maxB; identical sequence under SEW=e16. */ +instruct vreduce_maxS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MaxReductionV src1 src2)); + ins_cost(VEC_COST); + effect(TEMP tmp); + format %{ "vreduce_maxS $dst, $src1, $src2, $tmp" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e16); + __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); + __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); + Label Ldone; + __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); + __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); + __ bind(Ldone); + %} + ins_pipe(pipe_slow); +%} +/* vreduce_maxI: MaxReductionV for T_INT elements; src1 is first moved into tmp with vmv.s.x and used as the vredmax.vs seed, so no scalar compare follows. */ +instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MaxReductionV src1 src2)); + ins_cost(VEC_COST); + effect(TEMP tmp); + format %{ "vreduce_maxI $dst, $src1, $src2, $tmp" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); + __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); + __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vreduce_maxL: MaxReductionV for T_LONG elements. src1 is moved into tmp (vmv.s.x), used as the vredmax.vs seed, and the result is moved to dst; SEW=e64. */ +instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MaxReductionV src1 src2)); + ins_cost(VEC_COST); + effect(TEMP tmp); + format %{ "vreduce_maxL $dst, $src1, $src2, $tmp" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); + __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); + __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vreduce_minB: MinReductionV for T_BYTE elements. vredmin.vs is seeded with src2 itself, the result is moved to dst, and a scalar bge/mv pair then replaces dst with src1 unless src1 >= dst. */ +// vector integer min reduction +instruct vreduce_minB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MinReductionV src1 src2)); + ins_cost(VEC_COST); + effect(TEMP tmp); + format %{ "vreduce_minB $dst, $src1, $src2, $tmp" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e8); + __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); + __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); + Label Ldone; + __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); + __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); + __ bind(Ldone); + %} + ins_pipe(pipe_slow); +%} +/* vreduce_minS: T_SHORT variant of vreduce_minB; identical sequence under SEW=e16. */ +instruct vreduce_minS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ +
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MinReductionV src1 src2)); + ins_cost(VEC_COST); + effect(TEMP tmp); + format %{ "vreduce_minS $dst, $src1, $src2, $tmp" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e16); + __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); + __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); + Label Ldone; + __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); + __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); + __ bind(Ldone); + %} + ins_pipe(pipe_slow); +%} +/* vreduce_minI: MinReductionV for T_INT elements. src1 is moved into tmp (vmv.s.x) and used as the vredmin.vs seed; the result is moved to dst. SEW=e32. */ +instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MinReductionV src1 src2)); + ins_cost(VEC_COST); + effect(TEMP tmp); + format %{ "vreduce_minI $dst, $src1, $src2, $tmp" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); + __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); + __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vreduce_minL: T_LONG variant of vreduce_minI with long scalar operands and SEW=e64. */ +instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MinReductionV src1 src2)); + ins_cost(VEC_COST); + effect(TEMP tmp); + format %{ "vreduce_minL $dst, $src1, $src2, $tmp" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); + __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); + __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector float max reduction +/* vreduce_maxF: MaxReductionV for T_FLOAT elements. Delegates to reduce_minmax_FD_v with is_double=false, is_min=false; dst is TEMP_DEF and two TEMP vector registers are supplied. */ +instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ +
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MaxReductionV src1 src2)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %} + ins_encode %{ + __ reduce_minmax_FD_v($dst$$FloatRegister, + $src1$$FloatRegister, as_VectorRegister($src2$$reg), + as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), + false /* is_double */, false /* is_min */); + %} + ins_pipe(pipe_slow); +%} +/* vreduce_maxD: MaxReductionV for T_DOUBLE elements; calls reduce_minmax_FD_v with is_double=true, is_min=false. */ +instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (MaxReductionV src1 src2)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %} + ins_encode %{ + __ reduce_minmax_FD_v($dst$$FloatRegister, + $src1$$FloatRegister, as_VectorRegister($src2$$reg), + as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), + true /* is_double */, false /* is_min */); + %} + ins_pipe(pipe_slow); +%} + +// vector float min reduction +/* vreduce_minF: MinReductionV for T_FLOAT elements; calls reduce_minmax_FD_v with is_double=false, is_min=true. */ +instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (MinReductionV src1 src2)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "reduce_minF $dst, $src1, $src2, $tmp1, $tmp2" %} + ins_encode %{ + __ reduce_minmax_FD_v($dst$$FloatRegister, + $src1$$FloatRegister, as_VectorRegister($src2$$reg), + as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), + false /* is_double */, true /* is_min */); + %} + ins_pipe(pipe_slow); +%} +/* vreduce_minD: MinReductionV for T_DOUBLE elements; calls reduce_minmax_FD_v with is_double=true, is_min=true. */ +instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (MinReductionV src1 src2)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst,
TEMP tmp1, TEMP tmp2); + format %{ "reduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %} + ins_encode %{ + __ reduce_minmax_FD_v($dst$$FloatRegister, + $src1$$FloatRegister, as_VectorRegister($src2$$reg), + as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), + true /* is_double */, true /* is_min */); + %} + ins_pipe(pipe_slow); +%} + +// vector Math.rint, floor, ceil +/* vroundD: RoundDoubleModeV for T_DOUBLE vectors. Each supported rmode writes a rounding mode to CSR_FRM (rne for rint, rdn for floor, rup for ceil) and then emits vfcvt_rtz_x_f_v; any other rmode hits ShouldNotReachHere. NOTE(review): all three cases emit the same rtz-named float-to-integer convert and no vsetvli is issued in this rule, unlike its neighbours; confirm the convert honours FRM, that an integer-format result is what RoundDoubleModeV expects, and that SEW is already e64 here. */ +instruct vroundD(vReg dst, vReg src, immI rmode) %{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (RoundDoubleModeV src rmode)); + format %{ "vroundD $dst, $src, $rmode" %} + ins_encode %{ + switch ($rmode$$constant) { + case RoundDoubleModeNode::rmode_rint: + __ csrwi(CSR_FRM, C2_MacroAssembler::rne); + __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); + break; + case RoundDoubleModeNode::rmode_floor: + __ csrwi(CSR_FRM, C2_MacroAssembler::rdn); + __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); + break; + case RoundDoubleModeNode::rmode_ceil: + __ csrwi(CSR_FRM, C2_MacroAssembler::rup); + __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); + break; + default: + ShouldNotReachHere(); + break; + } + %} + ins_pipe(pipe_slow); +%} + +// vector replicate +/* replicateB: ReplicateB from a scalar register; vmv.v.x under SEW=e8. */ +instruct replicateB(vReg dst, iRegIorL2I src) %{ + match(Set dst (ReplicateB src)); + ins_cost(VEC_COST); + format %{ "vmv.v.x $dst, $src\t#@replicateB" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e8); + __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* replicateS: ReplicateS from a scalar register; vmv.v.x under SEW=e16. */ +instruct replicateS(vReg dst, iRegIorL2I src) %{ + match(Set dst (ReplicateS src)); + ins_cost(VEC_COST); + format %{ "vmv.v.x $dst, $src\t#@replicateS" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e16); + __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* replicateI: ReplicateI from a scalar register; vmv.v.x under SEW=e32. */ +instruct replicateI(vReg dst, iRegIorL2I src) %{ + match(Set dst (ReplicateI src)); +
ins_cost(VEC_COST); + format %{ "vmv.v.x $dst, $src\t#@replicateI" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* replicateL: ReplicateL from a long scalar register; vmv.v.x under SEW=e64. */ +instruct replicateL(vReg dst, iRegL src) %{ + match(Set dst (ReplicateL src)); + ins_cost(VEC_COST); + format %{ "vmv.v.x $dst, $src\t#@replicateL" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* replicateB_imm5: ReplicateB of an immI5 constant; emits vmv.v.i with the constant under SEW=e8, so no scalar register is needed. */ +instruct replicateB_imm5(vReg dst, immI5 con) %{ + match(Set dst (ReplicateB con)); + ins_cost(VEC_COST); + format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e8); + __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); + %} + ins_pipe(pipe_slow); +%} +/* replicateS_imm5: immI5 form of ReplicateS; vmv.v.i under SEW=e16. */ +instruct replicateS_imm5(vReg dst, immI5 con) %{ + match(Set dst (ReplicateS con)); + ins_cost(VEC_COST); + format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e16); + __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); + %} + ins_pipe(pipe_slow); +%} +/* replicateI_imm5: immI5 form of ReplicateI; vmv.v.i under SEW=e32. */ +instruct replicateI_imm5(vReg dst, immI5 con) %{ + match(Set dst (ReplicateI con)); + ins_cost(VEC_COST); + format %{ "vmv.v.i $dst, $con\t#@replicateI_imm5" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); + %} + ins_pipe(pipe_slow); +%} +/* replicateL_imm5: immL5 form of ReplicateL; vmv.v.i under SEW=e64. */ +instruct replicateL_imm5(vReg dst, immL5 con) %{ + match(Set dst (ReplicateL con)); + ins_cost(VEC_COST); + format %{ "vmv.v.i $dst, $con\t#@replicateL_imm5" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); + %} + ins_pipe(pipe_slow); +%} +/* replicateF: ReplicateF from a float register; vfmv.v.f under SEW=e32. */ +instruct replicateF(vReg dst, fRegF src) %{ + match(Set dst (ReplicateF src)); + ins_cost(VEC_COST); + format %{ "vfmv.v.f $dst, $src\t#@replicateF" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __
vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); + %} + ins_pipe(pipe_slow); +%} +/* replicateD: ReplicateD from a double register; vfmv.v.f under SEW=e64. */ +instruct replicateD(vReg dst, fRegD src) %{ + match(Set dst (ReplicateD src)); + ins_cost(VEC_COST); + format %{ "vfmv.v.f $dst, $src\t#@replicateD" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); + %} + ins_pipe(pipe_slow); +%} + +// vector shift +/* vasrB: RShiftVB with a per-element vector shift count. v0 is written as a mask: first set where shift > BitsPerByte - 1 for a masked vsra.vi by BitsPerByte - 1, then inverted for a masked vsra.vv by the real count. dst is TEMP_DEF because it is written in two masked steps. */ +instruct vasrB(vReg dst, vReg src, vReg shift) %{ + match(Set dst (RShiftVB src shift)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst); + format %{ "vmsgtu.vi v0, $shift, 7\t#@vasrB\n\t" + "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t" + "vmnot.m v0, v0\n\t" + "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e8); + // if shift > BitsPerByte - 1, shift by BitsPerByte - 1 instead (arithmetic shift, so the element is sign-filled) + __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); + __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + BitsPerByte - 1, Assembler::v0_t); + // otherwise, shift + __ vmnot_m(v0, v0); + __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($shift$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} +/* vasrS: RShiftVS; same two-step masked sequence as vasrB with BitsPerShort - 1 as the limit and SEW=e16. */ +instruct vasrS(vReg dst, vReg src, vReg shift) %{ + match(Set dst (RShiftVS src shift)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst); + format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t" + "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t" + "vmnot.m v0, v0\n\t" + "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e16); + // if shift > BitsPerShort - 1, shift by BitsPerShort - 1 instead (arithmetic shift, so the element is sign-filled) + __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); + __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + BitsPerShort - 1, Assembler::v0_t); + // otherwise, shift + __ vmnot_m(v0, v0); + __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($shift$$reg),
Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} +/* vasrI: RShiftVI with a vector shift count; a single unmasked vsra.vv under SEW=e32. */ +instruct vasrI(vReg dst, vReg src, vReg shift) %{ + match(Set dst (RShiftVI src shift)); + ins_cost(VEC_COST); + format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vasrL: RShiftVL with a vector shift count; a single unmasked vsra.vv under SEW=e64. */ +instruct vasrL(vReg dst, vReg src, vReg shift) %{ + match(Set dst (RShiftVL src shift)); + ins_cost(VEC_COST); + format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vlslB: LShiftVB with a vector shift count. v0 is written as a mask: where shift > BitsPerByte - 1 the element is produced by a masked vxor.vv of src with itself, then the mask is inverted and the remaining elements get a masked vsll.vv. dst is TEMP_DEF. */ +instruct vlslB(vReg dst, vReg src, vReg shift) %{ + match(Set dst (LShiftVB src shift)); + ins_cost(VEC_COST); + effect( TEMP_DEF dst); + format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t" + "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" + "vmnot.m v0, v0\n\t" + "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e8); + // if shift > BitsPerByte - 1, clear the element + __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); + __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg), Assembler::v0_t); + // otherwise, shift + __ vmnot_m(v0, v0); + __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($shift$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} +/* vlslS: LShiftVS; same masked sequence as vlslB with BitsPerShort - 1 as the limit and SEW=e16. */ +instruct vlslS(vReg dst, vReg src, vReg shift) %{ + match(Set dst (LShiftVS src shift)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst); + format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t" + "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" + "vmnot.m v0, v0\n\t" + "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e16); + // if shift > BitsPerShort - 1, clear the
element + __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); + __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg), Assembler::v0_t); + // otherwise, shift + __ vmnot_m(v0, v0); + __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($shift$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} +/* vlslI: LShiftVI with a vector shift count; a single unmasked vsll.vv under SEW=e32. */ +instruct vlslI(vReg dst, vReg src, vReg shift) %{ + match(Set dst (LShiftVI src shift)); + ins_cost(VEC_COST); + format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vlslL: LShiftVL with a vector shift count; a single unmasked vsll.vv under SEW=e64. */ +instruct vlslL(vReg dst, vReg src, vReg shift) %{ + match(Set dst (LShiftVL src shift)); + ins_cost(VEC_COST); + format %{ "vsll.vv $dst, $src, $shift\t#@vlslL" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vlsrB: URShiftVB with a vector shift count. Where shift > BitsPerByte - 1 the element is produced by a masked vxor.vv of src with itself; v0 is then inverted and the remaining elements get a masked vsrl.vv. dst is TEMP_DEF. */ +instruct vlsrB(vReg dst, vReg src, vReg shift) %{ + match(Set dst (URShiftVB src shift)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst); + format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t" + "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" + "vmnot.m v0, v0\n\t" + "vsrl.vv $dst, $src, $shift, Assembler::v0_t" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e8); + // if shift > BitsPerByte - 1, clear the element + __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); + __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg), Assembler::v0_t); + // otherwise, shift + __ vmnot_m(v0, v0); + __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($shift$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} +/* vlsrS: URShiftVS; same masked sequence as vlsrB with BitsPerShort - 1 as the limit and SEW=e16. */ +instruct vlsrS(vReg dst, vReg src, vReg shift) %{ +
match(Set dst (URShiftVS src shift)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst); + format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t" + "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" + "vmnot.m v0, v0\n\t" + "vsrl.vv $dst, $src, $shift, Assembler::v0_t" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e16); + // if shift > BitsPerShort - 1, clear the element + __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); + __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg), Assembler::v0_t); + // otherwise, shift + __ vmnot_m(v0, v0); + __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($shift$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +/* vlsrI: URShiftVI with a vector shift count; a single unmasked vsrl.vv under SEW=e32. */ +instruct vlsrI(vReg dst, vReg src, vReg shift) %{ + match(Set dst (URShiftVI src shift)); + ins_cost(VEC_COST); + format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} + +/* vlsrL: URShiftVL with a vector shift count; a single unmasked vsrl.vv under SEW=e64. */ +instruct vlsrL(vReg dst, vReg src, vReg shift) %{ + match(Set dst (URShiftVL src shift)); + ins_cost(VEC_COST); + format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($shift$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vasrB_imm: RShiftVB by a constant count wrapped in RShiftCntV. The count is masked to 5 bits; 0 emits vor.vv of src with itself and returns; counts >= BitsPerByte are replaced by BitsPerByte - 1 before vsra.vi. */ +instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ + match(Set dst (RShiftVB src (RShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli(t0, x0, Assembler::e8); + if (con == 0) { + __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg)); + return; + } + if (con >= BitsPerByte) con = BitsPerByte - 1; + __
vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} +/* vasrS_imm: RShiftVS by a constant count. The count is masked to 5 bits; 0 emits vor.vv of src with itself and returns; counts >= BitsPerShort are replaced by BitsPerShort - 1 before vsra.vi. SEW=e16. */ +instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ + match(Set dst (RShiftVS src (RShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli(t0, x0, Assembler::e16); + if (con == 0) { + __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg)); + return; + } + if (con >= BitsPerShort) con = BitsPerShort - 1; + __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} +/* vasrI_imm: RShiftVI by a constant count masked to 5 bits; 0 emits vor.vv of src with itself, otherwise vsra.vi. SEW=e32. */ +instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ + match(Set dst (RShiftVI src (RShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsra.vi $dst, $src, $shift\t#@vasrI_imm" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli(t0, x0, Assembler::e32); + if (con == 0) { + __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg)); + return; + } + __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} +/* vasrL_imm: RShiftVL by a constant count. The predicate only admits counts whose low 6 bits are below 32, so the 5-bit mask applied here keeps the whole count; 0 emits vor.vv of src with itself, otherwise vsra.vi. SEW=e64. */ +instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ + predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); + match(Set dst (RShiftVL src (RShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsra.vi $dst, $src, $shift\t#@vasrL_imm" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli(t0, x0, Assembler::e64); + if (con == 0) { + __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg)); + return; + } + __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} +/* vlsrB_imm: URShiftVB by a constant count masked to 5 bits. 0 emits vor.vv of src with itself; counts >= BitsPerByte emit vxor.vv of src with itself; otherwise vsrl.vi. SEW=e8. */ +instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ + match(Set dst (URShiftVB src (RShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsrl.vi $dst,
$src, $shift\t#@vlsrB_imm" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli(t0, x0, Assembler::e8); + if (con == 0) { + __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg)); + return; + } + if (con >= BitsPerByte) { + __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg)); + return; + } + __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} +/* vlsrS_imm: URShiftVS by a constant count masked to 5 bits. 0 emits vor.vv of src with itself; counts >= BitsPerShort emit vxor.vv of src with itself; otherwise vsrl.vi. SEW=e16. */ +instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ + match(Set dst (URShiftVS src (RShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli(t0, x0, Assembler::e16); + if (con == 0) { + __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg)); + return; + } + if (con >= BitsPerShort) { + __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg)); + return; + } + __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} +/* vlsrI_imm: URShiftVI by a constant count masked to 5 bits; 0 emits vor.vv of src with itself, otherwise vsrl.vi. SEW=e32. */ +instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ + match(Set dst (URShiftVI src (RShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli(t0, x0, Assembler::e32); + if (con == 0) { + __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg)); + return; + } + __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} +/* vlsrL_imm: URShiftVL by a constant count; the predicate only admits counts whose low 6 bits are below 32. 0 emits vor.vv of src with itself, otherwise vsrl.vi. SEW=e64. */ +instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ + predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); + match(Set dst (URShiftVL src (RShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsrl.vi $dst, $src,
$shift\t#@vlsrL_imm" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli(t0, x0, Assembler::e64); + if (con == 0) { + __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg)); + return; + } + __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} +/* vlslB_imm: LShiftVB by a constant count wrapped in LShiftCntV. The count is masked to 5 bits; counts >= BitsPerByte emit vxor.vv of src with itself and return, otherwise vsll.vi. There is no special case for a zero count. SEW=e8. */ +instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ + match(Set dst (LShiftVB src (LShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli(t0, x0, Assembler::e8); + if (con >= BitsPerByte) { + __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg)); + return; + } + __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} +/* vlslS_imm: LShiftVS by a constant count; same shape as vlslB_imm with BitsPerShort as the limit and SEW=e16. */ +instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ + match(Set dst (LShiftVS src (LShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli(t0, x0, Assembler::e16); + if (con >= BitsPerShort) { + __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg)); + return; + } + __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} +/* vlslI_imm: LShiftVI by a constant count masked to 5 bits; always a single vsll.vi under SEW=e32. */ +instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ + match(Set dst (LShiftVI src (LShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli(t0, x0, Assembler::e32); + __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} +/* vlslL_imm: LShiftVL by a constant count; the predicate only admits counts whose low 6 bits are below 32. Always a single vsll.vi under SEW=e64. */ +instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ + predicate((n->in(2)->in(1)->get_int() & 0x3f) <
32); + match(Set dst (LShiftVL src (LShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli(t0, x0, Assembler::e64); + __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} +/* vshiftcntB: matches both LShiftCntV and RShiftCntV when the node's vector element type is T_BYTE; emits vmv.v.x of the scalar count under SEW=e8. */ +instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e8); + __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vshiftcntS: as vshiftcntB, selected for T_SHORT or T_CHAR element types; SEW=e16. */ +instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || + n->bottom_type()->is_vect()->element_basic_type() == T_CHAR); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e16); + __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vshiftcntI: as vshiftcntB, selected for T_INT element types; SEW=e32. */ +instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vshiftcntL: as vshiftcntB, selected for T_LONG element types; SEW=e64. The count operand is still an int-class register. */ +instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); + %} +
ins_pipe(pipe_slow); +%} + +// vector sqrt +/* vsqrtF: SqrtVF; vfsqrt.v under SEW=e32. */ +instruct vsqrtF(vReg dst, vReg src) %{ + match(Set dst (SqrtVF src)); + ins_cost(VEC_COST); + format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vsqrtD: SqrtVD; vfsqrt.v under SEW=e64. */ +instruct vsqrtD(vReg dst, vReg src) %{ + match(Set dst (SqrtVD src)); + ins_cost(VEC_COST); + format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// vector sub +/* vsubB: SubVB; vsub.vv of src1 and src2 under SEW=e8. */ +instruct vsubB(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (SubVB src1 src2)); + ins_cost(VEC_COST); + format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e8); + __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vsubS: SubVS; vsub.vv under SEW=e16. */ +instruct vsubS(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (SubVS src1 src2)); + ins_cost(VEC_COST); + format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e16); + __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vsubI: SubVI; vsub.vv under SEW=e32. */ +instruct vsubI(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (SubVI src1 src2)); + ins_cost(VEC_COST); + format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vsubL: SubVL; vsub.vv under SEW=e64. */ +instruct vsubL(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (SubVL src1 src2)); + ins_cost(VEC_COST); + format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vsub_vv(as_VectorRegister($dst$$reg),
as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vsubF: SubVF; vfsub.vv of src1 and src2 under SEW=e32. */ +instruct vsubF(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (SubVF src1 src2)); + ins_cost(VEC_COST); + format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubF" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e32); + __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vsubD: SubVD; vfsub.vv under SEW=e64. */ +instruct vsubD(vReg dst, vReg src1, vReg src2) %{ + match(Set dst (SubVD src1 src2)); + ins_cost(VEC_COST); + format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %} + ins_encode %{ + __ vsetvli(t0, x0, Assembler::e64); + __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), + as_VectorRegister($src2$$reg)); + %} + ins_pipe(pipe_slow); +%} +/* vstring_equalsL: StrEquals rule used when UseRVV is set and the encoding is LL. Operands are pinned to fixed registers (R11, R13, R14, R10, V1-V3); str1, str2 and cnt are USE_KILL, the vector registers are TEMP and the flags are killed. Calls string_equals_v with a final argument of 1. */ +instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + iRegI_R10 result, vReg_V1 v1, + vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) +%{ + predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); + + format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} + ins_encode %{ + // Count is in 8-bit bytes; non-Compact chars are 16 bits.
+ __ string_equals_v($str1$$Register, $str2$$Register, + $result$$Register, $cnt$$Register, 1); + %} + ins_pipe(pipe_class_memory); +%} + +instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + iRegI_R10 result, vReg_V1 v1, + vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) +%{ + predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); + + format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} + ins_encode %{ + // Count is in 8-bit bytes; non-Compact chars are 16 bits. + __ string_equals_v($str1$$Register, $str2$$Register, + $result$$Register, $cnt$$Register, 2); + %} + ins_pipe(pipe_class_memory); +%} + +instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, + vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) +%{ + predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (AryEq ary1 ary2)); + effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); + + format %{ "Array Equals $ary1, $ary2 -> $result\t#@array_equalsB // KILL $tmp" %} + ins_encode %{ + __ arrays_equals_v($ary1$$Register, $ary2$$Register, + $result$$Register, $tmp$$Register, 1); + %} + ins_pipe(pipe_class_memory); +%} + +instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, + vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) +%{ + predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (AryEq ary1 ary2)); + effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); + + format %{ "Array Equals $ary1, $ary2 -> $result\t#@array_equalsC // KILL $tmp" %} + ins_encode %{ + __ arrays_equals_v($ary1$$Register, $ary2$$Register, + $result$$Register, $tmp$$Register, 2); + %} + ins_pipe(pipe_class_memory); +%} + +instruct 
vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, + iRegP_R28 tmp1, iRegL_R29 tmp2) +%{ + predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); + match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, + TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + + format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} + ins_encode %{ + // Count is in 8-bit bytes; non-Compact chars are 16 bits. + __ string_compare_v($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + $tmp1$$Register, $tmp2$$Register, + StrIntrinsicNode::UU); + %} + ins_pipe(pipe_class_memory); +%} +instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, + iRegP_R28 tmp1, iRegL_R29 tmp2) +%{ + predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); + match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, + TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + + format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} + ins_encode %{ + __ string_compare_v($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + $tmp1$$Register, $tmp2$$Register, + StrIntrinsicNode::LL); + %} + ins_pipe(pipe_class_memory); +%} + +instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, + iRegP_R28 tmp1, iRegL_R29 tmp2) +%{ + predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); + match(Set result(StrComp(Binary 
str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, + TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + + format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} + ins_encode %{ + __ string_compare_v($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + $tmp1$$Register, $tmp2$$Register, + StrIntrinsicNode::UL); + %} + ins_pipe(pipe_class_memory); +%} +instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, + iRegP_R28 tmp1, iRegL_R29 tmp2) +%{ + predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); + match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, + TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + + format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} + ins_encode %{ + __ string_compare_v($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + $tmp1$$Register, $tmp2$$Register, + StrIntrinsicNode::LU); + %} + ins_pipe(pipe_class_memory); +%} + +// fast byte[] to char[] inflation +instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len, + vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) +%{ + predicate(UseRVV); + match(Set dummy (StrInflatedCopy src (Binary dst len))); + effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); + + format %{ "String Inflate $src,$dst" %} + ins_encode %{ + __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); + %} + ins_pipe(pipe_class_memory); +%} + +// encode char[] to byte[] in ISO_8859_1 +instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, + vReg_V1 v1, vReg_V2 
v2, vReg_V3 v3, iRegL tmp) +%{ + predicate(UseRVV); + match(Set result (EncodeISOArray src (Binary dst len))); + effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, + TEMP v1, TEMP v2, TEMP v3, TEMP tmp); + + format %{ "Encode array $src,$dst,$len -> $result" %} + ins_encode %{ + __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register, + $result$$Register, $tmp$$Register); + %} + ins_pipe( pipe_class_memory ); +%} + +// fast char[] to byte[] compression +instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, + vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) +%{ + predicate(UseRVV); + match(Set result (StrCompressedCopy src (Binary dst len))); + effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, + TEMP v1, TEMP v2, TEMP v3, TEMP tmp); + + format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %} + ins_encode %{ + __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register, + $result$$Register, $tmp$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct vhas_negatives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result, iRegL tmp) +%{ + predicate(UseRVV); + match(Set result (HasNegatives ary len)); + effect(USE_KILL ary, USE_KILL len, TEMP tmp); + + format %{ "has negatives byte[] $ary,$len -> $result" %} + ins_encode %{ + __ has_negatives_v($ary$$Register, $len$$Register, $result$$Register, $tmp$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, + vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) +%{ + predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); + effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, + TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); + + format %{ "StringUTF16 IndexOf char[] $str1, $cnt1, $ch -> $result" %} + + ins_encode %{ + 
__ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, + $result$$Register, $tmp1$$Register, $tmp2$$Register, + false /* isL */); + %} + + ins_pipe(pipe_class_memory); +%} + +instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, + vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) +%{ + predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); + effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, + TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); + + format %{ "StringLatin1 IndexOf char[] $str1, $cnt1, $ch -> $result" %} + + ins_encode %{ + __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, + $result$$Register, $tmp1$$Register, $tmp2$$Register, + true /* isL */); + %} + + ins_pipe(pipe_class_memory); +%} + +// clearing of an array +instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, + vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3) +%{ + predicate(UseRVV); + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3); + + format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} + + ins_encode %{ + __ clear_array_v($base$$Register, $cnt$$Register); + %} + + ins_pipe(pipe_class_memory); +%} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp @@ -0,0 +1,2763 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/debugInfoRec.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "compiler/oopMap.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "logging/log.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/klass.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/jniHandles.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/align.hpp" +#include "utilities/formatBuffer.hpp" +#include "vmreg_riscv.inline.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif +#ifdef COMPILER2 +#include "adfiles/ad_riscv.hpp" +#include "opto/runtime.hpp" +#endif + +#define __ masm-> + 
+const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; + +class SimpleRuntimeFrame { +public: + + // Most of the runtime stubs have this simple frame layout. + // This class exists to make the layout shared in one place. + // Offsets are for compiler stack slots, which are jints. + enum layout { + // The frame sender code expects that fp will be in the "natural" place and + // will override any oopMap setting for it. We must therefore force the layout + // so that it agrees with the frame sender code. + // we don't expect any arg reg save area so riscv asserts that + // frame::arg_reg_save_area_bytes == 0 + fp_off = 0, fp_off2, + return_off, return_off2, + framesize + }; +}; + +class RegisterSaver { + const bool _save_vectors; + public: + RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {} + ~RegisterSaver() {} + OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); + void restore_live_registers(MacroAssembler* masm); + + // Offsets into the register save area + // Used by deoptimization when it is managing result register + // values on its own + // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) + // |---v0---|<---SP + // |---v1---|save vectors only in generate_handler_blob + // |-- .. --| + // |---v31--|----- + // |---f0---| + // |---f1---| + // | .. | + // |---f31--| + // |---reserved slot for stack alignment---| + // |---x5---| + // | x6 | + // |---.. 
--| + // |---x31--| + // |---fp---| + // |---ra---| + int v0_offset_in_bytes(void) { return 0; } + int f0_offset_in_bytes(void) { + int f0_offset = 0; +#ifdef COMPILER2 + if (_save_vectors) { + f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers * + BytesPerInt; + } +#endif + return f0_offset; + } + int reserved_slot_offset_in_bytes(void) { + return f0_offset_in_bytes() + + FloatRegisterImpl::max_slots_per_register * + FloatRegisterImpl::number_of_registers * + BytesPerInt; + } + + int reg_offset_in_bytes(Register r) { + assert (r->encoding() > 4, "ra, sp, gp and tp not saved"); + return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize; + } + + int freg_offset_in_bytes(FloatRegister f) { + return f0_offset_in_bytes() + f->encoding() * wordSize; + } + + int ra_offset_in_bytes(void) { + return reserved_slot_offset_in_bytes() + + (RegisterImpl::number_of_registers - 3) * + RegisterImpl::max_slots_per_register * + BytesPerInt; + } +}; + +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { + int vector_size_in_bytes = 0; + int vector_size_in_slots = 0; +#ifdef COMPILER2 + if (_save_vectors) { + vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE); + vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT); + } +#endif + + assert_cond(masm != NULL && total_frame_words != NULL); + int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); + // OopMap frame size is in compiler stack slots (jint's) not bytes or words + int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; + // The caller will allocate additional_frame_words + int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt; + // CodeBlob frame size is in words. 
+ int frame_size_in_words = frame_size_in_bytes / wordSize; + *total_frame_words = frame_size_in_words; + + // Save Integer, Float and Vector registers. + __ enter(); + __ push_CPU_state(_save_vectors, vector_size_in_bytes); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. This + // will allow deoptimization at this safepoint to find all possible + // debug-info recordings, as well as let GC find all oops. + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* oop_map = new OopMap(frame_size_in_slots, 0); + assert_cond(oop_maps != NULL && oop_map != NULL); + + int sp_offset_in_slots = 0; + int step_in_slots = 0; + if (_save_vectors) { + step_in_slots = vector_size_in_slots; + for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { + VectorRegister r = as_VectorRegister(i); + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); + } + } + + step_in_slots = FloatRegisterImpl::max_slots_per_register; + for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { + FloatRegister r = as_FloatRegister(i); + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); + } + + step_in_slots = RegisterImpl::max_slots_per_register; + // skip the slot reserved for alignment, see MacroAssembler::push_reg; + // also skip x5 ~ x6 on the stack because they are caller-saved registers. + sp_offset_in_slots += RegisterImpl::max_slots_per_register * 3; + // besides, we ignore x0 ~ x4 because push_CPU_state won't push them on the stack. 
+ for (int i = 7; i < RegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { + Register r = as_Register(i); + if (r != xthread) { + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots + additional_frame_slots), r->as_VMReg()); + } + } + + return oop_map; +} + +void RegisterSaver::restore_live_registers(MacroAssembler* masm) { + assert_cond(masm != NULL); +#ifdef COMPILER2 + __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE)); +#else + __ pop_CPU_state(_save_vectors); +#endif + __ leave(); +} + +// Is vector's size (in bytes) bigger than a size saved by default? +// riscv does not overlay the floating-point registers on vector registers like aarch64. +bool SharedRuntime::is_wide_vector(int size) { + return UseRVV; +} + +// The java_calling_convention describes stack locations as ideal slots on +// a frame with no abi restrictions. Since we must observe abi restrictions +// (like the placement of the register window) the slots must be biased by +// the following value. +static int reg2offset_in(VMReg r) { + // Account for saved fp and ra + // This should really be in_preserve_stack_slots + return r->reg2stack() * VMRegImpl::stack_slot_size; +} + +static int reg2offset_out(VMReg r) { + return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; +} + +// --------------------------------------------------------------------------- +// Read the array of BasicTypes from a signature, and compute where the +// arguments should go. Values in the VMRegPair regs array refer to 4-byte +// quantities. Values less than VMRegImpl::stack0 are registers, those above +// refer to 4-byte stack slots. All stack slots are based off of the stack pointer +// as framesizes are fixed. +// VMRegImpl::stack0 refers to the first slot 0(sp). +// and VMRegImpl::stack0+1 refers to the memory word 4-bytes higher. 
Register +// up to RegisterImpl::number_of_registers) are the 64-bit +// integer registers. + +// Note: the INPUTS in sig_bt are in units of Java argument words, +// which are 64-bit. The OUTPUTS are in 32-bit units. + +// The Java calling convention is a "shifted" version of the C ABI. +// By skipping the first C ABI register we can call non-static jni +// methods with small numbers of arguments without having to shuffle +// the arguments at all. Since we control the java ABI we ought to at +// least get some advantage out of it. + +int SharedRuntime::java_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed) { + // Create the mapping between argument positions and + // registers. + static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { + j_rarg0, j_rarg1, j_rarg2, j_rarg3, + j_rarg4, j_rarg5, j_rarg6, j_rarg7 + }; + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { + j_farg0, j_farg1, j_farg2, j_farg3, + j_farg4, j_farg5, j_farg6, j_farg7 + }; + + uint int_args = 0; + uint fp_args = 0; + uint stk_args = 0; // inc by 2 each time + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_BOOLEAN: // fall through + case T_CHAR: // fall through + case T_BYTE: // fall through + case T_SHORT: // fall through + case T_INT: + if (int_args < Argument::n_int_register_parameters_j) { + regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_VOID: + // halves of T_LONG or T_DOUBLE + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_LONG: // fall through + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + case T_OBJECT: // fall through + case T_ARRAY: // fall through + case T_ADDRESS: + if (int_args < Argument::n_int_register_parameters_j) { + 
regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_FLOAT: + if (fp_args < Argument::n_float_register_parameters_j) { + regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_DOUBLE: + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + if (fp_args < Argument::n_float_register_parameters_j) { + regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + default: + ShouldNotReachHere(); + } + } + + return align_up(stk_args, 2); +} + +// Patch the callers callsite with entry to compiled code if it exists. +static void patch_callers_callsite(MacroAssembler *masm) { + assert_cond(masm != NULL); + Label L; + __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); + __ beqz(t0, L); + + __ enter(); + __ push_CPU_state(); + + // VM needs caller's callsite + // VM needs target method + // This needs to be a long call since we will relocate this adapter to + // the codeBuffer and it may not reach + +#ifndef PRODUCT + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); +#endif + + __ mv(c_rarg0, xmethod); + __ mv(c_rarg1, ra); + int32_t offset = 0; + __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); + __ jalr(x1, t0, offset); + + // Explicit fence.i required because fixup_callers_callsite may change the code + // stream. + __ safepoint_ifence(); + + __ pop_CPU_state(); + // restore sp + __ leave(); + __ bind(L); +} + +static void gen_c2i_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + Label& skip_fixup) { + // Before we get into the guts of the C2I adapter, see if we should be here + // at all. 
We've come from compiled code and are attempting to jump to the + // interpreter, which means the caller made a static call to get here + // (vcalls always get a compiled target if there is one). Check for a + // compiled target. If there is one, we need to patch the caller's call. + patch_callers_callsite(masm); + + __ bind(skip_fixup); + + int words_pushed = 0; + + // Since all args are passed on the stack, total_args_passed * + // Interpreter::stackElementSize is the space we need. + + int extraspace = total_args_passed * Interpreter::stackElementSize; + + __ mv(x30, sp); + + // stack is aligned, keep it that way + extraspace = align_up(extraspace, 2 * wordSize); + + if (extraspace) { + __ sub(sp, sp, extraspace); + } + + // Now write the args into the outgoing interpreter space + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); + continue; + } + + // offset to start parameters + int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; + int next_off = st_off - Interpreter::stackElementSize; + + // Say 4 args: + // i st_off + // 0 32 T_LONG + // 1 24 T_VOID + // 2 16 T_OBJECT + // 3 8 T_BOOL + // - 0 return address + // + // However to make things extra confusing. Because we can fit a Java long/double in + // a single slot on a 64 bit vm and it would be silly to break them up, the interpreter + // leaves one slot empty and only stores to a single slot. In this case the + // slot that is occupied is the T_VOID slot. See I said it was confusing. 
+ + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // memory to memory use t0 + int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size + + extraspace + + words_pushed * wordSize); + if (!r_2->is_valid()) { + __ lwu(t0, Address(sp, ld_off)); + __ sd(t0, Address(sp, st_off), /*temp register*/esp); + } else { + __ ld(t0, Address(sp, ld_off), /*temp register*/esp); + + // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG + // T_DOUBLE and T_LONG use two slots in the interpreter + if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + // ld_off == LSW, ld_off+wordSize == MSW + // st_off == MSW, next_off == LSW + __ sd(t0, Address(sp, next_off), /*temp register*/esp); +#ifdef ASSERT + // Overwrite the unused slot with known junk + __ li(t0, 0xdeadffffdeadaaaaul); + __ sd(t0, Address(sp, st_off), /*temp register*/esp); +#endif /* ASSERT */ + } else { + __ sd(t0, Address(sp, st_off), /*temp register*/esp); + } + } + } else if (r_1->is_Register()) { + Register r = r_1->as_Register(); + if (!r_2->is_valid()) { + // must be only an int (or less ) so move only 32bits to slot + __ sd(r, Address(sp, st_off)); + } else { + // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG + // T_DOUBLE and T_LONG use two slots in the interpreter + if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + // long/double in gpr +#ifdef ASSERT + // Overwrite the unused slot with known junk + __ li(t0, 0xdeadffffdeadaaabul); + __ sd(t0, Address(sp, st_off), /*temp register*/esp); +#endif /* ASSERT */ + __ sd(r, Address(sp, next_off)); + } else { + __ sd(r, Address(sp, st_off)); + } + } + } else { + assert(r_1->is_FloatRegister(), ""); + if (!r_2->is_valid()) { + // only a float use just part of the slot + __ fsw(r_1->as_FloatRegister(), Address(sp, st_off)); + } else { +#ifdef ASSERT + // Overwrite the unused slot with known junk + __ li(t0, 
0xdeadffffdeadaaacul); + __ sd(t0, Address(sp, st_off), /*temp register*/esp); +#endif /* ASSERT */ + __ fsd(r_1->as_FloatRegister(), Address(sp, next_off)); + } + } + } + + __ mv(esp, sp); // Interp expects args on caller's expression stack + + __ ld(t0, Address(xmethod, in_bytes(Method::interpreter_entry_offset()))); + __ jr(t0); +} + +void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs) { + // Cut-out for having no stack args. + int comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord; + if (comp_args_on_stack != 0) { + __ sub(t0, sp, comp_words_on_stack * wordSize); + __ andi(sp, t0, -16); + } + + // Will jump to the compiled code just as if compiled code was doing it. + // Pre-load the register-jump target early, to schedule it better. + __ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_offset()))); + + // Now generate the shuffle code. + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); + continue; + } + + // Pick up 0, 1 or 2 words from SP+offset. + + assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), + "scrambled load targets?"); + // Load in argument order going down. + int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; + // Point to interpreter value (vs. 
tag) + int next_off = ld_off - Interpreter::stackElementSize; + + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // Convert stack slot to an SP offset (no extra wordSize bias is applied here) + int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size; + if (!r_2->is_valid()) { + __ lw(t0, Address(esp, ld_off)); + __ sd(t0, Address(sp, st_off), /*temp register*/t2); + } else { + // + // We are using two optoregs. This can be either T_OBJECT, + // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates + // two slots but only uses one for the T_LONG or T_DOUBLE case + // So we must adjust where to pick up the data to match the + // interpreter. + // + // Interpreter local[n] == MSW, local[n+1] == LSW however locals + // are accessed as negative so LSW is at LOW address + + // ld_off is MSW so get LSW + const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? + next_off : ld_off; + __ ld(t0, Address(esp, offset)); + // st_off is LSW (i.e. reg.first()) + __ sd(t0, Address(sp, st_off), /*temp register*/t2); + } + } else if (r_1->is_Register()) { // Register argument + Register r = r_1->as_Register(); + if (r_2->is_valid()) { + // + // We are using two VMRegs. This can be either T_OBJECT, + // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates + // two slots but only uses one for the T_LONG or T_DOUBLE case + // So we must adjust where to pick up the data to match the + // interpreter. + + const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? + next_off : ld_off; + + // this can be a misaligned move + __ ld(r, Address(esp, offset)); + } else { + // sign extend and use a full word? 
+ __ lw(r, Address(esp, ld_off)); + } + } else { + if (!r_2->is_valid()) { + __ flw(r_1->as_FloatRegister(), Address(esp, ld_off)); + } else { + __ fld(r_1->as_FloatRegister(), Address(esp, next_off)); + } + } + } + + // 6243940 We might end up in handle_wrong_method if + // the callee is deoptimized as we race thru here. If that + // happens we don't want to take a safepoint because the + // caller frame will look interpreted and arguments are now + // "compiled" so it is much better to make this transition + // invisible to the stack walking code. Unfortunately if + // we try and find the callee by normal means a safepoint + // is possible. So we stash the desired callee in the thread + // and the vm will find there should this case occur. + + __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset())); + + __ jr(t1); +} + +// --------------------------------------------------------------- +AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterFingerPrint* fingerprint) { + address i2c_entry = __ pc(); + gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + + address c2i_unverified_entry = __ pc(); + Label skip_fixup; + + Label ok; + + const Register holder = t1; + const Register receiver = j_rarg0; + const Register tmp = t2; // A call-clobbered register not used for arg passing + + // ------------------------------------------------------------------------- + // Generate a C2I adapter. On entry we know xmethod holds the Method* during calls + // to the interpreter. The args start out packed in the compiled layout. They + // need to be unpacked into the interpreter layout. This will almost always + // require some stack space. We grow the current (compiled) stack, then repack + // the args. We finally end in a jump to the generic interpreter entry point. 
+ // On exit from the interpreter, the interpreter will restore our SP (lest the + // compiled code, which relies solely on SP and not FP, get sick). + + { + __ block_comment("c2i_unverified_entry {"); + __ load_klass(t0, receiver); + __ ld(tmp, Address(holder, CompiledICHolder::holder_klass_offset())); + __ ld(xmethod, Address(holder, CompiledICHolder::holder_metadata_offset())); + __ beq(t0, tmp, ok); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + + __ bind(ok); + // Method might have been compiled since the call site was patched to + // interpreted; if that is the case treat it as a miss so we can get + // the call site corrected. + __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); + __ beqz(t0, skip_fixup); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + __ block_comment("} c2i_unverified_entry"); + } + + address c2i_entry = __ pc(); + + // Class initialization barrier for static methods + address c2i_no_clinit_check_entry = NULL; + if (VM_Version::supports_fast_class_init_checks()) { + Label L_skip_barrier; + + { // Bypass the barrier for non-static methods + __ lwu(t0, Address(xmethod, Method::access_flags_offset())); + __ andi(t1, t0, JVM_ACC_STATIC); + __ beqz(t1, L_skip_barrier); // non-static + } + + __ load_method_holder(t1, xmethod); + __ clinit_barrier(t1, t0, &L_skip_barrier); + __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + + __ bind(L_skip_barrier); + c2i_no_clinit_check_entry = __ pc(); + } + + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->c2i_entry_barrier(masm); + + gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + + __ flush(); + return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); +} + +int SharedRuntime::vector_calling_convention(VMRegPair *regs, + uint num_bits, + uint total_args_passed) { + Unimplemented(); + 
return 0; +} + +int SharedRuntime::c_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + VMRegPair *regs2, + int total_args_passed) { + assert(regs2 == NULL, "not needed on riscv"); + + // We return the amount of VMRegImpl stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. + + static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { + c_rarg0, c_rarg1, c_rarg2, c_rarg3, + c_rarg4, c_rarg5, c_rarg6, c_rarg7 + }; + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { + c_farg0, c_farg1, c_farg2, c_farg3, + c_farg4, c_farg5, c_farg6, c_farg7 + }; + + uint int_args = 0; + uint fp_args = 0; + uint stk_args = 0; // inc by 2 each time + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_BOOLEAN: // fall through + case T_CHAR: // fall through + case T_BYTE: // fall through + case T_SHORT: // fall through + case T_INT: + if (int_args < Argument::n_int_register_parameters_c) { + regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_LONG: // fall through + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + case T_OBJECT: // fall through + case T_ARRAY: // fall through + case T_ADDRESS: // fall through + case T_METADATA: + if (int_args < Argument::n_int_register_parameters_c) { + regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_FLOAT: + if (fp_args < Argument::n_float_register_parameters_c) { + regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); + } else if (int_args < Argument::n_int_register_parameters_c) { + regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_DOUBLE: + assert((i + 1) < total_args_passed && sig_bt[i + 1] == 
T_VOID, "expecting half"); + if (fp_args < Argument::n_float_register_parameters_c) { + regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); + } else if (int_args < Argument::n_int_register_parameters_c) { + regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_VOID: // Halves of longs and doubles + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + default: + ShouldNotReachHere(); + } + } + + return stk_args; +} + +// On 64 bit we will store integer like items to the stack as +// 64 bits items (riscv64 abi) even though java would only store +// 32bits for a parameter. On 32bit it will simply be 32 bits +// So this routine will do 32->32 on 32bit and 32->64 on 64bit +static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + assert_cond(masm != NULL); + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ ld(t0, Address(fp, reg2offset_in(src.first()))); + __ sd(t0, Address(sp, reg2offset_out(dst.first()))); + } else { + // stack to reg + __ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); + } else { + if (dst.first() != src.first()) { + // 32bits extend sign + __ addw(dst.first()->as_Register(), src.first()->as_Register(), zr); + } + } +} + +// An oop arg. Must pass a handle not the oop itself +static void object_move(MacroAssembler* masm, + OopMap* map, + int oop_handle_offset, + int framesize_in_slots, + VMRegPair src, + VMRegPair dst, + bool is_receiver, + int* receiver_offset) { + assert_cond(masm != NULL && map != NULL && receiver_offset != NULL); + // must pass a handle. First figure out the location we use as a handle + Register rHandle = dst.first()->is_stack() ? 
t1 : dst.first()->as_Register(); + + // See if oop is NULL; if it is, we need no handle + + if (src.first()->is_stack()) { + + // Oop is already on the stack as an argument + int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); + map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); + if (is_receiver) { + *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; + } + + __ ld(t0, Address(fp, reg2offset_in(src.first()))); + __ la(rHandle, Address(fp, reg2offset_in(src.first()))); + // conditionally move a NULL + Label notZero1; + __ bnez(t0, notZero1); + __ mv(rHandle, zr); + __ bind(notZero1); + } else { + + // Oop is in a register; we must store it to the space we reserve + // on the stack for oop_handles and pass a handle if oop is non-NULL + + const Register rOop = src.first()->as_Register(); + int oop_slot = -1; + if (rOop == j_rarg0) { + oop_slot = 0; + } else if (rOop == j_rarg1) { + oop_slot = 1; + } else if (rOop == j_rarg2) { + oop_slot = 2; + } else if (rOop == j_rarg3) { + oop_slot = 3; + } else if (rOop == j_rarg4) { + oop_slot = 4; + } else if (rOop == j_rarg5) { + oop_slot = 5; + } else if (rOop == j_rarg6) { + oop_slot = 6; + } else { + assert(rOop == j_rarg7, "wrong register"); + oop_slot = 7; + } + + oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; + int offset = oop_slot * VMRegImpl::stack_slot_size; + + map->set_oop(VMRegImpl::stack2reg(oop_slot)); + // Store oop in handle area, may be NULL + __ sd(rOop, Address(sp, offset)); + if (is_receiver) { + *receiver_offset = offset; + } + + // rOop may be the same as rHandle + if (rOop == rHandle) { + Label isZero; + __ beqz(rOop, isZero); + __ la(rHandle, Address(sp, offset)); + __ bind(isZero); + } else { + Label notZero2; + __ la(rHandle, Address(sp, offset)); + __ bnez(rOop, notZero2); + __ mv(rHandle, zr); + __ bind(notZero2); + } + } + + // If arg is on the stack then place 
it otherwise it is already in correct reg. + if (dst.first()->is_stack()) { + __ sd(rHandle, Address(sp, reg2offset_out(dst.first()))); + } +} + +// A float arg may have to do float reg int reg conversion +static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + assert(src.first()->is_stack() && dst.first()->is_stack() || + src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); + assert_cond(masm != NULL); + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + __ lwu(t0, Address(fp, reg2offset_in(src.first()))); + __ sw(t0, Address(sp, reg2offset_out(dst.first()))); + } else if (dst.first()->is_Register()) { + __ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); + } else { + ShouldNotReachHere(); + } + } else if (src.first() != dst.first()) { + if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { + __ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + } else { + ShouldNotReachHere(); + } + } +} + +// A long move +static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + assert_cond(masm != NULL); + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ ld(t0, Address(fp, reg2offset_in(src.first()))); + __ sd(t0, Address(sp, reg2offset_out(dst.first()))); + } else { + // stack to reg + __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); + } else { + if (dst.first() != src.first()) { + __ mv(dst.first()->as_Register(), src.first()->as_Register()); + } + } +} + +// A double move +static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + assert(src.first()->is_stack() && dst.first()->is_stack() || + src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && 
dst.first()->is_reg(), "Unexpected error"); + assert_cond(masm != NULL); + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + __ ld(t0, Address(fp, reg2offset_in(src.first()))); + __ sd(t0, Address(sp, reg2offset_out(dst.first()))); + } else if (dst.first()-> is_Register()) { + __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); + } else { + ShouldNotReachHere(); + } + } else if (src.first() != dst.first()) { + if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { + __ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + } else { + ShouldNotReachHere(); + } + } +} + +void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + assert_cond(masm != NULL); + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ fsw(f10, Address(fp, -3 * wordSize)); + break; + case T_DOUBLE: + __ fsd(f10, Address(fp, -3 * wordSize)); + break; + case T_VOID: break; + default: { + __ sd(x10, Address(fp, -3 * wordSize)); + } + } +} + +void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + assert_cond(masm != NULL); + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ flw(f10, Address(fp, -3 * wordSize)); + break; + case T_DOUBLE: + __ fld(f10, Address(fp, -3 * wordSize)); + break; + case T_VOID: break; + default: { + __ ld(x10, Address(fp, -3 * wordSize)); + } + } +} + +static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { + assert_cond(masm != NULL && args != NULL); + RegSet x; + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { + x = x + args[i].first()->as_Register(); + } else if (args[i].first()->is_FloatRegister()) { + 
__ addi(sp, sp, -2 * wordSize); + __ fsd(args[i].first()->as_FloatRegister(), Address(sp, 0)); + } + } + __ push_reg(x, sp); +} + +static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { + assert_cond(masm != NULL && args != NULL); + RegSet x; + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { + x = x + args[i].first()->as_Register(); + } else { + ; + } + } + __ pop_reg(x, sp); + for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { + if (args[i].first()->is_Register()) { + ; + } else if (args[i].first()->is_FloatRegister()) { + __ fld(args[i].first()->as_FloatRegister(), Address(sp, 0)); + __ add(sp, sp, 2 * wordSize); + } + } +} + +static void rt_call(MacroAssembler* masm, address dest) { + assert_cond(masm != NULL); + CodeBlob *cb = CodeCache::find_blob(dest); + if (cb) { + __ far_call(RuntimeAddress(dest)); + } else { + int32_t offset = 0; + __ la_patchable(t0, RuntimeAddress(dest), offset); + __ jalr(x1, t0, offset); + } +} + +static void verify_oop_args(MacroAssembler* masm, + const methodHandle& method, + const BasicType* sig_bt, + const VMRegPair* regs) { + const Register temp_reg = x9; // not part of any compiled calling seq + if (VerifyOops) { + for (int i = 0; i < method->size_of_parameters(); i++) { + if (sig_bt[i] == T_OBJECT || + sig_bt[i] == T_ARRAY) { + VMReg r = regs[i].first(); + assert(r->is_valid(), "bad oop arg"); + if (r->is_stack()) { + __ ld(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); + __ verify_oop(temp_reg); + } else { + __ verify_oop(r->as_Register()); + } + } + } + } +} + +static void gen_special_dispatch(MacroAssembler* masm, + const methodHandle& method, + const BasicType* sig_bt, + const VMRegPair* regs) { + verify_oop_args(masm, method, sig_bt, regs); + vmIntrinsics::ID iid = method->intrinsic_id(); + + // Now write the args into the outgoing interpreter space + bool has_receiver = false; + Register receiver_reg = noreg; + int 
member_arg_pos = -1; + Register member_reg = noreg; + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); + if (ref_kind != 0) { + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument + member_reg = x9; // known to be free at this point + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); + } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { + has_receiver = true; + } else { + fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); + } + + if (member_reg != noreg) { + // Load the member_arg into register, if necessary. + SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); + VMReg r = regs[member_arg_pos].first(); + if (r->is_stack()) { + __ ld(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + member_reg = r->as_Register(); + } + } + + if (has_receiver) { + // Make sure the receiver is loaded into a register. + assert(method->size_of_parameters() > 0, "oob"); + assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); + VMReg r = regs[0].first(); + assert(r->is_valid(), "bad receiver arg"); + if (r->is_stack()) { + // Porting note: This assumes that compiled calling conventions always + // pass the receiver oop in a register. If this is not true on some + // platform, pick a temp and load the receiver from stack. 
+ fatal("receiver always in a register"); + receiver_reg = x12; // known to be free at this point + __ ld(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + receiver_reg = r->as_Register(); + } + } + + // Figure out which address we are really jumping to: + MethodHandles::generate_method_handle_dispatch(masm, iid, + receiver_reg, member_reg, /*for_compiler_entry:*/ true); +} + +// --------------------------------------------------------------------------- +// Generate a native wrapper for a given method. The method takes arguments +// in the Java compiled code convention, marshals them to the native +// convention (handlizes oops, etc), transitions to native, makes the call, +// returns to java state (possibly blocking), unhandlizes any result and +// returns. +// +// Critical native functions are a shorthand for the use of +// GetPrimitiveArrayCritical and disallow the use of any other JNI +// functions. The wrapper is expected to unpack the arguments before +// passing them to the callee and perform checks before and after the +// native call to ensure that the GCLocker +// lock_critical/unlock_critical semantics are followed. Some other +// parts of JNI setup are skipped like the tear down of the JNI handle +// block and the check for pending exceptions, since it's impossible for them +// to be thrown. +// +// They are roughly structured like this: +// if (GCLocker::needs_gc()) SharedRuntime::block_for_jni_critical() +// transition to thread_in_native +// unpack array arguments and call native entry point +// check for safepoint in progress +// check if any thread suspend flags are set +// call into JVM and possibly unlock the JNI critical +// if a GC was suppressed while in the critical native. 
+// transition back to thread_in_Java +// return to caller +// +nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + const methodHandle& method, + int compile_id, + BasicType* in_sig_bt, + VMRegPair* in_regs, + BasicType ret_type, + address critical_entry) { + if (method->is_method_handle_intrinsic()) { + vmIntrinsics::ID iid = method->intrinsic_id(); + intptr_t start = (intptr_t)__ pc(); + int vep_offset = ((intptr_t)__ pc()) - start; + + // First instruction must be a nop as it may need to be patched on deoptimisation + __ nop(); + gen_special_dispatch(masm, + method, + in_sig_bt, + in_regs); + int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period + __ flush(); + int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually + return nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + in_ByteSize(-1), + in_ByteSize(-1), + (OopMapSet*)NULL); + } + address native_func = method->native_function(); + assert(native_func != NULL, "must have function"); + + // An OopMap for lock (and class if static) + OopMapSet *oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + intptr_t start = (intptr_t)__ pc(); + + // We have received a description of where all the java arg are located + // on entry to the wrapper. We need to convert these args to where + // the jni function will expect them. To figure out where they go + // we convert the java signature to a C signature by inserting + // the hidden arguments as arg[0] and possibly arg[1] (static method) + + const int total_in_args = method->size_of_parameters(); + int total_c_args = total_in_args + (method->is_static() ? 
2 : 1); + + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); + BasicType* in_elem_bt = NULL; + + int argc = 0; + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } + + for (int i = 0; i < total_in_args ; i++) { + out_sig_bt[argc++] = in_sig_bt[i]; + } + + // Now figure out where the args must be stored and how much stack space + // they require. + int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + + // Compute framesize for the wrapper. We need to handlize all oops in + // incoming registers + + // Calculate the total number of stack slots we will need. + + // First count the abi requirement plus all of the outgoing args + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + + // Now the space for the inbound oop handle area + int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers + + int oop_handle_offset = stack_slots; + stack_slots += total_save_slots; + + // Now any space we need for handlizing a klass if static method + + int klass_slot_offset = 0; + int klass_offset = -1; + int lock_slot_offset = 0; + bool is_static = false; + + if (method->is_static()) { + klass_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; + is_static = true; + } + + // Plus a lock if needed + + if (method->is_synchronized()) { + lock_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + } + + // Now a place (+2) to save return values or temp during shuffling + // + 4 for return address (which we own) and saved fp + stack_slots += 6; + + // Ok The space we have allocated will look like: + // + // + // FP-> | | + // | 2 slots (ra) | + // | 2 slots (fp) | + // |---------------------| + // | 2 slots for moves | + // |---------------------| + // | lock box (if 
sync) | + // |---------------------| <- lock_slot_offset + // | klass (if static) | + // |---------------------| <- klass_slot_offset + // | oopHandle area | + // |---------------------| <- oop_handle_offset (8 java arg registers) + // | outbound memory | + // | based arguments | + // | | + // |---------------------| + // | | + // SP-> | out_preserved_slots | + // + // + + + // Now compute actual number of stack words we need rounding to make + // stack properly aligned. + stack_slots = align_up(stack_slots, StackAlignmentInSlots); + + int stack_size = stack_slots * VMRegImpl::stack_slot_size; + + // First thing make an ic check to see if we should even be here + + // We are free to use all registers as temps without saving them and + // restoring them except fp. fp is the only callee save register + // as far as the interpreter and the compiler(s) are concerned. + + + const Register ic_reg = t1; + const Register receiver = j_rarg0; + + Label hit; + Label exception_pending; + + assert_different_registers(ic_reg, receiver, t0); + __ verify_oop(receiver); + __ cmp_klass(receiver, ic_reg, t0, hit); + + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + + // Verified entry point must be aligned + __ align(8); + + __ bind(hit); + + int vep_offset = ((intptr_t)__ pc()) - start; + + // If we have to make this method not-entrant we'll overwrite its + // first instruction with a jump. + __ nop(); + + if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { + Label L_skip_barrier; + __ mov_metadata(t1, method->method_holder()); // InstanceKlass* + __ clinit_barrier(t1, t0, &L_skip_barrier); + __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + + __ bind(L_skip_barrier); + } + + // Generate stack overflow check + __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size())); + + // Generate a new frame for the wrapper. 
+ __ enter(); + // -2 because return address is already present and so is saved fp + __ sub(sp, sp, stack_size - 2 * wordSize); + + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + assert_cond(bs != NULL); + bs->nmethod_entry_barrier(masm); + + // Frame is now completed as far as size and linkage. + int frame_complete = ((intptr_t)__ pc()) - start; + + // We use x18 as the oop handle for the receiver/klass + // It is callee save so it survives the call to native + + const Register oop_handle_reg = x18; + + // + // We immediately shuffle the arguments so that any vm call we have to + // make from here on out (sync slow path, jvmti, etc.) we will have + // captured the oops from our caller and have a valid oopMap for + // them. + + // ----------------- + // The Grand Shuffle + + // The Java calling convention is either equal (linux) or denser (win64) than the + // c calling convention. However, because of the jni_env argument the c calling + // convention always has at least one more (and two for static) arguments than Java. + // Therefore if we move the args from java -> c backwards then we will never have + // a register->register conflict and we don't have to build a dependency graph + // and figure out how to break any cycles. + // + + // Record esp-based slot for receiver on stack for non-static methods + int receiver_offset = -1; + + // This is a trick. We double the stack slots so we can claim + // the oops in the caller's frame. Since we are sure to have + // more args than the caller doubling is enough to make + // sure we can capture all the incoming oop args from the + // caller. 
+ // + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); + assert_cond(map != NULL); + + int float_args = 0; + int int_args = 0; + +#ifdef ASSERT + bool reg_destroyed[RegisterImpl::number_of_registers]; + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; + for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { + reg_destroyed[r] = false; + } + for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { + freg_destroyed[f] = false; + } + +#endif /* ASSERT */ + + // For JNI natives the incoming and outgoing registers are offset upwards. + GrowableArray arg_order(2 * total_in_args); + VMRegPair tmp_vmreg; + tmp_vmreg.set2(x9->as_VMReg()); + + for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { + arg_order.push(i); + arg_order.push(c_arg); + } + + int temploc = -1; + for (int ai = 0; ai < arg_order.length(); ai += 2) { + int i = arg_order.at(ai); + int c_arg = arg_order.at(ai + 1); + __ block_comment(err_msg("mv %d -> %d", i, c_arg)); + assert(c_arg != -1 && i != -1, "wrong order"); +#ifdef ASSERT + if (in_regs[i].first()->is_Register()) { + assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); + } else if (in_regs[i].first()->is_FloatRegister()) { + assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); + } + if (out_regs[c_arg].first()->is_Register()) { + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; + } else if (out_regs[c_arg].first()->is_FloatRegister()) { + freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; + } +#endif /* ASSERT */ + switch (in_sig_bt[i]) { + case T_ARRAY: + case T_OBJECT: + object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], + ((i == 0) && (!is_static)), + &receiver_offset); + int_args++; + break; + case T_VOID: + break; + + case T_FLOAT: + float_move(masm, in_regs[i], out_regs[c_arg]); + float_args++; + break; 
+ + case T_DOUBLE: + assert( i + 1 < total_in_args && + in_sig_bt[i + 1] == T_VOID && + out_sig_bt[c_arg + 1] == T_VOID, "bad arg list"); + double_move(masm, in_regs[i], out_regs[c_arg]); + float_args++; + break; + + case T_LONG : + long_move(masm, in_regs[i], out_regs[c_arg]); + int_args++; + break; + + case T_ADDRESS: + assert(false, "found T_ADDRESS in java args"); + break; + + default: + move32_64(masm, in_regs[i], out_regs[c_arg]); + int_args++; + } + } + + // point c_arg at the first arg that is already loaded in case we + // need to spill before we call out + int c_arg = total_c_args - total_in_args; + + // Pre-load a static method's oop into c_rarg1. + if (method->is_static()) { + + // load oop into a register + __ movoop(c_rarg1, + JNIHandles::make_local(method->method_holder()->java_mirror()), + /*immediate*/true); + + // Now handlize the static class mirror it's known not-null. + __ sd(c_rarg1, Address(sp, klass_offset)); + map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); + + // Now get the handle + __ la(c_rarg1, Address(sp, klass_offset)); + // and protect the arg if we must spill + c_arg--; + } + + // Change state to native (we save the return address in the thread, since it might not + // be pushed on the stack when we do a stack traversal). 
+ // We use the same pc/oopMap repeatedly when we call out + + Label native_return; + __ set_last_Java_frame(sp, noreg, native_return, t0); + + Label dtrace_method_entry, dtrace_method_entry_done; + { + int32_t offset = 0; + __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); + __ lbu(t0, Address(t0, offset)); + __ addw(t0, t0, zr); + __ bnez(t0, dtrace_method_entry); + __ bind(dtrace_method_entry_done); + } + + // RedefineClasses() tracing support for obsolete method entry + if (log_is_enabled(Trace, redefine, class, obsolete)) { + // protect the args we've loaded + save_args(masm, total_c_args, c_arg, out_regs); + __ mov_metadata(c_rarg1, method()); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), + xthread, c_rarg1); + restore_args(masm, total_c_args, c_arg, out_regs); + } + + // Lock a synchronized method + + // Register definitions used by locking and unlocking + + const Register swap_reg = x10; + const Register obj_reg = x9; // Will contain the oop + const Register lock_reg = x30; // Address of compiler lock object (BasicLock) + const Register old_hdr = x30; // value of old header at unlock time + const Register tmp = ra; + + Label slow_path_lock; + Label lock_done; + + if (method->is_synchronized()) { + + const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + + // Get the handle (the 2nd argument) + __ mv(oop_handle_reg, c_rarg1); + + // Get address of the box + + __ la(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + + // Load the oop from the handle + __ ld(obj_reg, Address(oop_handle_reg, 0)); + + if (UseBiasedLocking) { + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); + } + + // Load (object->mark() | 1) into swap_reg % x10 + __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ ori(swap_reg, t0, 1); + + // Save (object->mark() | 1) into BasicLock's displaced header + __ sd(swap_reg, 
Address(lock_reg, mark_word_offset)); + + // src -> dest if dest == x10 else x10 <- dest + { + Label here; + __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); + } + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) sp <= mark < sp + os::pagesize() + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. + // NOTE: the oopMark is in swap_reg % x10 as the result of cmpxchg + + __ sub(swap_reg, swap_reg, sp); + __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); + + // Save the test result, for recursive case, the result is zero + __ sd(swap_reg, Address(lock_reg, mark_word_offset)); + __ bnez(swap_reg, slow_path_lock); + + // Slow path will re-enter here + + __ bind(lock_done); + } + + + // Finally just about ready to make the JNI call + + // get JNIEnv* which is first argument to native + __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); + + // Now set thread in native + __ la(t1, Address(xthread, JavaThread::thread_state_offset())); + __ mv(t0, _thread_in_native); + __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + __ sw(t0, Address(t1)); + + rt_call(masm, native_func); + + __ bind(native_return); + + intptr_t return_pc = (intptr_t) __ pc(); + oop_maps->add_gc_map(return_pc - start, map); + + // Unpack native results. + if (ret_type != T_OBJECT && ret_type != T_ARRAY) { + __ cast_primitive_type(ret_type, x10); + } + + Label safepoint_in_progress, safepoint_in_progress_done; + Label after_transition; + + // Switch thread to "native transition" state before reading the synchronization state. + // This additional state is necessary because reading and testing the synchronization + // state is not atomic w.r.t. 
GC, as this scenario demonstrates: + // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. + // VM thread changes sync state to synchronizing and suspends threads for GC. + // Thread A is resumed to finish this native method, but doesn't block here since it + // didn't see any synchronization in progress, and escapes. + __ mv(t0, _thread_in_native_trans); + + __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + + // Force this write out before the read below + __ membar(MacroAssembler::AnyAny); + + // check for safepoint operation in progress and/or pending suspend requests + { + // We need an acquire here to ensure that any subsequent load of the + // global SafepointSynchronize::_state flag is ordered after this load + // of the thread-local polling word. We don't want this poll to + // return false (i.e. not safepointing) and a later poll of the global + // SafepointSynchronize::_state spuriously to return true. + // This is to avoid a race when we're in a native->Java transition + // racing the code which wakes up from a safepoint. 
+ + __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */); + __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); + __ bnez(t0, safepoint_in_progress); + __ bind(safepoint_in_progress_done); + } + + // change thread state + __ la(t1, Address(xthread, JavaThread::thread_state_offset())); + __ mv(t0, _thread_in_Java); + __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + __ sw(t0, Address(t1)); + __ bind(after_transition); + + Label reguard; + Label reguard_done; + __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); + __ mv(t1, StackOverflow::stack_guard_yellow_reserved_disabled); + __ beq(t0, t1, reguard); + __ bind(reguard_done); + + // native result if any is live + + // Unlock + Label unlock_done; + Label slow_path_unlock; + if (method->is_synchronized()) { + + // Get locked oop from the handle we passed to jni + __ ld(obj_reg, Address(oop_handle_reg, 0)); + + Label done; + + if (UseBiasedLocking) { + __ biased_locking_exit(obj_reg, old_hdr, done); + } + + // Simple recursive lock? 
+ + __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + __ beqz(t0, done); + + + // Must save x10 if it is live now because cmpxchg must use it + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + save_native_result(masm, ret_type, stack_slots); + } + + + // get address of the stack lock + __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + // get old displaced header + __ ld(old_hdr, Address(x10, 0)); + + // Atomic swap old header if oop still contains the stack lock + Label succeed; + __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); + __ bind(succeed); + + // slow path re-enters here + __ bind(unlock_done); + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + restore_native_result(masm, ret_type, stack_slots); + } + + __ bind(done); + } + + Label dtrace_method_exit, dtrace_method_exit_done; + { + int32_t offset = 0; + __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); + __ lbu(t0, Address(t0, offset)); + __ bnez(t0, dtrace_method_exit); + __ bind(dtrace_method_exit_done); + } + + __ reset_last_Java_frame(false); + + // Unbox oop result, e.g. JNIHandles::resolve result. + if (is_reference_type(ret_type)) { + __ resolve_jobject(x10, xthread, t1); + } + + if (CheckJNICalls) { + // clear_pending_jni_exception_check + __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); + } + + // reset handle block + __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); + __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); + + __ leave(); + + // Any exception pending? 
+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + __ bnez(t0, exception_pending); + + // We're done + __ ret(); + + // Unexpected paths are out of line and go here + + // forward the exception + __ bind(exception_pending); + + // and forward the exception + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + // Slow path locking & unlocking + if (method->is_synchronized()) { + + __ block_comment("Slow path lock {"); + __ bind(slow_path_lock); + + // has last_Java_frame setup. No exceptions so do vanilla call not call_VM + // args are (oop obj, BasicLock* lock, JavaThread* thread) + + // protect the args we've loaded + save_args(masm, total_c_args, c_arg, out_regs); + + __ mv(c_rarg0, obj_reg); + __ mv(c_rarg1, lock_reg); + __ mv(c_rarg2, xthread); + + // Not a leaf but we have last_Java_frame setup as we want + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); + restore_args(masm, total_c_args, c_arg, out_regs); + +#ifdef ASSERT + { Label L; + __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + __ beqz(t0, L); + __ stop("no pending exception allowed on exit from monitorenter"); + __ bind(L); + } +#endif + __ j(lock_done); + + __ block_comment("} Slow path lock"); + + __ block_comment("Slow path unlock {"); + __ bind(slow_path_unlock); + + if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { + save_native_result(masm, ret_type, stack_slots); + } + + __ mv(c_rarg2, xthread); + __ la(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + __ mv(c_rarg0, obj_reg); + + // Save pending exception around call to VM (which contains an EXCEPTION_MARK) + // NOTE that obj_reg == x9 currently + __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + + rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); + +#ifdef ASSERT 
+ { + Label L; + __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + __ beqz(t0, L); + __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); + __ bind(L); + } +#endif /* ASSERT */ + + __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + + if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { + restore_native_result(masm, ret_type, stack_slots); + } + __ j(unlock_done); + + __ block_comment("} Slow path unlock"); + + } // synchronized + + // SLOW PATH Reguard the stack if needed + + __ bind(reguard); + save_native_result(masm, ret_type, stack_slots); + rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); + restore_native_result(masm, ret_type, stack_slots); + // and continue + __ j(reguard_done); + + // SLOW PATH safepoint + { + __ block_comment("safepoint {"); + __ bind(safepoint_in_progress); + + // Don't use call_VM as it will see a possible pending exception and forward it + // and never return here preventing us from clearing _last_native_pc down below. + // + save_native_result(masm, ret_type, stack_slots); + __ mv(c_rarg0, xthread); +#ifndef PRODUCT + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); +#endif + int32_t offset = 0; + __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); + __ jalr(x1, t0, offset); + + // Restore any method result value + restore_native_result(masm, ret_type, stack_slots); + + __ j(safepoint_in_progress_done); + __ block_comment("} safepoint"); + } + + // SLOW PATH dtrace support + { + __ block_comment("dtrace entry {"); + __ bind(dtrace_method_entry); + + // We have all of the arguments setup at this point. We must not touch any register + // argument registers at this point (what if we save/restore them there are no oop? 
+
+    save_args(masm, total_c_args, c_arg, out_regs);
+    __ mov_metadata(c_rarg1, method());
+    __ call_VM_leaf(
+      CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
+      xthread, c_rarg1);
+    restore_args(masm, total_c_args, c_arg, out_regs);
+    __ j(dtrace_method_entry_done);
+    __ block_comment("} dtrace entry");
+  }
+
+  {
+    __ block_comment("dtrace exit {");
+    __ bind(dtrace_method_exit);
+    save_native_result(masm, ret_type, stack_slots);
+    __ mov_metadata(c_rarg1, method());
+    __ call_VM_leaf(
+      CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
+      xthread, c_rarg1);
+    restore_native_result(masm, ret_type, stack_slots);
+    __ j(dtrace_method_exit_done);
+    __ block_comment("} dtrace exit");
+  }
+
+  __ flush();
+
+  nmethod *nm = nmethod::new_native_nmethod(method,
+                                            compile_id,
+                                            masm->code(),
+                                            vep_offset,
+                                            frame_complete,
+                                            stack_slots / VMRegImpl::slots_per_word,
+                                            (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
+                                            in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
+                                            oop_maps);
+  assert(nm != NULL, "create native nmethod fail!");
+  return nm;
+}
+
+// This function returns the adjustment size (in number of words) to a c2i adapter
+// activation for use during deoptimization; the result is rounded up to an even word count.
+int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
+  assert(callee_locals >= callee_parameters,
+         "test and remove; got more parms than locals");
+  if (callee_locals < callee_parameters) {
+    return 0; // No adjustment for negative locals
+  }
+  int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
+  // diff is counted in stack words
+  return align_up(diff, 2);
+}
+
+//------------------------------generate_deopt_blob----------------------------
+void SharedRuntime::generate_deopt_blob() {
+  // Allocate space for the code
+  ResourceMark rm;
+  // Setup code generation tools
+  int pad = 0;
+  CodeBuffer buffer("deopt_blob", 2048 + pad, 1024);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+  int frame_size_in_words = -1;
+  OopMap* map = NULL;
+  OopMapSet *oop_maps = new OopMapSet();
+  assert_cond(masm != NULL && oop_maps != NULL);
+  RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0);
+
+  // -------------
+  // This code enters when returning to a de-optimized nmethod. A return
+  // address has been pushed on the stack, and return values are in
+  // registers.
+  // If we are doing a normal deopt then we were called from the patched
+  // nmethod from the point we returned to the nmethod. So the return
+  // address on the stack is wrong by NativeCall::instruction_size
+  // We will adjust the value so it looks like we have the original return
+  // address on the stack (like when we eagerly deoptimized).
+  // In the case of an exception pending when deoptimizing, we enter
+  // with a return address on the stack that points after the call we patched
+  // into the exception handler. We have the following register state from,
+  // e.g., the forward exception stub (see stubGenerator_riscv.cpp).
+  //    x10: exception oop
+  //    x9: exception handler
+  //    x13: throwing pc
+  // So in this case we simply jam x13 into the useless return address and
+  // the stack looks just like we want.
+  //
+  // At this point we need to de-opt. We save the argument return
+  // registers. We call the first C routine, fetch_unroll_info(). This
+  // routine captures the return values and returns a structure which
+  // describes the current frame size and the sizes of all replacement frames.
+  // The current frame is compiled code and may contain many inlined
+  // functions, each with their own JVM state. We pop the current frame, then
+  // push all the new frames. Then we call the C routine unpack_frames() to
+  // populate these frames. Finally unpack_frames() returns us the new target
+  // address. Notice that callee-save registers are BLOWN here; they have
+  // already been captured in the vframeArray at the time the return PC was
+  // patched.
+  address start = __ pc();
+  Label cont;
+
+  // Prolog for non exception case!
+
+  // Save everything in sight.
+  map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
+
+  // Normal deoptimization. Save exec mode for unpack_frames.
+  __ mvw(xcpool, Deoptimization::Unpack_deopt); // callee-saved
+  __ j(cont);
+
+  int reexecute_offset = __ pc() - start;
+
+  // Reexecute case
+  // return address is the pc that describes which bci to re-execute at
+
+  // No need to update map as each call to save_live_registers will produce identical oopmap
+  (void) reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
+
+  __ mvw(xcpool, Deoptimization::Unpack_reexecute); // callee-saved
+  __ j(cont);
+
+  int exception_offset = __ pc() - start;
+
+  // Prolog for exception case
+
+  // all registers are dead at this entry point, except for x10, and
+  // x13 which contain the exception oop and exception pc
+  // respectively. Set them in TLS and fall thru to the
+  // unpack_with_exception_in_tls entry point.
+
+  __ sd(x13, Address(xthread, JavaThread::exception_pc_offset()));
+  __ sd(x10, Address(xthread, JavaThread::exception_oop_offset()));
+
+  int exception_in_tls_offset = __ pc() - start;
+
+  // new implementation because exception oop is now passed in JavaThread
+
+  // Prolog for exception case
+  // All registers must be preserved because they might be used by LinearScan
+  // Exception oop and throwing PC are passed in JavaThread
+  // tos: stack at point of call to method that threw the exception (i.e. only
+  // args are on the stack, no return address)
+
+  // The return address pushed by save_live_registers will be patched
+  // later with the throwing pc. The correct value is not available
+  // now because loading it from memory would destroy registers.
+
+  // NB: The SP at this point must be the SP of the method that is
+  // being deoptimized. Deoptimization assumes that the frame created
+  // here by save_live_registers is immediately below the method's SP.
+  // This is a somewhat fragile mechanism.
+
+  // Save everything in sight.
+  map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
+
+  // Now it is safe to overwrite any register
+
+  // Deopt during an exception. Save exec mode for unpack_frames.
+  __ li(xcpool, Deoptimization::Unpack_exception); // callee-saved
+
+  // load throwing pc from JavaThread and patch it as the return address
+  // of the current frame. Then clear the field in JavaThread
+
+  __ ld(x13, Address(xthread, JavaThread::exception_pc_offset()));
+  __ sd(x13, Address(fp, frame::return_addr_offset * wordSize));
+  __ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
+
+#ifdef ASSERT
+  // verify that there is really an exception oop in JavaThread
+  __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
+  __ verify_oop(x10);
+
+  // verify that there is no pending exception
+  Label no_pending_exception;
+  __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
+  __ beqz(t0, no_pending_exception);
+  __ stop("must not have pending exception here");
+  __ bind(no_pending_exception);
+#endif
+
+  __ bind(cont);
+
+  // Call C code. Need thread and this frame, but NOT official VM entry
+  // crud. We cannot block on this call, no GC can happen.
+  //
+  // UnrollBlock* fetch_unroll_info(JavaThread* thread, int exec_mode)
+
+  // fetch_unroll_info needs to call last_java_frame().
+
+  Label retaddr;
+  __ set_last_Java_frame(sp, noreg, retaddr, t0);
+#ifdef ASSERT
+  {
+    Label L;
+    __ ld(t0, Address(xthread,
+                      JavaThread::last_Java_fp_offset()));
+    __ beqz(t0, L);
+    __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
+    __ bind(L);
+  }
+#endif // ASSERT
+  __ mv(c_rarg0, xthread);
+  __ mv(c_rarg1, xcpool);
+  int32_t offset = 0;
+  __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), offset);
+  __ jalr(x1, t0, offset);
+  __ bind(retaddr);
+
+  // Need to have an oopmap that tells fetch_unroll_info where to
+  // find any register it might need.
+  oop_maps->add_gc_map(__ pc() - start, map);
+
+  __ reset_last_Java_frame(false);
+
+  // Load UnrollBlock* into x15
+  __ mv(x15, x10);
+
+  __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
+  Label noException;
+  __ li(t0, Deoptimization::Unpack_exception);
+  __ bne(xcpool, t0, noException); // Was exception pending?
+  __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
+  __ ld(x13, Address(xthread, JavaThread::exception_pc_offset()));
+  __ sd(zr, Address(xthread, JavaThread::exception_oop_offset()));
+  __ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
+
+  __ verify_oop(x10);
+
+  // Overwrite the result registers with the exception results.
+  __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
+
+  __ bind(noException);
+
+  // Only register save data is on the stack.
+  // Now restore the result registers. Everything else is either dead
+  // or captured in the vframeArray.
+
+  // Restore fp result register
+  __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10)));
+  // Restore integer result register
+  __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
+
+  // Pop all of the register save area off the stack
+  __ add(sp, sp, frame_size_in_words * wordSize);
+
+  // All of the register save area has been popped off the stack. Only the
+  // return address remains.
+
+  // Pop all the frames we must move/replace.
+  //
+  // Frame picture (youngest to oldest)
+  // 1: self-frame (no frame link)
+  // 2: deopting frame (no frame link)
+  // 3: caller of deopting frame (could be compiled/interpreted).
+  //
+  // Note: by leaving the return address of self-frame on the stack
+  // and using the size of frame 2 to adjust the stack
+  // when we are done the return to frame 3 will still be on the stack.
+
+  // Pop deoptimized frame
+  __ lwu(x12, Address(x15, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
+  __ sub(x12, x12, 2 * wordSize);
+  __ add(sp, sp, x12);
+  __ ld(fp, Address(sp, 0));
+  __ ld(ra, Address(sp, wordSize));
+  __ addi(sp, sp, 2 * wordSize);
+  // RA should now be the return address to the caller (3)
+
+#ifdef ASSERT
+  // Compilers generate code that bang the stack by as much as the
+  // interpreter would need. So this stack banging should never
+  // trigger a fault. Verify that it does not on non product builds.
+  __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
+  __ bang_stack_size(x9, x12);
+#endif
+  // Load address of array of frame pcs into x12
+  __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
+
+  // Load address of array of frame sizes into x14
+  __ ld(x14, Address(x15, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
+
+  // Load counter into x13
+  __ lwu(x13, Address(x15, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
+
+  // Now adjust the caller's stack to make up for the extra locals
+  // but record the original sp so that we can save it in the skeletal interpreter
+  // frame and the stack walking of interpreter_sender will get the unextended sp
+  // value and not the "real" sp value.
+
+  const Register sender_sp = x16;
+
+  __ mv(sender_sp, sp);
+  __ lwu(x9, Address(x15,
+                     Deoptimization::UnrollBlock::
+                     caller_adjustment_offset_in_bytes()));
+  __ sub(sp, sp, x9);
+
+  // Push interpreter frames in a loop
+  __ li(t0, 0xDEADDEAD); // Make a recognizable pattern
+  __ mv(t1, t0);
+  Label loop;
+  __ bind(loop);
+  __ ld(x9, Address(x14, 0)); // Load frame size
+  __ addi(x14, x14, wordSize);
+  __ sub(x9, x9, 2 * wordSize); // We'll push pc and fp by hand
+  __ ld(ra, Address(x12, 0)); // Load pc
+  __ addi(x12, x12, wordSize);
+  __ enter(); // Save old & set new fp
+  __ sub(sp, sp, x9); // Prolog
+  // This value is corrected by layout_activation_impl
+  __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+  __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable
+  __ mv(sender_sp, sp); // Pass sender_sp to next frame
+  __ addi(x13, x13, -1); // Decrement counter
+  __ bnez(x13, loop);
+
+  // Re-push self-frame
+  __ ld(ra, Address(x12));
+  __ enter();
+
+  // Allocate a full sized register save area. We subtract 2 because
+  // enter() just pushed 2 words
+  __ sub(sp, sp, (frame_size_in_words - 2) * wordSize);
+
+  // Restore frame locals after moving the frame
+  __ fsd(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10)));
+  __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
+
+  // Call C code. Need thread but NOT official VM entry
+  // crud. We cannot block on this call, no GC can happen. Call should
+  // restore return values to their stack-slots with the new SP.
+  //
+  // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode)
+
+  // Use fp because the frames look interpreted now
+  // Don't need the precise return PC here, just precise enough to point into this code blob.
+  address the_pc = __ pc();
+  __ set_last_Java_frame(sp, fp, the_pc, t0);
+
+  __ mv(c_rarg0, xthread);
+  __ mv(c_rarg1, xcpool); // second arg: exec_mode
+  offset = 0;
+  __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset);
+  __ jalr(x1, t0, offset);
+
+  // Set an oopmap for the call site
+  // Use the same PC we used for the last java frame
+  oop_maps->add_gc_map(the_pc - start,
+                       new OopMap(frame_size_in_words, 0));
+
+  // Clear fp AND pc
+  __ reset_last_Java_frame(true);
+
+  // Collect return values
+  __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10)));
+  __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
+
+  // Pop self-frame.
+  __ leave(); // Epilog
+
+  // Jump to interpreter
+  __ ret();
+
+  // Make sure all code is generated
+  masm->flush();
+
+  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
+  assert(_deopt_blob != NULL, "create deoptimization blob fail!");
+  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
+}
+
+// Number of stack slots between incoming argument block and the start of
+// a new frame. The PROLOG must add this many slots to the stack. The
+// EPILOG must remove this many slots.
+// RISCV needs two words for RA (return address) and FP (frame pointer).
+uint SharedRuntime::in_preserve_stack_slots() {
+  return 2 * VMRegImpl::slots_per_word;
+}
+
+uint SharedRuntime::out_preserve_stack_slots() {
+  return 0;
+}
+
+#ifdef COMPILER2
+//------------------------------generate_uncommon_trap_blob--------------------
+void SharedRuntime::generate_uncommon_trap_blob() {
+  // Allocate space for the code
+  ResourceMark rm;
+  // Setup code generation tools
+  CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+  assert_cond(masm != NULL);
+
+  assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
+
+  address start = __ pc();
+
+  // Push self-frame. We get here with a return address in RA
+  // and sp should be 16 byte aligned
+  // push fp and retaddr by hand
+  __ addi(sp, sp, -2 * wordSize);
+  __ sd(ra, Address(sp, wordSize));
+  __ sd(fp, Address(sp, 0));
+  // we don't expect an arg reg save area
+#ifndef PRODUCT
+  assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
+#endif
+  // compiler left unloaded_class_index in j_rarg0 move to where the
+  // runtime expects it.
+  __ addiw(c_rarg1, j_rarg0, 0);
+
+  // we need to set the past SP to the stack pointer of the stub frame
+  // and the pc to the address where this runtime call will return
+  // (although actually any pc in this code blob will do).
+  Label retaddr;
+  __ set_last_Java_frame(sp, noreg, retaddr, t0);
+
+  // Call C code. Need thread but NOT official VM entry
+  // crud. We cannot block on this call, no GC can happen. Call should
+  // capture callee-saved registers as well as return values.
+  //
+  // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode)
+  //
+  // n.b. 3 gp args, 0 fp args, integral return type
+
+  __ mv(c_rarg0, xthread);
+  __ mvw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap);
+  int32_t offset = 0;
+  __ la_patchable(t0,
+        RuntimeAddress(CAST_FROM_FN_PTR(address,
+                                        Deoptimization::uncommon_trap)), offset);
+  __ jalr(x1, t0, offset);
+  __ bind(retaddr);
+
+  // Set an oopmap for the call site
+  OopMapSet* oop_maps = new OopMapSet();
+  OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0);
+  assert_cond(oop_maps != NULL && map != NULL);
+
+  // location of fp is known implicitly by the frame sender code
+
+  oop_maps->add_gc_map(__ pc() - start, map);
+
+  __ reset_last_Java_frame(false);
+
+  // move UnrollBlock* into x14
+  __ mv(x14, x10);
+
+#ifdef ASSERT
+  { Label L;
+    __ lwu(t0, Address(x14, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
+    __ mvw(t1, Deoptimization::Unpack_uncommon_trap);
+    __ beq(t0, t1, L);
+    __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
+    __ bind(L);
+  }
+#endif
+
+  // Pop all the frames we must move/replace.
+  //
+  // Frame picture (youngest to oldest)
+  // 1: self-frame (no frame link)
+  // 2: deopting frame (no frame link)
+  // 3: caller of deopting frame (could be compiled/interpreted).
+
+  __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog!
+
+  // Pop deoptimized frame (int)
+  __ lwu(x12, Address(x14,
+                      Deoptimization::UnrollBlock::
+                      size_of_deoptimized_frame_offset_in_bytes()));
+  __ sub(x12, x12, 2 * wordSize);
+  __ add(sp, sp, x12);
+  __ ld(fp, sp, 0);
+  __ ld(ra, sp, wordSize);
+  __ addi(sp, sp, 2 * wordSize);
+  // RA should now be the return address to the caller (3) frame
+
+#ifdef ASSERT
+  // Compilers generate code that bang the stack by as much as the
+  // interpreter would need. So this stack banging should never
+  // trigger a fault. Verify that it does not on non product builds.
+  __ lwu(x11, Address(x14,
+                      Deoptimization::UnrollBlock::
+                      total_frame_sizes_offset_in_bytes()));
+  __ bang_stack_size(x11, x12);
+#endif
+
+  // Load address of array of frame pcs into x12 (address*)
+  __ ld(x12, Address(x14,
+                     Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
+
+  // Load address of array of frame sizes into x15 (intptr_t*)
+  __ ld(x15, Address(x14,
+                     Deoptimization::UnrollBlock::
+                     frame_sizes_offset_in_bytes()));
+
+  // Counter
+  __ lwu(x13, Address(x14,
+                      Deoptimization::UnrollBlock::
+                      number_of_frames_offset_in_bytes())); // (int)
+
+  // Now adjust the caller's stack to make up for the extra locals but
+  // record the original sp so that we can save it in the skeletal
+  // interpreter frame and the stack walking of interpreter_sender
+  // will get the unextended sp value and not the "real" sp value.
+
+  const Register sender_sp = t1; // temporary register
+
+  __ lwu(x11, Address(x14,
+                      Deoptimization::UnrollBlock::
+                      caller_adjustment_offset_in_bytes())); // (int)
+  __ mv(sender_sp, sp);
+  __ sub(sp, sp, x11);
+
+  // Push interpreter frames in a loop
+  Label loop;
+  __ bind(loop);
+  __ ld(x11, Address(x15, 0)); // Load frame size
+  __ sub(x11, x11, 2 * wordSize); // We'll push pc and fp by hand
+  __ ld(ra, Address(x12, 0)); // Save return address
+  __ enter(); // and old fp & set new fp
+  __ sub(sp, sp, x11); // Prolog
+  __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable
+  // This value is corrected by layout_activation_impl
+  __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+  __ mv(sender_sp, sp); // Pass sender_sp to next frame
+  __ add(x15, x15, wordSize); // Bump array pointer (sizes)
+  __ add(x12, x12, wordSize); // Bump array pointer (pcs)
+  __ subw(x13, x13, 1); // Decrement counter
+  __ bgtz(x13, loop);
+  __ ld(ra, Address(x12, 0)); // save final return address
+  // Re-push self-frame
+  __ enter(); // & old fp & set new fp
+
+  // Use fp because the frames look interpreted now
+  // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
+  // Don't need the precise return PC here, just precise enough to point into this code blob.
+  address the_pc = __ pc();
+  __ set_last_Java_frame(sp, fp, the_pc, t0);
+
+  // Call C code. Need thread but NOT official VM entry
+  // crud. We cannot block on this call, no GC can happen. Call should
+  // restore return values to their stack-slots with the new SP.
+  //
+  // BasicType unpack_frames(JavaThread* thread, int exec_mode)
+  //
+
+  // n.b. 2 gp args, 0 fp args, integral return type
+
+  // sp should already be aligned
+  __ mv(c_rarg0, xthread);
+  __ mvw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap);
+  offset = 0;
+  __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset);
+  __ jalr(x1, t0, offset);
+
+  // Set an oopmap for the call site
+  // Use the same PC we used for the last java frame
+  oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
+
+  // Clear fp AND pc
+  __ reset_last_Java_frame(true);
+
+  // Pop self-frame.
+  __ leave(); // Epilog
+
+  // Jump to interpreter
+  __ ret();
+
+  // Make sure all code is generated
+  masm->flush();
+
+  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps,
+                                                 SimpleRuntimeFrame::framesize >> 1);
+}
+#endif // COMPILER2
+
+//------------------------------generate_handler_blob------
+//
+// Generate a special Compile2Runtime blob that saves all registers,
+// and setup oopmap.
+//
+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
+  ResourceMark rm;
+  OopMapSet *oop_maps = new OopMapSet();
+  assert_cond(oop_maps != NULL);
+  OopMap* map = NULL;
+
+  // Allocate space for the code. Setup code generation tools.
+  CodeBuffer buffer("handler_blob", 2048, 1024);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+  assert_cond(masm != NULL);
+
+  address start = __ pc();
+  address call_pc = NULL;
+  int frame_size_in_words = -1;
+  bool cause_return = (poll_type == POLL_AT_RETURN);
+  RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */);
+
+  // Save Integer and Float registers.
+  map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
+
+  // The following is basically a call_VM. However, we need the precise
+  // address of the call in order to generate an oopmap. Hence, we do all the
+  // work ourselves.
+
+  Label retaddr;
+  __ set_last_Java_frame(sp, noreg, retaddr, t0);
+
+  // The return address must always be correct so that frame constructor never
+  // sees an invalid pc.
+
+  if (!cause_return) {
+    // overwrite the return address pushed by save_live_registers
+    // Additionally, x18 is a callee-saved register so we can look at
+    // it later to determine if someone changed the return address for
+    // us!
+    __ ld(x18, Address(xthread, JavaThread::saved_exception_pc_offset()));
+    __ sd(x18, Address(fp, frame::return_addr_offset * wordSize));
+  }
+
+  // Do the call
+  __ mv(c_rarg0, xthread);
+  int32_t offset = 0;
+  __ la_patchable(t0, RuntimeAddress(call_ptr), offset);
+  __ jalr(x1, t0, offset);
+  __ bind(retaddr);
+
+  // Set an oopmap for the call site. This oopmap will map all
+  // oop-registers and debug-info registers as callee-saved. This
+  // will allow deoptimization at this safepoint to find all possible
+  // debug-info recordings, as well as let GC find all oops.
+
+  oop_maps->add_gc_map( __ pc() - start, map);
+
+  Label noException;
+
+  __ reset_last_Java_frame(false);
+
+  __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+
+  __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
+  __ beqz(t0, noException);
+
+  // Exception pending
+
+  reg_saver.restore_live_registers(masm);
+
+  __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+
+  // No exception case
+  __ bind(noException);
+
+  Label no_adjust, bail;
+  if (!cause_return) {
+    // If our stashed return pc was modified by the runtime we avoid touching it
+    __ ld(t0, Address(fp, frame::return_addr_offset * wordSize));
+    __ bne(x18, t0, no_adjust);
+
+#ifdef ASSERT
+    // Verify the correct encoding of the poll we're about to skip (each field is masked to its full width).
+    // See NativeInstruction::is_lwu_to_zr()
+    __ lwu(t0, Address(x18));
+    __ andi(t1, t0, 0b1111111);
+    __ mv(t2, 0b0000011);
+    __ bne(t1, t2, bail); // bits 0-6 (opcode) must be 0b0000011
+    __ srli(t1, t0, 7);
+    __ andi(t1, t1, 0b11111);
+    __ bnez(t1, bail); // bits 7-11 (rd) must be 0b00000, i.e. zr
+    __ srli(t1, t0, 12);
+    __ andi(t1, t1, 0b111);
+    __ mv(t2, 0b110);
+    __ bne(t1, t2, bail); // bits 12-14 (funct3) must be 0b110, i.e. lwu
+#endif
+    // Adjust return pc forward to step over the safepoint poll instruction
+    __ add(x18, x18, NativeInstruction::instruction_size);
+    __ sd(x18, Address(fp, frame::return_addr_offset * wordSize));
+  }
+
+  __ bind(no_adjust);
+  // Normal exit, restore registers and exit.
+
+  reg_saver.restore_live_registers(masm);
+  __ ret();
+
+#ifdef ASSERT
+  __ bind(bail);
+  __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected");
+#endif
+
+  // Make sure all code is generated
+  masm->flush();
+
+  // Fill-out other meta info
+  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
+}
+
+//
+// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
+//
+// Generate a stub that calls into vm to find out the proper destination
+// of a java call.
All the argument registers are live at this point
+// but since this is generic code we don't know what they are and the caller
+// must do any gc of the args.
+//
+RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
+  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
+
+  // allocate space for the code
+  ResourceMark rm;
+
+  CodeBuffer buffer(name, 1000, 512);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+  assert_cond(masm != NULL);
+
+  int frame_size_in_words = -1;
+  RegisterSaver reg_saver(false /* save_vectors */);
+
+  OopMapSet *oop_maps = new OopMapSet();
+  assert_cond(oop_maps != NULL);
+  OopMap* map = NULL;
+
+  int start = __ offset();
+
+  map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
+
+  int frame_complete = __ offset();
+
+  {
+    Label retaddr;
+    __ set_last_Java_frame(sp, noreg, retaddr, t0);
+
+    __ mv(c_rarg0, xthread);
+    int32_t offset = 0;
+    __ la_patchable(t0, RuntimeAddress(destination), offset);
+    __ jalr(x1, t0, offset);
+    __ bind(retaddr);
+  }
+
+  // Set an oopmap for the call site.
+  // We need this not only for callee-saved registers, but also for volatile
+  // registers that the compiler might be keeping live across a safepoint.
+
+  oop_maps->add_gc_map( __ offset() - start, map);
+
+  // x10 contains the address we are going to jump to assuming no exception got installed
+
+  // clear last_Java_sp
+  __ reset_last_Java_frame(false);
+  // check for pending exceptions
+  Label pending;
+  __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
+  __ bnez(t0, pending);
+
+  // get the returned Method*
+  __ get_vm_result_2(xmethod, xthread);
+  __ sd(xmethod, Address(sp, reg_saver.reg_offset_in_bytes(xmethod)));
+
+  // x10 is where we want to jump, overwrite t0 which is saved and temporary
+  __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(t0)));
+  reg_saver.restore_live_registers(masm);
+
+  // We are back to the original state on entry and ready to go.
+
+  __ jr(t0);
+
+  // Pending exception after the safepoint
+
+  __ bind(pending);
+
+  reg_saver.restore_live_registers(masm);
+
+  // exception pending => remove activation and forward to exception handler
+
+  __ sd(zr, Address(xthread, JavaThread::vm_result_offset()));
+
+  __ ld(x10, Address(xthread, Thread::pending_exception_offset()));
+  __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+
+  // -------------
+  // make sure all code is generated
+  masm->flush();
+
+  // return the blob
+  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
+}
+
+#ifdef COMPILER2
+RuntimeStub* SharedRuntime::make_native_invoker(address call_target,
+                                                int shadow_space_bytes,
+                                                const GrowableArray<VMReg>& input_registers,
+                                                const GrowableArray<VMReg>& output_registers) {
+  Unimplemented(); // no native invoker stub is generated here; this path always reports unimplemented
+  return nullptr;
+}
+
+//------------------------------generate_exception_blob---------------------------
+// creates exception blob at the end
+// Using exception blob, this code is jumped from a compiled method.
+// (see emit_exception_handler in riscv.ad file)
+//
+// Given an exception pc at a call we call into the runtime for the
+// handler in this method. This handler might merely restore state
+// (i.e.
callee save registers) unwind the frame and jump to the +// exception handler for the nmethod if there is no Java level handler +// for the nmethod. +// +// This code is entered with a jmp. +// +// Arguments: +// x10: exception oop +// x13: exception pc +// +// Results: +// x10: exception oop +// x13: exception pc in caller +// destination: exception handler of caller +// +// Note: the exception pc MUST be at a call (precise debug information) +// Registers x10, x13, x12, x14, x15, t0 are not callee saved. +// + +void OptoRuntime::generate_exception_blob() { + assert(!OptoRuntime::is_callee_saved_register(R13_num), ""); + assert(!OptoRuntime::is_callee_saved_register(R10_num), ""); + assert(!OptoRuntime::is_callee_saved_register(R12_num), ""); + + assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); + + // Allocate space for the code + ResourceMark rm; + // Setup code generation tools + CodeBuffer buffer("exception_blob", 2048, 1024); + MacroAssembler* masm = new MacroAssembler(&buffer); + assert_cond(masm != NULL); + + // TODO check various assumptions made here + // + // make sure we do so before running this + + address start = __ pc(); + + // push fp and retaddr by hand + // Exception pc is 'return address' for stack walker + __ addi(sp, sp, -2 * wordSize); + __ sd(ra, Address(sp, wordSize)); + __ sd(fp, Address(sp)); + // there are no callee save registers and we don't expect an + // arg reg save area +#ifndef PRODUCT + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); +#endif + // Store exception in Thread object. We cannot pass any arguments to the + // handle_exception call, since we do not want to make any assumption + // about the size of the frame where the exception happened in. + __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); + __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); + + // This call does all the hard work. 
It checks if an exception handler + // exists in the method. + // If so, it returns the handler address. + // If not, it prepares for stack-unwinding, restoring the callee-save + // registers of the frame being removed. + // + // address OptoRuntime::handle_exception_C(JavaThread* thread) + // + // n.b. 1 gp arg, 0 fp args, integral return type + + // the stack should always be aligned + address the_pc = __ pc(); + __ set_last_Java_frame(sp, noreg, the_pc, t0); + __ mv(c_rarg0, xthread); + int32_t offset = 0; + __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); + __ jalr(x1, t0, offset); + + + // handle_exception_C is a special VM call which does not require an explicit + // instruction sync afterwards. + + // Set an oopmap for the call site. This oopmap will only be used if we + // are unwinding the stack. Hence, all locations will be dead. + // Callee-saved registers will be the same as the frame above (i.e., + // handle_exception_stub), since they were restored when we got the + // exception. + + OopMapSet* oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + + oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); + + __ reset_last_Java_frame(false); + + // Restore callee-saved registers + + // fp is an implicitly saved callee saved register (i.e. the calling + // convention will save and restore it in prolog/epilog) Other than that + // there are no callee save registers now that adapter frames are gone. + // and we don't expect an arg reg save area + __ ld(fp, Address(sp)); + __ ld(x13, Address(sp, wordSize)); + __ addi(sp, sp , 2 * wordSize); + + // x10: exception handler + + // We have a handler in x10 (could be deopt blob).
+ __ mv(t0, x10); + + // Get the exception oop + __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); + // Get the exception pc in case we are deoptimized + __ ld(x14, Address(xthread, JavaThread::exception_pc_offset())); +#ifdef ASSERT + __ sd(zr, Address(xthread, JavaThread::exception_handler_pc_offset())); + __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); +#endif + // Clear the exception oop so GC no longer processes it as a root. + __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); + + // x10: exception oop + // t0: exception handler + // x14: exception pc + // Jump to handler + + __ jr(t0); + + // Make sure all code is generated + masm->flush(); + + // Set exception blob + _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); +} +#endif // COMPILER2 Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp @@ -0,0 +1,3864 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "compiler/oopMap.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/universe.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/instanceOop.hpp" +#include "oops/method.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "utilities/align.hpp" +#include "utilities/powerOfTwo.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif +#if INCLUDE_ZGC +#include "gc/z/zThreadLocalData.hpp" +#endif + +// Declaration and definition of StubGenerator (no .hpp file). 
+// For a more detailed description of the stub routine structure +// see the comment in stubRoutines.hpp + +#undef __ +#define __ _masm-> + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// Stub Code definitions + +class StubGenerator: public StubCodeGenerator { + private: + +#ifdef PRODUCT +#define inc_counter_np(counter) ((void)0) +#else + void inc_counter_np_(int& counter) { + __ la(t1, ExternalAddress((address)&counter)); + __ lwu(t0, Address(t1, 0)); + __ addiw(t0, t0, 1); + __ sw(t0, Address(t1, 0)); + } +#define inc_counter_np(counter) \ + BLOCK_COMMENT("inc_counter " #counter); \ + inc_counter_np_(counter); +#endif + + // Call stubs are used to call Java from C + // + // Arguments: + // c_rarg0: call wrapper address address + // c_rarg1: result address + // c_rarg2: result type BasicType + // c_rarg3: method Method* + // c_rarg4: (interpreter) entry point address + // c_rarg5: parameters intptr_t* + // c_rarg6: parameter size (in words) int + // c_rarg7: thread Thread* + // + // There is no return from the stub itself as any Java result + // is written to result + // + // we save x1 (ra) as the return PC at the base of the frame and + // link x8 (fp) below it as the frame pointer installing sp (x2) + // into fp. + // + // we save x10-x17, which accounts for all the c arguments. + // + // TODO: strictly do we need to save them all? they are treated as + // volatile by C so could we omit saving the ones we are going to + // place in global registers (thread? method?) or those we only use + // during setup of the Java call? + // + // we don't need to save x5 which C uses as an indirect result location + // return register. 
+ // + // we don't need to save x6-x7 and x28-x31 which both C and Java treat as + // volatile + // + // we save x18-x27 which Java uses as temporary registers and C + // expects to be callee-save + // + // so the stub frame looks like this when we enter Java code + // + // [ return_from_Java ] <--- sp + // [ argument word n ] + // ... + // -22 [ argument word 1 ] + // -21 [ saved x27 ] <--- sp_after_call + // -20 [ saved x26 ] + // -19 [ saved x25 ] + // -18 [ saved x24 ] + // -17 [ saved x23 ] + // -16 [ saved x22 ] + // -15 [ saved x21 ] + // -14 [ saved x20 ] + // -13 [ saved x19 ] + // -12 [ saved x18 ] + // -11 [ saved x9 ] + // -10 [ call wrapper (x10) ] + // -9 [ result (x11) ] + // -8 [ result type (x12) ] + // -7 [ method (x13) ] + // -6 [ entry point (x14) ] + // -5 [ parameters (x15) ] + // -4 [ parameter size (x16) ] + // -3 [ thread (x17) ] + // -2 [ saved fp (x8) ] + // -1 [ saved ra (x1) ] + // 0 [ ] <--- fp == saved sp (x2) + + // Call stub stack layout word offsets from fp + enum call_stub_layout { + sp_after_call_off = -21, + + x27_off = -21, + x26_off = -20, + x25_off = -19, + x24_off = -18, + x23_off = -17, + x22_off = -16, + x21_off = -15, + x20_off = -14, + x19_off = -13, + x18_off = -12, + x9_off = -11, + + call_wrapper_off = -10, + result_off = -9, + result_type_off = -8, + method_off = -7, + entry_point_off = -6, + parameters_off = -5, + parameter_size_off = -4, + thread_off = -3, + fp_f = -2, + retaddr_off = -1, + }; + + address generate_call_stub(address& return_address) { + assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 && + (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, + "adjust this code"); + + StubCodeMark mark(this, "StubRoutines", "call_stub"); + address start = __ pc(); + + const Address sp_after_call (fp, sp_after_call_off * wordSize); + + const Address call_wrapper (fp, call_wrapper_off * wordSize); + const Address result (fp, result_off * wordSize); + const Address 
result_type (fp, result_type_off * wordSize); + const Address method (fp, method_off * wordSize); + const Address entry_point (fp, entry_point_off * wordSize); + const Address parameters (fp, parameters_off * wordSize); + const Address parameter_size(fp, parameter_size_off * wordSize); + + const Address thread (fp, thread_off * wordSize); + + const Address x27_save (fp, x27_off * wordSize); + const Address x26_save (fp, x26_off * wordSize); + const Address x25_save (fp, x25_off * wordSize); + const Address x24_save (fp, x24_off * wordSize); + const Address x23_save (fp, x23_off * wordSize); + const Address x22_save (fp, x22_off * wordSize); + const Address x21_save (fp, x21_off * wordSize); + const Address x20_save (fp, x20_off * wordSize); + const Address x19_save (fp, x19_off * wordSize); + const Address x18_save (fp, x18_off * wordSize); + + const Address x9_save (fp, x9_off * wordSize); + + // stub code + + address riscv_entry = __ pc(); + + // set up frame and move sp to end of save area + __ enter(); + __ addi(sp, fp, sp_after_call_off * wordSize); + + // save register parameters and Java temporary/global registers + // n.b. 
we save thread even though it gets installed in + // xthread because we want to sanity check tp later + __ sd(c_rarg7, thread); + __ sw(c_rarg6, parameter_size); + __ sd(c_rarg5, parameters); + __ sd(c_rarg4, entry_point); + __ sd(c_rarg3, method); + __ sd(c_rarg2, result_type); + __ sd(c_rarg1, result); + __ sd(c_rarg0, call_wrapper); + + __ sd(x9, x9_save); + + __ sd(x18, x18_save); + __ sd(x19, x19_save); + __ sd(x20, x20_save); + __ sd(x21, x21_save); + __ sd(x22, x22_save); + __ sd(x23, x23_save); + __ sd(x24, x24_save); + __ sd(x25, x25_save); + __ sd(x26, x26_save); + __ sd(x27, x27_save); + + // install Java thread in global register now we have saved + // whatever value it held + __ mv(xthread, c_rarg7); + + // And method + __ mv(xmethod, c_rarg3); + + // set up the heapbase register + __ reinit_heapbase(); + +#ifdef ASSERT + // make sure we have no pending exceptions + { + Label L; + __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + __ beqz(t0, L); + __ stop("StubRoutines::call_stub: entered with pending exception"); + __ BIND(L); + } +#endif + // pass parameters if any + __ mv(esp, sp); + __ slli(t0, c_rarg6, LogBytesPerWord); + __ sub(t0, sp, t0); // Move SP out of the way + __ andi(sp, t0, -2 * wordSize); + + BLOCK_COMMENT("pass parameters if any"); + Label parameters_done; + // parameter count is still in c_rarg6 + // and parameter pointer identifying param 1 is in c_rarg5 + __ beqz(c_rarg6, parameters_done); + + address loop = __ pc(); + __ ld(t0, c_rarg5, 0); + __ addi(c_rarg5, c_rarg5, wordSize); + __ addi(c_rarg6, c_rarg6, -1); + __ push_reg(t0); + __ bgtz(c_rarg6, loop); + + __ BIND(parameters_done); + + // call Java entry -- passing Method*, and current sp + // xmethod: Method* + // x30: sender sp + BLOCK_COMMENT("call Java function"); + __ mv(x30, sp); + __ jalr(c_rarg4); + + // save current address for use by exception handling code + + return_address = __ pc(); + + // store result depending on type (everything
that is not + // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) + // n.b. this assumes Java returns an integral result in x10 + // and a floating result in j_farg0 + __ ld(j_rarg2, result); + Label is_long, is_float, is_double, exit; + __ ld(j_rarg1, result_type); + __ li(t0, (u1)T_OBJECT); + __ beq(j_rarg1, t0, is_long); + __ li(t0, (u1)T_LONG); + __ beq(j_rarg1, t0, is_long); + __ li(t0, (u1)T_FLOAT); + __ beq(j_rarg1, t0, is_float); + __ li(t0, (u1)T_DOUBLE); + __ beq(j_rarg1, t0, is_double); + + // handle T_INT case + __ sw(x10, Address(j_rarg2)); + + __ BIND(exit); + + // pop parameters + __ addi(esp, fp, sp_after_call_off * wordSize); + +#ifdef ASSERT + // verify that threads correspond + { + Label L, S; + __ ld(t0, thread); + __ bne(xthread, t0, S); + __ get_thread(t0); + __ beq(xthread, t0, L); + __ BIND(S); + __ stop("StubRoutines::call_stub: threads must correspond"); + __ BIND(L); + } +#endif + + // restore callee-save registers + __ ld(x27, x27_save); + __ ld(x26, x26_save); + __ ld(x25, x25_save); + __ ld(x24, x24_save); + __ ld(x23, x23_save); + __ ld(x22, x22_save); + __ ld(x21, x21_save); + __ ld(x20, x20_save); + __ ld(x19, x19_save); + __ ld(x18, x18_save); + + __ ld(x9, x9_save); + + __ ld(c_rarg0, call_wrapper); + __ ld(c_rarg1, result); + __ ld(c_rarg2, result_type); + __ ld(c_rarg3, method); + __ ld(c_rarg4, entry_point); + __ ld(c_rarg5, parameters); + __ ld(c_rarg6, parameter_size); + __ ld(c_rarg7, thread); + + // leave frame and return to caller + __ leave(); + __ ret(); + + // handle return types different from T_INT + + __ BIND(is_long); + __ sd(x10, Address(j_rarg2, 0)); + __ j(exit); + + __ BIND(is_float); + __ fsw(j_farg0, Address(j_rarg2, 0), t0); + __ j(exit); + + __ BIND(is_double); + __ fsd(j_farg0, Address(j_rarg2, 0), t0); + __ j(exit); + + return start; + } + + // Return point for a Java call if there's an exception thrown in + // Java code. 
The exception is caught and transformed into a + // pending exception stored in JavaThread that can be tested from + // within the VM. + // + // Note: Usually the parameters are removed by the callee. In case + // of an exception crossing an activation frame boundary, that is + // not the case if the callee is compiled code => need to setup the + // sp. + // + // x10: exception oop + + address generate_catch_exception() { + StubCodeMark mark(this, "StubRoutines", "catch_exception"); + address start = __ pc(); + + // same as in generate_call_stub(): + const Address thread(fp, thread_off * wordSize); + +#ifdef ASSERT + // verify that threads correspond + { + Label L, S; + __ ld(t0, thread); + __ bne(xthread, t0, S); + __ get_thread(t0); + __ beq(xthread, t0, L); + __ bind(S); + __ stop("StubRoutines::catch_exception: threads must correspond"); + __ bind(L); + } +#endif + + // set pending exception + __ verify_oop(x10); + + __ sd(x10, Address(xthread, Thread::pending_exception_offset())); + __ mv(t0, (address)__FILE__); + __ sd(t0, Address(xthread, Thread::exception_file_offset())); + __ mv(t0, (int)__LINE__); + __ sw(t0, Address(xthread, Thread::exception_line_offset())); + + // complete return to VM + assert(StubRoutines::_call_stub_return_address != NULL, + "_call_stub_return_address must have been generated before"); + __ j(StubRoutines::_call_stub_return_address); + + return start; + } + + // Continuation point for runtime calls returning with a pending + // exception. The pending exception check happened in the runtime + // or native call stub. The pending exception in Thread is + // converted into a Java-level exception. + // + // Contract with Java-level exception handlers: + // x10: exception + // x13: throwing pc + // + // NOTE: At entry of this stub, exception-pc must be in RA !! 
+ + // NOTE: this is always used as a jump target within generated code + // so it just needs to be generated code with no x86 prolog + + address generate_forward_exception() { + StubCodeMark mark(this, "StubRoutines", "forward exception"); + address start = __ pc(); + + // Upon entry, RA points to the return address returning into + // Java (interpreted or compiled) code; i.e., the return address + // becomes the throwing pc. + // + // Arguments pushed before the runtime call are still on the stack + // but the exception handler will reset the stack pointer -> + // ignore them. A potential result in registers can be ignored as + // well. + +#ifdef ASSERT + // make sure this code is only executed if there is a pending exception + { + Label L; + __ ld(t0, Address(xthread, Thread::pending_exception_offset())); + __ bnez(t0, L); + __ stop("StubRoutines::forward exception: no pending exception (1)"); + __ bind(L); + } +#endif + + // compute exception handler into x9 + + // call the VM to find the handler address associated with the + // caller address. pass thread in x10 and caller pc (ret address) + // in x11. n.b. the caller pc is in ra, unlike x86 where it is on + // the stack. + __ mv(c_rarg1, ra); + // ra will be trashed by the VM call so we move it to x9 + // (callee-saved) because we also need to pass it to the handler + // returned by this call. + __ mv(x9, ra); + BLOCK_COMMENT("call exception_handler_for_return_address"); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, + SharedRuntime::exception_handler_for_return_address), + xthread, c_rarg1); + // we should not really care that ra is no longer the callee + // address. we saved the value the handler needs in x9 so we can + // just copy it to x13. however, the C2 handler will push its own + // frame and then calls into the VM and the VM code asserts that + // the PC for the frame above the handler belongs to a compiled + // Java method. So, we restore ra here to satisfy that assert. 
+ __ mv(ra, x9); + // setup x10 & x13 & clear pending exception + __ mv(x13, x9); + __ mv(x9, x10); + __ ld(x10, Address(xthread, Thread::pending_exception_offset())); + __ sd(zr, Address(xthread, Thread::pending_exception_offset())); + +#ifdef ASSERT + // make sure exception is set + { + Label L; + __ bnez(x10, L); + __ stop("StubRoutines::forward exception: no pending exception (2)"); + __ bind(L); + } +#endif + + // continue at exception handler + // x10: exception + // x13: throwing pc + // x9: exception handler + __ verify_oop(x10); + __ jr(x9); + + return start; + } + + // Non-destructive plausibility checks for oops + // + // Arguments: + // x10: oop to verify + // t0: error message + // + // Stack after saving c_rarg3: + // [tos + 0]: saved c_rarg3 + // [tos + 1]: saved c_rarg2 + // [tos + 2]: saved ra + // [tos + 3]: saved t1 + // [tos + 4]: saved x10 + // [tos + 5]: saved t0 + address generate_verify_oop() { + + StubCodeMark mark(this, "StubRoutines", "verify_oop"); + address start = __ pc(); + + Label exit, error; + + __ push_reg(0x3000, sp); // save c_rarg2 and c_rarg3 + + __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr())); + __ ld(c_rarg3, Address(c_rarg2)); + __ add(c_rarg3, c_rarg3, 1); + __ sd(c_rarg3, Address(c_rarg2)); + + // object is in x10 + // make sure object is 'reasonable' + __ beqz(x10, exit); // if obj is NULL it is OK + +#if INCLUDE_ZGC + if (UseZGC) { + // Check if mask is good. + // verifies that ZAddressBadMask & x10 == 0 + __ ld(c_rarg3, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); + __ andr(c_rarg2, x10, c_rarg3); + __ bnez(c_rarg2, error); + } +#endif + + // Check if the oop is in the right area of memory + __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); + __ andr(c_rarg2, x10, c_rarg3); + __ mv(c_rarg3, (intptr_t) Universe::verify_oop_bits()); + + // Compare c_rarg2 and c_rarg3. + __ bne(c_rarg2, c_rarg3, error); + + // make sure klass is 'reasonable', which is not zero. 
+ __ load_klass(x10, x10); // get klass + __ beqz(x10, error); // if klass is NULL it is broken + + // return if everything seems ok + __ bind(exit); + + __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 + __ ret(); + + // handle errors + __ bind(error); + __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 + + __ pusha(); + // debug(char* msg, int64_t pc, int64_t regs[]) + __ mv(c_rarg0, t0); // pass address of error message + __ mv(c_rarg1, ra); // pass return address + __ mv(c_rarg2, sp); // pass address of regs on stack +#ifndef PRODUCT + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); +#endif + BLOCK_COMMENT("call MacroAssembler::debug"); + int32_t offset = 0; + __ movptr_with_offset(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset); + __ jalr(x1, t0, offset); + __ ebreak(); + + return start; + } + + // The inner part of zero_words(). + // + // Inputs: + // x28: the HeapWord-aligned base address of an array to zero. + // x29: the count in HeapWords, x29 > 0. + // + // Returns x28 and x29, adjusted for the caller to clear. + // x28: the base address of the tail of words left to clear. + // x29: the number of words in the tail. + // x29 < MacroAssembler::zero_words_block_size. + + address generate_zero_blocks() { + Label done; + + const Register base = x28, cnt = x29; + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "zero_blocks"); + address start = __ pc(); + + { + // Clear the remaining blocks. 
+ Label loop; + __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); + __ bltz(cnt, done); + __ bind(loop); + for (int i = 0; i < MacroAssembler::zero_words_block_size; i++) { + __ sd(zr, Address(base, 0)); + __ add(base, base, 8); + } + __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); + __ bgez(cnt, loop); + __ bind(done); + __ add(cnt, cnt, MacroAssembler::zero_words_block_size); + } + + __ ret(); + + return start; + } + + typedef enum { + copy_forwards = 1, + copy_backwards = -1 + } copy_direction; + + // Bulk copy of blocks of 8 words. + // + // count is a count of words. + // + // Precondition: count >= 8 + // + // Postconditions: + // + // The least significant bit of count contains the remaining count + // of words to copy. The rest of count is trash. + // + // s and d are adjusted to point to the remaining words to copy + // + void generate_copy_longs(Label &start, Register s, Register d, Register count, + copy_direction direction) { + int unit = wordSize * direction; + int bias = wordSize; + + const Register tmp_reg0 = x13, tmp_reg1 = x14, tmp_reg2 = x15, tmp_reg3 = x16, + tmp_reg4 = x17, tmp_reg5 = x7, tmp_reg6 = x28, tmp_reg7 = x29; + + const Register stride = x30; + + assert_different_registers(t0, tmp_reg0, tmp_reg1, tmp_reg2, tmp_reg3, + tmp_reg4, tmp_reg5, tmp_reg6, tmp_reg7); + assert_different_registers(s, d, count, t0); + + Label again, drain; + const char* stub_name = NULL; + if (direction == copy_forwards) { + stub_name = "forward_copy_longs"; + } else { + stub_name = "backward_copy_longs"; + } + StubCodeMark mark(this, "StubRoutines", stub_name); + __ align(CodeEntryAlignment); + __ bind(start); + + if (direction == copy_forwards) { + __ sub(s, s, bias); + __ sub(d, d, bias); + } + +#ifdef ASSERT + // Make sure we are never given < 8 words + { + Label L; + + __ li(t0, 8); + __ bge(count, t0, L); + __ stop("genrate_copy_longs called with < 8 words"); + __ bind(L); + } +#endif + + __ ld(tmp_reg0, Address(s, 1 * unit)); + __ 
ld(tmp_reg1, Address(s, 2 * unit)); + __ ld(tmp_reg2, Address(s, 3 * unit)); + __ ld(tmp_reg3, Address(s, 4 * unit)); + __ ld(tmp_reg4, Address(s, 5 * unit)); + __ ld(tmp_reg5, Address(s, 6 * unit)); + __ ld(tmp_reg6, Address(s, 7 * unit)); + __ ld(tmp_reg7, Address(s, 8 * unit)); + __ addi(s, s, 8 * unit); + + __ sub(count, count, 16); + __ bltz(count, drain); + + __ bind(again); + + __ sd(tmp_reg0, Address(d, 1 * unit)); + __ sd(tmp_reg1, Address(d, 2 * unit)); + __ sd(tmp_reg2, Address(d, 3 * unit)); + __ sd(tmp_reg3, Address(d, 4 * unit)); + __ sd(tmp_reg4, Address(d, 5 * unit)); + __ sd(tmp_reg5, Address(d, 6 * unit)); + __ sd(tmp_reg6, Address(d, 7 * unit)); + __ sd(tmp_reg7, Address(d, 8 * unit)); + + __ ld(tmp_reg0, Address(s, 1 * unit)); + __ ld(tmp_reg1, Address(s, 2 * unit)); + __ ld(tmp_reg2, Address(s, 3 * unit)); + __ ld(tmp_reg3, Address(s, 4 * unit)); + __ ld(tmp_reg4, Address(s, 5 * unit)); + __ ld(tmp_reg5, Address(s, 6 * unit)); + __ ld(tmp_reg6, Address(s, 7 * unit)); + __ ld(tmp_reg7, Address(s, 8 * unit)); + + __ addi(s, s, 8 * unit); + __ addi(d, d, 8 * unit); + + __ sub(count, count, 8); + __ bgez(count, again); + + // Drain + __ bind(drain); + + __ sd(tmp_reg0, Address(d, 1 * unit)); + __ sd(tmp_reg1, Address(d, 2 * unit)); + __ sd(tmp_reg2, Address(d, 3 * unit)); + __ sd(tmp_reg3, Address(d, 4 * unit)); + __ sd(tmp_reg4, Address(d, 5 * unit)); + __ sd(tmp_reg5, Address(d, 6 * unit)); + __ sd(tmp_reg6, Address(d, 7 * unit)); + __ sd(tmp_reg7, Address(d, 8 * unit)); + __ addi(d, d, 8 * unit); + + { + Label L1, L2; + __ andi(t0, count, 4); + __ beqz(t0, L1); + + __ ld(tmp_reg0, Address(s, 1 * unit)); + __ ld(tmp_reg1, Address(s, 2 * unit)); + __ ld(tmp_reg2, Address(s, 3 * unit)); + __ ld(tmp_reg3, Address(s, 4 * unit)); + __ addi(s, s, 4 * unit); + + __ sd(tmp_reg0, Address(d, 1 * unit)); + __ sd(tmp_reg1, Address(d, 2 * unit)); + __ sd(tmp_reg2, Address(d, 3 * unit)); + __ sd(tmp_reg3, Address(d, 4 * unit)); + __ addi(d, d, 4 * unit); + + 
__ bind(L1); + + if (direction == copy_forwards) { + __ addi(s, s, bias); + __ addi(d, d, bias); + } + + __ andi(t0, count, 2); + __ beqz(t0, L2); + if (direction == copy_backwards) { + __ addi(s, s, 2 * unit); + __ ld(tmp_reg0, Address(s)); + __ ld(tmp_reg1, Address(s, wordSize)); + __ addi(d, d, 2 * unit); + __ sd(tmp_reg0, Address(d)); + __ sd(tmp_reg1, Address(d, wordSize)); + } else { + __ ld(tmp_reg0, Address(s)); + __ ld(tmp_reg1, Address(s, wordSize)); + __ addi(s, s, 2 * unit); + __ sd(tmp_reg0, Address(d)); + __ sd(tmp_reg1, Address(d, wordSize)); + __ addi(d, d, 2 * unit); + } + __ bind(L2); + } + + __ ret(); + } + + Label copy_f, copy_b; + + // All-singing all-dancing memory copy. + // + // Copy count units of memory from s to d. The size of a unit is + // step, which can be positive or negative depending on the direction + // of copy. If is_aligned is false, we align the source address. + // + /* + * if (is_aligned) { + * goto copy_8_bytes; + * } + * bool is_backwards = step < 0; + * int granularity = uabs(step); + * count = count * granularity; * count bytes + * + * if (is_backwards) { + * s += count; + * d += count; + * } + * + * count limit maybe greater than 16, for better performance + * if (count < 16) { + * goto copy_small; + * } + * + * if ((dst % 8) == (src % 8)) { + * aligned; + * goto copy8; + * } + * + * copy_small: + * load element one by one; + * done; + */ + + typedef void (MacroAssembler::*copy_insn)(Register Rd, const Address &adr, Register temp); + + void copy_memory_v(Register s, Register d, Register count, Register tmp, int step) { + bool is_backward = step < 0; + int granularity = uabs(step); + + const Register src = x30, dst = x31, vl = x14, cnt = x15, tmp1 = x16, tmp2 = x17; + assert_different_registers(s, d, cnt, vl, tmp, tmp1, tmp2); + Assembler::SEW sew = Assembler::elembytes_to_sew(granularity); + Label loop_forward, loop_backward, done; + + __ mv(dst, d); + __ mv(src, s); + __ mv(cnt, count); + + __ bind(loop_forward); + __ 
vsetvli(vl, cnt, sew, Assembler::m8); + if (is_backward) { + __ bne(vl, cnt, loop_backward); + } + + __ vlex_v(v0, src, sew); + __ sub(cnt, cnt, vl); + __ slli(vl, vl, (int)sew); + __ add(src, src, vl); + + __ vsex_v(v0, dst, sew); + __ add(dst, dst, vl); + __ bnez(cnt, loop_forward); + + if (is_backward) { + __ j(done); + + __ bind(loop_backward); + __ sub(tmp, cnt, vl); + __ slli(tmp, tmp, sew); + __ add(tmp1, s, tmp); + __ vlex_v(v0, tmp1, sew); + __ add(tmp2, d, tmp); + __ vsex_v(v0, tmp2, sew); + __ sub(cnt, cnt, vl); + __ bnez(cnt, loop_forward); + __ bind(done); + } + } + + void copy_memory(bool is_aligned, Register s, Register d, + Register count, Register tmp, int step) { + if (UseRVV) { + return copy_memory_v(s, d, count, tmp, step); + } + + bool is_backwards = step < 0; + int granularity = uabs(step); + + const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17; + + Label same_aligned; + Label copy8, copy_small, done; + + copy_insn ld_arr = NULL, st_arr = NULL; + switch (granularity) { + case 1 : + ld_arr = (copy_insn)&MacroAssembler::lbu; + st_arr = (copy_insn)&MacroAssembler::sb; + break; + case 2 : + ld_arr = (copy_insn)&MacroAssembler::lhu; + st_arr = (copy_insn)&MacroAssembler::sh; + break; + case 4 : + ld_arr = (copy_insn)&MacroAssembler::lwu; + st_arr = (copy_insn)&MacroAssembler::sw; + break; + case 8 : + ld_arr = (copy_insn)&MacroAssembler::ld; + st_arr = (copy_insn)&MacroAssembler::sd; + break; + default : + ShouldNotReachHere(); + } + + __ beqz(count, done); + __ slli(cnt, count, exact_log2(granularity)); + if (is_backwards) { + __ add(src, s, cnt); + __ add(dst, d, cnt); + } else { + __ mv(src, s); + __ mv(dst, d); + } + + if (is_aligned) { + __ addi(tmp, cnt, -8); + __ bgez(tmp, copy8); + __ j(copy_small); + } + + __ mv(tmp, 16); + __ blt(cnt, tmp, copy_small); + + __ xorr(tmp, src, dst); + __ andi(tmp, tmp, 0b111); + __ bnez(tmp, copy_small); + + __ bind(same_aligned); + __ andi(tmp, src, 0b111); + __ beqz(tmp, copy8); + if 
(is_backwards) { + __ addi(src, src, step); + __ addi(dst, dst, step); + } + (_masm->*ld_arr)(tmp3, Address(src), t0); + (_masm->*st_arr)(tmp3, Address(dst), t0); + if (!is_backwards) { + __ addi(src, src, step); + __ addi(dst, dst, step); + } + __ addi(cnt, cnt, -granularity); + __ beqz(cnt, done); + __ j(same_aligned); + + __ bind(copy8); + if (is_backwards) { + __ addi(src, src, -wordSize); + __ addi(dst, dst, -wordSize); + } + __ ld(tmp3, Address(src)); + __ sd(tmp3, Address(dst)); + if (!is_backwards) { + __ addi(src, src, wordSize); + __ addi(dst, dst, wordSize); + } + __ addi(cnt, cnt, -wordSize); + __ addi(tmp4, cnt, -8); + __ bgez(tmp4, copy8); // cnt >= 8, do next loop + + __ beqz(cnt, done); + + __ bind(copy_small); + if (is_backwards) { + __ addi(src, src, step); + __ addi(dst, dst, step); + } + (_masm->*ld_arr)(tmp3, Address(src), t0); + (_masm->*st_arr)(tmp3, Address(dst), t0); + if (!is_backwards) { + __ addi(src, src, step); + __ addi(dst, dst, step); + } + __ addi(cnt, cnt, -granularity); + __ bgtz(cnt, copy_small); + + __ bind(done); + } + + // Scan over array at a for count oops, verifying each one. + // Preserves a and count, clobbers t0 and t1. 
+  void verify_oop_array(size_t size, Register a, Register count, Register temp) {
+    // t1 walks the byte offsets 0, size, 2*size, ... up to t0 = count * size.
+    // temp first receives the element address and then the element itself,
+    // so it is overwritten as well.
+    const bool full_word_oops = (size == (size_t)wordSize);
+    Label L_next, L_finished;
+
+    __ mv(t1, zr);
+    __ slli(t0, count, exact_log2(size));
+
+    __ bind(L_next);
+    __ bgeu(t1, t0, L_finished);
+    __ add(temp, a, t1);
+    if (full_word_oops) {
+      // uncompressed oop: check the loaded word directly
+      __ ld(temp, Address(temp, 0));
+      __ verify_oop(temp);
+    } else {
+      // narrow oop: load 32 bits and decode
+      __ lwu(temp, Address(temp, 0));
+      __ decode_heap_oop(temp); // calls verify_oop
+    }
+    __ add(t1, t1, size);
+    __ j(L_next);
+
+    __ bind(L_finished);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   is_oop  - true => oop array, so generate store check code
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+  // the hardware handle it.  The two dwords within qwords that span
+  // cache line boundaries will still be loaded and stored atomically.
+  //
+  // Side Effects:
+  //   disjoint_int_copy_entry is set to the no-overlap entry point
+  //   used by generate_conjoint_int_oop_copy().
+  //
+  // generate_disjoint_copy is the common generator behind the typed
+  // disjoint (forward) copy stubs:
+  //   size               - element size in bytes
+  //   entry              - if non-NULL, receives the address right after the
+  //                        frame setup, for callers that jump into the stub
+  //   dest_uninitialized - adds IS_DEST_UNINITIALIZED to the barrier decorators
+  // The generated stub returns 0 in x10.
+  //
+  address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address* entry,
+                                 const char* name, bool dest_uninitialized = false) {
+    const Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
+    RegSet saved_reg = RegSet::of(s, d, count);
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+    __ enter();
+
+    if (entry != NULL) {
+      // Secondary entry point: placed after enter(), so whoever jumps here
+      // must already have set up a frame.
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
+    }
+
+    DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
+    if (dest_uninitialized) {
+      decorators |= IS_DEST_UNINITIALIZED;
+    }
+    if (aligned) {
+      decorators |= ARRAYCOPY_ALIGNED;
+    }
+
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+    bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_reg);
+
+    if (is_oop) {
+      // save regs before copy_memory
+      // (d and count are needed again for verification and the epilogue)
+      __ push_reg(RegSet::of(d, count), sp);
+    }
+
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size);
+      UnsafeCopyMemoryMark ucmm(this, add_entry, true);
+      // positive step => copy from low to high addresses
+      copy_memory(aligned, s, d, count, t0, size);
+    }
+
+    if (is_oop) {
+      __ pop_reg(RegSet::of(d, count), sp);
+      if (VerifyOops) {
+        verify_oop_array(size, d, count, t2);
+      }
+    }
+
+    bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet());
+
+    __ leave();
+    __ mv(x10, zr); // return 0
+    __ ret();
+    return start;
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   is_oop  - true => oop array, so generate store check code
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+  // the hardware handle it.
The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target, + address* entry, const char* name, + bool dest_uninitialized = false) { + const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; + RegSet saved_regs = RegSet::of(s, d, count); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + __ enter(); + + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + BLOCK_COMMENT("Entry:"); + } + + // use fwd copy when (d-s) above_equal (count*size) + __ sub(t0, d, s); + __ slli(t1, count, exact_log2(size)); + __ bgeu(t0, t1, nooverlap_target); + + DecoratorSet decorators = IN_HEAP | IS_ARRAY; + if (dest_uninitialized) { + decorators |= IS_DEST_UNINITIALIZED; + } + if (aligned) { + decorators |= ARRAYCOPY_ALIGNED; + } + + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_regs); + + if (is_oop) { + // save regs before copy_memory + __ push_reg(RegSet::of(d, count), sp); + } + + { + // UnsafeCopyMemory page error: continue after ucm + bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); + UnsafeCopyMemoryMark ucmm(this, add_entry, true); + copy_memory(aligned, s, d, count, t0, -size); + } + + if (is_oop) { + __ pop_reg(RegSet::of(d, count), sp); + if (VerifyOops) { + verify_oop_array(size, d, count, t2); + } + } + bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); + __ leave(); + __ mv(x10, zr); // return 0 + __ ret(); + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated 
as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + // Side Effects: + // disjoint_byte_copy_entry is set to the no-overlap entry point // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + // Side Effects: + // disjoint_byte_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_byte_copy(). + // + address generate_disjoint_byte_copy(bool aligned, address* entry, const char* name) { + const bool not_oop = false; + return generate_disjoint_copy(sizeof (jbyte), aligned, not_oop, entry, name); + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. 
+  //
+  address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
+                                      address* entry, const char* name) {
+    // jbyte elements are never oops.
+    return generate_conjoint_copy(sizeof (jbyte), aligned, /* is_oop */ false,
+                                  nooverlap_target, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
+  // let the hardware handle it.  The two or four words within dwords
+  // or qwords that span cache line boundaries will still be loaded
+  // and stored atomically.
+  //
+  // Side Effects:
+  //   disjoint_short_copy_entry is set to the no-overlap entry point
+  //   used by generate_conjoint_short_copy().
+  //
+  address generate_disjoint_short_copy(bool aligned,
+                                       address* entry, const char* name) {
+    // jshort elements are never oops.
+    return generate_disjoint_copy(sizeof (jshort), aligned, /* is_oop */ false, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
+  // let the hardware handle it.  The two or four words within dwords
+  // or qwords that span cache line boundaries will still be loaded
+  // and stored atomically.
+  //
+  address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
+                                       address* entry, const char* name) {
+    // jshort elements are never oops.
+    return generate_conjoint_copy(sizeof (jshort), aligned, /* is_oop */ false,
+                                  nooverlap_target, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+  // the hardware handle it.  The two dwords within qwords that span
+  // cache line boundaries will still be loaded and stored atomically.
+  //
+  // Side Effects:
+  //   disjoint_int_copy_entry is set to the no-overlap entry point
+  //   used by generate_conjoint_int_oop_copy().
+  //
+  address generate_disjoint_int_copy(bool aligned, address* entry,
+                                     const char* name, bool dest_uninitialized = false) {
+    // jint elements are never oops. dest_uninitialized is not forwarded.
+    return generate_disjoint_copy(sizeof (jint), aligned, /* is_oop */ false, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+  // the hardware handle it.  The two dwords within qwords that span
+  // cache line boundaries will still be loaded and stored atomically.
+  //
+  address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
+                                     address* entry, const char* name,
+                                     bool dest_uninitialized = false) {
+    // jint elements are never oops. dest_uninitialized is not forwarded.
+    return generate_conjoint_copy(sizeof (jint), aligned, /* is_oop */ false,
+                                  nooverlap_target, entry, name);
+  }
+
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as size_t, can be zero
+  //
+  // Side Effects:
+  //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
+  //   no-overlap entry point used by generate_conjoint_long_oop_copy().
+  //
+  address generate_disjoint_long_copy(bool aligned, address* entry,
+                                      const char* name, bool dest_uninitialized = false) {
+    // jlong elements are never oops. dest_uninitialized is not forwarded.
+    return generate_disjoint_copy(sizeof (jlong), aligned, /* is_oop */ false, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as size_t, can be zero
+  //
+  address generate_conjoint_long_copy(bool aligned,
+                                      address nooverlap_target, address* entry,
+                                      const char* name, bool dest_uninitialized = false) {
+    // jlong elements are never oops. dest_uninitialized is not forwarded.
+    return generate_conjoint_copy(sizeof (jlong), aligned, /* is_oop */ false,
+                                  nooverlap_target, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as size_t, can be zero
+  //
+  // Side Effects:
+  //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
+  //   no-overlap entry
point used by generate_conjoint_long_oop_copy().
+  //
+  address generate_disjoint_oop_copy(bool aligned, address* entry,
+                                     const char* name, bool dest_uninitialized) {
+    const bool is_oop = true;
+    // element size follows the oop encoding: 4 bytes when compressed, else 8
+    const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
+    return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as size_t, can be zero
+  //
+  address generate_conjoint_oop_copy(bool aligned,
+                                     address nooverlap_target, address* entry,
+                                     const char* name, bool dest_uninitialized) {
+    const bool is_oop = true;
+    // element size follows the oop encoding: 4 bytes when compressed, else 8
+    const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
+    return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry,
+                                  name, dest_uninitialized);
+  }
+
+  // Helper for generating a dynamic type check.
+  // Smashes t0, t1.
+  //
+  // Control reaches L_success when sub_klass passes the fast or the slow
+  // subtype check against super_klass; otherwise execution falls through to
+  // the code emitted after this helper.
+  void generate_type_check(Register sub_klass,
+                           Register super_check_offset,
+                           Register super_klass,
+                           Label& L_success) {
+    assert_different_registers(sub_klass, super_check_offset, super_klass);
+
+    BLOCK_COMMENT("type_check:");
+
+    Label L_miss;
+
+    __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, super_check_offset);
+    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);
+
+    // Fall through on failure!
+    __ BIND(L_miss);
+  }
+
+  //
+  //  Generate checkcasting array copy stub
+  //
+  //  Input:
+  //    c_rarg0   - source array address
+  //    c_rarg1   - destination array address
+  //    c_rarg2   - element count, treated as ssize_t, can be zero
+  //    c_rarg3   - size_t ckoff (super_check_offset)
+  //    c_rarg4   - oop ckval (super_klass)
+  //
+  //  Output:
+  //    x10 ==  0  -  success
+  //    x10 == -1^K - failure, where K is partial transfer count
+  //
+  address generate_checkcast_copy(const char* name, address* entry,
+                                  bool dest_uninitialized = false) {
+    Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop;
+
+    // Input registers (after setup_arg_regs)
+    const Register from        = c_rarg0;   // source array address
+    const Register to          = c_rarg1;   // destination array address
+    const Register count       = c_rarg2;   // elements count
+    const Register ckoff       = c_rarg3;   // super_check_offset
+    const Register ckval       = c_rarg4;   // super_klass
+
+    RegSet wb_pre_saved_regs   = RegSet::range(c_rarg0, c_rarg4);
+    RegSet wb_post_saved_regs  = RegSet::of(count);
+
+    // Registers used as temps (x7, x9, x18 and x19 are pushed below and
+    // popped again at L_done_pop)
+    const Register count_save  = x19;       // orig elements count
+    const Register start_to    = x18;       // destination array start address
+    const Register copied_oop  = x7;        // actual oop copied
+    const Register r9_klass    = x9;        // oop._klass
+
+    //---------------------------------------------------------------
+    // Assembler stub will be used for this call to arraycopy
+    // if the two arrays are subtypes of Object[] but the
+    // destination array type is not equal to or a supertype
+    // of the source type.  Each element must be separately
+    // checked.
+
+    assert_different_registers(from, to, count, ckoff, ckval, start_to,
+                               copied_oop, r9_klass, count_save);
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    // Caller of this entry point must set up the argument registers.
+    if (entry != NULL) {
+      *entry = __ pc();
+      BLOCK_COMMENT("Entry:");
+    }
+
+    // Empty array:  Nothing to do
+    // (count is zero here, so L_done returns 0 == success)
+    __ beqz(count, L_done);
+
+    __ push_reg(RegSet::of(x7, x9, x18, x19), sp);
+
+#ifdef ASSERT
+    BLOCK_COMMENT("assert consistent ckoff/ckval");
+    // The ckoff and ckval must be mutually consistent,
+    // even though caller generates both.
+    { Label L;
+      int sco_offset = in_bytes(Klass::super_check_offset_offset());
+      __ lwu(start_to, Address(ckval, sco_offset));
+      __ beq(ckoff, start_to, L);
+      __ stop("super_check_offset inconsistent");
+      __ bind(L);
+    }
+#endif //ASSERT
+
+    DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT;
+    bool is_oop = true;
+    if (dest_uninitialized) {
+      decorators |= IS_DEST_UNINITIALIZED;
+    }
+
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+    bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs);
+
+    // save the original count
+    __ mv(count_save, count);
+
+    // Copy from low to high addresses
+    __ mv(start_to, to);              // Save destination array start address
+    __ j(L_load_element);
+
+    // ======== begin loop ========
+    // (Loop is rotated; its entry is L_load_element.)
+    // Loop control:
+    //   for count to 0 do
+    //     copied_oop = load_heap_oop(from++)
+    //     ... generate_type_check ...
+    //     store_heap_oop(to++, copied_oop)
+    //   end
+
+    __ align(OptoLoopAlignment);
+
+    __ BIND(L_store_element);
+    __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, AS_RAW);  // store the oop
+    __ add(to, to, UseCompressedOops ? 4 : 8);
+    __ sub(count, count, 1);
+    // all elements stored: count == 0, which is also the success return value
+    __ beqz(count, L_do_card_marks);
+
+    // ======== loop entry is here ========
+    __ BIND(L_load_element);
+    __ load_heap_oop(copied_oop, Address(from, 0), noreg, noreg, AS_RAW); // load the oop
+    __ add(from, from, UseCompressedOops ? 4 : 8);
+    // a null element needs no type check
+    __ beqz(copied_oop, L_store_element);
+
+    __ load_klass(r9_klass, copied_oop);// query the object klass
+    generate_type_check(r9_klass, ckoff, ckval, L_store_element);
+    // ======== end loop ========
+
+    // It was a real error; we must depend on the caller to finish the job.
+    // Register count = remaining oops, count_save = total oops.
+    // Emit GC store barriers for the oops we have copied and report
+    // their number to the caller.
+
+    __ sub(count, count_save, count);     // K = partially copied oop count
+    __ xori(count, count, -1);                   // report (-1^K) to caller
+    // NOTE(review): count now holds ~K, which is non-zero for every K >= 0,
+    // so this branch looks never-taken and the epilogue below also runs when
+    // K == 0. Presumably the intent was to skip it when nothing was copied;
+    // confirm before changing.
+    __ beqz(count, L_done_pop);
+
+    __ BIND(L_do_card_marks);
+    // the epilogue is given the original element count (count_save), not K
+    bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, t0, wb_post_saved_regs);
+
+    __ bind(L_done_pop);
+    __ pop_reg(RegSet::of(x7, x9, x18, x19), sp);
+    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
+
+    __ bind(L_done);
+    __ mv(x10, count);
+    __ leave();
+    __ ret();
+
+    return start;
+  }
+
+  // Perform range checks on the proposed arraycopy.
+  // Kills temp, but nothing else.
+  // Also, clean the sign bits of src_pos and dst_pos.
+  void arraycopy_range_checks(Register src,     // source array oop (c_rarg0)
+                              Register src_pos, // source position (c_rarg1)
+                              Register dst,     // destination array oop (c_rarg2)
+                              Register dst_pos, // destination position (c_rarg3)
+                              Register length,
+                              Register temp,
+                              Label& L_failed) {
+    BLOCK_COMMENT("arraycopy_range_checks:");
+
+    // t0 is overwritten with the array lengths below, so temp must be a
+    // different register.
+    assert_different_registers(t0, temp);
+
+    // if [src_pos + length > arrayOop(src)->length()] then FAIL
+    // (32-bit add, then an unsigned compare against the zero-extended length)
+    __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes()));
+    __ addw(temp, length, src_pos);
+    __ bgtu(temp, t0, L_failed);
+
+    // if [dst_pos + length > arrayOop(dst)->length()] then FAIL
+    __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes()));
+    __ addw(temp, length, dst_pos);
+    __ bgtu(temp, t0, L_failed);
+
+    // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'.
+    __ zero_extend(src_pos, src_pos, 32);
+    __ zero_extend(dst_pos, dst_pos, 32);
+
+    BLOCK_COMMENT("arraycopy_range_checks done");
+  }
+
+  //
+  //  Generate 'unsafe' array copy stub
+  //  Though just as safe as the other stubs, it takes an unscaled
+  //  size_t argument instead of an element count.
+  //
+  //  Input:
+  //    c_rarg0   - source array address
+  //    c_rarg1   - destination array address
+  //    c_rarg2   - byte count, treated as ssize_t, can be zero
+  //
+  // Examines the alignment of the operands and dispatches
+  // to a long, int, short, or byte copy loop.
+ // + address generate_unsafe_copy(const char* name, + address byte_copy_entry, + address short_copy_entry, + address int_copy_entry, + address long_copy_entry) { + assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && + int_copy_entry != NULL && long_copy_entry != NULL); + Label L_long_aligned, L_int_aligned, L_short_aligned; + const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + // bump this on entry, not on exit: + inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); + + __ orr(t0, s, d); + __ orr(t0, t0, count); + + __ andi(t0, t0, BytesPerLong - 1); + __ beqz(t0, L_long_aligned); + __ andi(t0, t0, BytesPerInt - 1); + __ beqz(t0, L_int_aligned); + __ andi(t0, t0, 1); + __ beqz(t0, L_short_aligned); + __ j(RuntimeAddress(byte_copy_entry)); + + __ BIND(L_short_aligned); + __ srli(count, count, LogBytesPerShort); // size => short_count + __ j(RuntimeAddress(short_copy_entry)); + __ BIND(L_int_aligned); + __ srli(count, count, LogBytesPerInt); // size => int_count + __ j(RuntimeAddress(int_copy_entry)); + __ BIND(L_long_aligned); + __ srli(count, count, LogBytesPerLong); // size => long_count + __ j(RuntimeAddress(long_copy_entry)); + + return start; + } + + // + // Generate generic array copy stubs + // + // Input: + // c_rarg0 - src oop + // c_rarg1 - src_pos (32-bits) + // c_rarg2 - dst oop + // c_rarg3 - dst_pos (32-bits) + // c_rarg4 - element count (32-bits) + // + // Output: + // x10 == 0 - success + // x10 == -1^K - failure, where K is partial transfer count + // + address generate_generic_copy(const char* name, + address byte_copy_entry, address short_copy_entry, + address int_copy_entry, address oop_copy_entry, + address long_copy_entry, address checkcast_copy_entry) { + assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && + int_copy_entry 
!= NULL && oop_copy_entry != NULL && + long_copy_entry != NULL && checkcast_copy_entry != NULL); + Label L_failed, L_failed_0, L_objArray; + Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; + + // Input registers + const Register src = c_rarg0; // source array oop + const Register src_pos = c_rarg1; // source position + const Register dst = c_rarg2; // destination array oop + const Register dst_pos = c_rarg3; // destination position + const Register length = c_rarg4; + + // Registers used as temps + const Register dst_klass = c_rarg5; + + __ align(CodeEntryAlignment); + + StubCodeMark mark(this, "StubRoutines", name); + + address start = __ pc(); + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + // bump this on entry, not on exit: + inc_counter_np(SharedRuntime::_generic_array_copy_ctr); + + //----------------------------------------------------------------------- + // Assembler stub will be used for this call to arraycopy + // if the following conditions are met: + // + // (1) src and dst must not be null. + // (2) src_pos must not be negative. + // (3) dst_pos must not be negative. + // (4) length must not be negative. + // (5) src klass and dst klass should be the same and not NULL. + // (6) src and dst should be arrays. + // (7) src_pos + length must not exceed length of src. + // (8) dst_pos + length must not exceed length of dst. + // + + // if [src == NULL] then return -1 + __ beqz(src, L_failed); + + // if [src_pos < 0] then return -1 + // i.e. sign bit set + __ andi(t0, src_pos, 1UL << 31); + __ bnez(t0, L_failed); + + // if [dst == NULL] then return -1 + __ beqz(dst, L_failed); + + // if [dst_pos < 0] then return -1 + // i.e. 
sign bit set + __ andi(t0, dst_pos, 1UL << 31); + __ bnez(t0, L_failed); + + // registers used as temp + const Register scratch_length = x28; // elements count to copy + const Register scratch_src_klass = x29; // array klass + const Register lh = x30; // layout helper + + // if [length < 0] then return -1 + __ addw(scratch_length, length, zr); // length (elements count, 32-bits value) + // i.e. sign bit set + __ andi(t0, scratch_length, 1UL << 31); + __ bnez(t0, L_failed); + + __ load_klass(scratch_src_klass, src); +#ifdef ASSERT + { + BLOCK_COMMENT("assert klasses not null {"); + Label L1, L2; + __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL + __ bind(L1); + __ stop("broken null klass"); + __ bind(L2); + __ load_klass(t0, dst); + __ beqz(t0, L1); // this would be broken also + BLOCK_COMMENT("} assert klasses not null done"); + } +#endif + + // Load layout helper (32-bits) + // + // |array_tag| | header_size | element_type | |log2_element_size| + // 32 30 24 16 8 2 0 + // + // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 + // + + const int lh_offset = in_bytes(Klass::layout_helper_offset()); + + // Handle objArrays completely differently... + const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); + __ lw(lh, Address(scratch_src_klass, lh_offset)); + __ mvw(t0, objArray_lh); + __ beq(lh, t0, L_objArray); + + // if [src->klass() != dst->klass()] then return -1 + __ load_klass(t1, dst); + __ bne(t1, scratch_src_klass, L_failed); + + // if [src->is_Array() != NULL] then return -1 + // i.e. (lh >= 0) + __ andi(t0, lh, 1UL << 31); + __ beqz(t0, L_failed); + + // At this point, it is known to be a typeArray (array_tag 0x3). 
+#ifdef ASSERT + { + BLOCK_COMMENT("assert primitive array {"); + Label L; + __ mvw(t1, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift); + __ bge(lh, t1, L); + __ stop("must be a primitive array"); + __ bind(L); + BLOCK_COMMENT("} assert primitive array done"); + } +#endif + + arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, + t1, L_failed); + + // TypeArrayKlass + // + // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize) + // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize) + // + + const Register t0_offset = t0; // array offset + const Register x22_elsize = lh; // element size + + // Get array_header_in_bytes() + int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); + int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; + __ slli(t0_offset, lh, XLEN - lh_header_size_msb); // left shift to remove 24 ~ 32; + __ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset + + __ add(src, src, t0_offset); // src array offset + __ add(dst, dst, t0_offset); // dst array offset + BLOCK_COMMENT("choose copy loop based on element size"); + + // next registers should be set before the jump to corresponding stub + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register count = c_rarg2; // elements count + + // 'from', 'to', 'count' registers should be set in such order + // since they are the same as 'src', 'src_pos', 'dst'. + + assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); + + // The possible values of elsize are 0-3, i.e. exact_log2(element + // size in bytes). We do a simple bitwise binary search. 
+ __ BIND(L_copy_bytes); + __ andi(t0, x22_elsize, 2); + __ bnez(t0, L_copy_ints); + __ andi(t0, x22_elsize, 1); + __ bnez(t0, L_copy_shorts); + __ add(from, src, src_pos); // src_addr + __ add(to, dst, dst_pos); // dst_addr + __ addw(count, scratch_length, zr); // length + __ j(RuntimeAddress(byte_copy_entry)); + + __ BIND(L_copy_shorts); + __ shadd(from, src_pos, src, t0, 1); // src_addr + __ shadd(to, dst_pos, dst, t0, 1); // dst_addr + __ addw(count, scratch_length, zr); // length + __ j(RuntimeAddress(short_copy_entry)); + + __ BIND(L_copy_ints); + __ andi(t0, x22_elsize, 1); + __ bnez(t0, L_copy_longs); + __ shadd(from, src_pos, src, t0, 2); // src_addr + __ shadd(to, dst_pos, dst, t0, 2); // dst_addr + __ addw(count, scratch_length, zr); // length + __ j(RuntimeAddress(int_copy_entry)); + + __ BIND(L_copy_longs); +#ifdef ASSERT + { + BLOCK_COMMENT("assert long copy {"); + Label L; + __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> x22_elsize + __ addw(lh, lh, zr); + __ mvw(t0, LogBytesPerLong); + __ beq(x22_elsize, t0, L); + __ stop("must be long copy, but elsize is wrong"); + __ bind(L); + BLOCK_COMMENT("} assert long copy done"); + } +#endif + __ shadd(from, src_pos, src, t0, 3); // src_addr + __ shadd(to, dst_pos, dst, t0, 3); // dst_addr + __ addw(count, scratch_length, zr); // length + __ j(RuntimeAddress(long_copy_entry)); + + // ObjArrayKlass + __ BIND(L_objArray); + // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] + + Label L_plain_copy, L_checkcast_copy; + // test array classes for subtyping + __ load_klass(t2, dst); + __ bne(scratch_src_klass, t2, L_checkcast_copy); // usual case is exact equality + + // Identically typed arrays can be copied without element-wise checks. 
+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, + t1, L_failed); + + __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); + __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); + __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + __ addw(count, scratch_length, zr); // length + __ BIND(L_plain_copy); + __ j(RuntimeAddress(oop_copy_entry)); + + __ BIND(L_checkcast_copy); + // live at this point: scratch_src_klass, scratch_length, t2 (dst_klass) + { + // Before looking at dst.length, make sure dst is also an objArray. + __ lwu(t0, Address(t2, lh_offset)); + __ mvw(t1, objArray_lh); + __ bne(t0, t1, L_failed); + + // It is safe to examine both src.length and dst.length. + arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, + t2, L_failed); + + __ load_klass(dst_klass, dst); // reload + + // Marshal the base address arguments now, freeing registers. + __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); + __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); + __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + __ addw(count, length, zr); // length (reloaded) + const Register sco_temp = c_rarg3; // this register is free now + assert_different_registers(from, to, count, sco_temp, + dst_klass, scratch_src_klass); + + // Generate the type check. + const int sco_offset = in_bytes(Klass::super_check_offset_offset()); + __ lwu(sco_temp, Address(dst_klass, sco_offset)); + + // Smashes t0, t1 + generate_type_check(scratch_src_klass, sco_temp, dst_klass, L_plain_copy); + + // Fetch destination element klass from the ObjArrayKlass header. 
+ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); + __ ld(dst_klass, Address(dst_klass, ek_offset)); + __ lwu(sco_temp, Address(dst_klass, sco_offset)); + + // the checkcast_copy loop needs two extra arguments: + assert(c_rarg3 == sco_temp, "#3 already in place"); + // Set up arguments for checkcast_copy_entry. + __ mv(c_rarg4, dst_klass); // dst.klass.element_klass + __ j(RuntimeAddress(checkcast_copy_entry)); + } + + __ BIND(L_failed); + __ li(x10, -1); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(); + + return start; + } + + // + // Generate stub for array fill. If "aligned" is true, the + // "to" address is assumed to be heapword aligned. + // + // Generator parameters: + // t: element type; T_BYTE, T_SHORT and T_INT are handled, anything else hits ShouldNotReachHere() + // aligned: when false, extra code is emitted that first aligns "to" to 8 bytes + // name: stub name passed to StubCodeMark + // Returns the pc at which the generated stub starts. + // + // Arguments for generated stub: + // to: c_rarg0 + // value: c_rarg1 + // count: c_rarg2 treated as signed + // NOTE(review): the short-array checks below use bltu (unsigned compare) on count; + // confirm that this is intended given "treated as signed". + // + address generate_fill(BasicType t, bool aligned, const char* name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + BLOCK_COMMENT("Entry:"); + + const Register to = c_rarg0; // destination array address + const Register value = c_rarg1; // value + const Register count = c_rarg2; // elements count + + const Register bz_base = x28; // base for block_zero routine + const Register cnt_words = x29; // temp register + const Register tmp_reg = t1; + + __ enter(); + + Label L_fill_elements, L_exit1; + + int shift = -1; + switch (t) { + case T_BYTE: + shift = 0; + + // Zero extend value + // 8 bit -> 16 bit + __ andi(value, value, 0xff); + __ mv(tmp_reg, value); + __ slli(tmp_reg, tmp_reg, 8); + __ orr(value, value, tmp_reg); + + // 16 bit -> 32 bit + __ mv(tmp_reg, value); + __ slli(tmp_reg, tmp_reg, 16); + __ orr(value, value, tmp_reg); + + __ mv(tmp_reg, 8 >> shift); // Short arrays (< 8 bytes) fill by element + __ bltu(count, tmp_reg, L_fill_elements); + break; + case T_SHORT: + shift = 1; + // Zero extend value + // 16 bit -> 32 bit + __ andi(value, value, 0xffff); + __ mv(tmp_reg, value); + __
slli(tmp_reg, tmp_reg, 16); + __ orr(value, value, tmp_reg); + + // Short arrays (< 8 bytes) fill by element + __ mv(tmp_reg, 8 >> shift); + __ bltu(count, tmp_reg, L_fill_elements); + break; + case T_INT: + shift = 2; + + // Short arrays (< 8 bytes) fill by element + __ mv(tmp_reg, 8 >> shift); + __ bltu(count, tmp_reg, L_fill_elements); + break; + default: ShouldNotReachHere(); + } + + // Align destination ("to") address at 8 bytes address boundary. + Label L_skip_align1, L_skip_align2, L_skip_align4; + if (!aligned) { + switch (t) { + case T_BYTE: + // One byte misalignment happens only for byte arrays. + __ andi(t0, to, 1); + __ beqz(t0, L_skip_align1); + __ sb(value, Address(to, 0)); + __ addi(to, to, 1); + __ addiw(count, count, -1); + __ bind(L_skip_align1); + // Fallthrough + case T_SHORT: + // Two bytes misalignment happens only for byte and short (char) arrays. + __ andi(t0, to, 2); + __ beqz(t0, L_skip_align2); + __ sh(value, Address(to, 0)); + __ addi(to, to, 2); + __ addiw(count, count, -(2 >> shift)); + __ bind(L_skip_align2); + // Fallthrough + case T_INT: + // Align to 8 bytes, we know we are 4 byte aligned to start. + __ andi(t0, to, 4); + __ beqz(t0, L_skip_align4); + __ sw(value, Address(to, 0)); + __ addi(to, to, 4); + __ addiw(count, count, -(4 >> shift)); + __ bind(L_skip_align4); + break; + default: ShouldNotReachHere(); + } + } + + // + // Fill large chunks + // + __ srliw(cnt_words, count, 3 - shift); // number of words + + // 32 bit -> 64 bit + __ andi(value, value, 0xffffffff); + __ mv(tmp_reg, value); + __ slli(tmp_reg, tmp_reg, 32); + __ orr(value, value, tmp_reg); + + __ slli(tmp_reg, cnt_words, 3 - shift); + __ subw(count, count, tmp_reg); + { + __ fill_words(to, cnt_words, value); + } + + // Remaining count is less than 8 bytes. Fill it by a single store. + // Note that the total length is no less than 8 bytes.
+ if (t == T_BYTE || t == T_SHORT) { + __ beqz(count, L_exit1); + __ shadd(to, count, to, tmp_reg, shift); // points to the end + __ sd(value, Address(to, -8)); // overwrite some elements + __ bind(L_exit1); + __ leave(); + __ ret(); + } + + // Handle fills of less than 8 bytes. Short arrays branch here via L_fill_elements; + // for T_INT the large-chunk path also falls through to here for its remainder. + Label L_fill_2, L_fill_4, L_exit2; + __ bind(L_fill_elements); + switch (t) { + case T_BYTE: + __ andi(t0, count, 1); + __ beqz(t0, L_fill_2); + __ sb(value, Address(to, 0)); + __ addi(to, to, 1); + __ bind(L_fill_2); + __ andi(t0, count, 2); + __ beqz(t0, L_fill_4); + __ sh(value, Address(to, 0)); + __ addi(to, to, 2); + __ bind(L_fill_4); + __ andi(t0, count, 4); + __ beqz(t0, L_exit2); + __ sw(value, Address(to, 0)); + break; + case T_SHORT: + __ andi(t0, count, 1); + __ beqz(t0, L_fill_4); + __ sh(value, Address(to, 0)); + __ addi(to, to, 2); + __ bind(L_fill_4); + __ andi(t0, count, 2); + __ beqz(t0, L_exit2); + __ sw(value, Address(to, 0)); + break; + case T_INT: + __ beqz(count, L_exit2); + __ sw(value, Address(to, 0)); + break; + default: ShouldNotReachHere(); + } + __ bind(L_exit2); + __ leave(); + __ ret(); + return start; + } + + // Generate the arraycopy and array-fill stubs and store their entry points in StubRoutines. + void generate_arraycopy_stubs() { + address entry = NULL; + address entry_jbyte_arraycopy = NULL; + address entry_jshort_arraycopy = NULL; + address entry_jint_arraycopy = NULL; + address entry_oop_arraycopy = NULL; + address entry_jlong_arraycopy = NULL; + address entry_checkcast_arraycopy = NULL; + + generate_copy_longs(copy_f, c_rarg0, c_rarg1, t1, copy_forwards); + generate_copy_longs(copy_b, c_rarg0, c_rarg1, t1, copy_backwards); + + StubRoutines::riscv::_zero_blocks = generate_zero_blocks(); + + //*** jbyte + // Always need aligned and unaligned versions + StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry, + "jbyte_disjoint_arraycopy"); + StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry, + &entry_jbyte_arraycopy, + "jbyte_arraycopy"); + StubRoutines::_arrayof_jbyte_disjoint_arraycopy =
generate_disjoint_byte_copy(true, &entry, + "arrayof_jbyte_disjoint_arraycopy"); + StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL, + "arrayof_jbyte_arraycopy"); + + //*** jshort + // Always need aligned and unaligned versions + StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry, + "jshort_disjoint_arraycopy"); + StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, + &entry_jshort_arraycopy, + "jshort_arraycopy"); + StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry, + "arrayof_jshort_disjoint_arraycopy"); + StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL, + "arrayof_jshort_arraycopy"); + + //*** jint + // Aligned versions + StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry, + "arrayof_jint_disjoint_arraycopy"); + StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy, + "arrayof_jint_arraycopy"); + // In 64 bit we need both aligned and unaligned versions of jint arraycopy. 
+ // entry_jint_arraycopy always points to the unaligned version + StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry, + "jint_disjoint_arraycopy"); + StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry, + &entry_jint_arraycopy, + "jint_arraycopy"); + + //*** jlong + // It is always aligned + StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry, + "arrayof_jlong_disjoint_arraycopy"); + StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy, + "arrayof_jlong_arraycopy"); + StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy; + StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy; + + //*** oops + { + // With compressed oops we need unaligned versions; notice that + // we overwrite entry_oop_arraycopy. + bool aligned = !UseCompressedOops; + + StubRoutines::_arrayof_oop_disjoint_arraycopy + = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy", + /*dest_uninitialized*/false); + StubRoutines::_arrayof_oop_arraycopy + = generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy", + /*dest_uninitialized*/false); + // Aligned versions without pre-barriers + StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit + = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit", + /*dest_uninitialized*/true); + StubRoutines::_arrayof_oop_arraycopy_uninit + = generate_conjoint_oop_copy(aligned, entry, NULL, "arrayof_oop_arraycopy_uninit", + /*dest_uninitialized*/true); + } + + StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy; + StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy; + StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit; + StubRoutines::_oop_arraycopy_uninit = 
StubRoutines::_arrayof_oop_arraycopy_uninit; + + StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); + StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, + /*dest_uninitialized*/true); + + + StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy", + entry_jbyte_arraycopy, + entry_jshort_arraycopy, + entry_jint_arraycopy, + entry_jlong_arraycopy); + + StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy", + entry_jbyte_arraycopy, + entry_jshort_arraycopy, + entry_jint_arraycopy, + entry_oop_arraycopy, + entry_jlong_arraycopy, + entry_checkcast_arraycopy); + + StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); + StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); + StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); + StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); + StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); + StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); + } + + // Safefetch stubs. + // Emits one stub named "name" that loads "size" bytes from adr: 4 uses lw, 8 uses ld, + // any other size hits ShouldNotReachHere(). + // Out-parameters (all must be non-NULL): + // *entry = pc at which the stub starts + // *fault_pc = pc of the load, which may fault (same pc as *entry, nothing is emitted in between) + // *continuation_pc = pc of the instruction that follows the load + void generate_safefetch(const char* name, int size, address* entry, + address* fault_pc, address* continuation_pc) { + // safefetch signatures: + // int SafeFetch32(int* adr, int errValue) + // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue) + // + // arguments: + // c_rarg0 = adr + // c_rarg1 = errValue + // + // result: + // x10 = *adr or errValue + assert_cond(entry != NULL && fault_pc != NULL && continuation_pc != NULL); + StubCodeMark mark(this, "StubRoutines", name); + + // Entry point, pc or function descriptor. + *entry = __ pc(); + + // Load *adr into c_rarg1, may fault.
+ *fault_pc = __ pc(); + switch (size) { + case 4: + // int32_t + __ lw(c_rarg1, Address(c_rarg0, 0)); + break; + case 8: + // int64_t + __ ld(c_rarg1, Address(c_rarg0, 0)); + break; + default: + ShouldNotReachHere(); + } + + // return errValue or *adr + *continuation_pc = __ pc(); + __ mv(x10, c_rarg1); + __ ret(); + } + + // code for comparing 16 bytes of strings with same encoding + // Emitted sequence: the words already held in tmp1 (x28) and tmp2 (x29) are compared, + // branching to DIFF1 if they differ, while the next 8 bytes of str1/str2 are loaded into + // tmp5 (x31) and cnt1 (x12). Then tmp5 and cnt1 are compared, branching to DIFF2 if they + // differ, while tmp1/tmp2 are reloaded with the following 8 bytes. + // str1 (x11) and str2 (x13) are each advanced by 16; tmp4 (x7) holds the XOR of the + // pair that was compared last. + void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) { + const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31; + __ ld(tmp5, Address(str1)); + __ addi(str1, str1, 8); + __ xorr(tmp4, tmp1, tmp2); + __ ld(cnt1, Address(str2)); + __ addi(str2, str2, 8); + __ bnez(tmp4, DIFF1); + __ ld(tmp1, Address(str1)); + __ addi(str1, str1, 8); + __ xorr(tmp4, tmp5, cnt1); + __ ld(tmp2, Address(str2)); + __ addi(str2, str2, 8); + __ bnez(tmp4, DIFF2); + } + + // code for comparing 8 characters of strings with Latin1 and Utf16 encoding + // Emitted sequence: 8 bytes are loaded from strL (x29) into tmpL and 8 bytes from strU (x12) + // into tmpU. inflate_lo32(tmpL) is compared with curU (x7), branching to DIFF2 on mismatch; + // curU is then reloaded from strU and inflate_hi32(tmpL) is compared with tmpU, branching to + // DIFF1 on mismatch. At either branch t0 holds the inflated Latin1 word that was compared. + // strL is advanced by 8 and strU by 16; x30 is used as scratch. + void compare_string_8_x_LU(Register tmpL, Register tmpU, Label &DIFF1, + Label &DIFF2) { + const Register strU = x12, curU = x7, strL = x29, tmp = x30; + __ ld(tmpL, Address(strL)); + __ addi(strL, strL, 8); + __ ld(tmpU, Address(strU)); + __ addi(strU, strU, 8); + __ inflate_lo32(tmp, tmpL); + __ mv(t0, tmp); + __ xorr(tmp, curU, t0); + __ bnez(tmp, DIFF2); + + __ ld(curU, Address(strU)); + __ addi(strU, strU, 8); + __ inflate_hi32(tmp, tmpL); + __ mv(t0, tmp); + __ xorr(tmp, tmpU, t0); + __ bnez(tmp, DIFF1); + } + + // x10 = result + // x11 = str1 + // x12 = cnt1 + // x13 = str2 + // x14 = cnt2 + // x28 = tmp1 + // x29 = tmp2 + // x30 = tmp3 + // x7 = tmp4 (spilled by the stub) + // x31 = tmp5 (spilled by the stub) + address generate_compare_long_string_different_encoding(bool isLU) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", isLU ?
"compare_long_string_different_encoding LU" : "compare_long_string_different_encoding UL"); + address entry = __ pc(); + Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2, + DONE, CALCULATE_DIFFERENCE; + const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, + tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; + RegSet spilled_regs = RegSet::of(tmp4, tmp5); + + // cnt2 == amount of characters left to compare + // Check already loaded first 4 symbols + __ inflate_lo32(tmp3, isLU ? tmp1 : tmp2); + __ mv(isLU ? tmp1 : tmp2, tmp3); + __ addi(str1, str1, isLU ? wordSize / 2 : wordSize); + __ addi(str2, str2, isLU ? wordSize : wordSize / 2); + __ sub(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case. + __ push_reg(spilled_regs, sp); + + if (isLU) { + __ add(str1, str1, cnt2); + __ shadd(str2, cnt2, str2, t0, 1); + } else { + __ shadd(str1, cnt2, str1, t0, 1); + __ add(str2, str2, cnt2); + } + __ xorr(tmp3, tmp1, tmp2); + __ mv(tmp5, tmp2); + __ bnez(tmp3, CALCULATE_DIFFERENCE); + + Register strU = isLU ? str2 : str1, + strL = isLU ? str1 : str2, + tmpU = isLU ? tmp5 : tmp1, // where to keep U for comparison + tmpL = isLU ? 
tmp1 : tmp5; // where to keep L for comparison + + __ sub(tmp2, strL, cnt2); // strL pointer to load from + __ slli(t0, cnt2, 1); + __ sub(cnt1, strU, t0); // strU pointer to load from + + __ ld(tmp4, Address(cnt1)); + __ addi(cnt1, cnt1, 8); + __ beqz(cnt2, LOAD_LAST); // no characters left except last load + __ sub(cnt2, cnt2, 16); + __ bltz(cnt2, TAIL); + __ bind(SMALL_LOOP); // smaller loop + __ sub(cnt2, cnt2, 16); + compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); + compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); + __ bgez(cnt2, SMALL_LOOP); + __ addi(t0, cnt2, 16); + __ beqz(t0, LOAD_LAST); + __ bind(TAIL); // 1..15 characters left until last load (last 4 characters) + // Address of 8 bytes before last 4 characters in UTF-16 string + __ shadd(cnt1, cnt2, cnt1, t0, 1); + // Address of 16 bytes before last 4 characters in Latin1 string + __ add(tmp2, tmp2, cnt2); + __ ld(tmp4, Address(cnt1, -8)); + // last 16 characters before last load + compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); + compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); + __ j(LOAD_LAST); + __ bind(DIFF2); + __ mv(tmpU, tmp4); + __ bind(DIFF1); + __ mv(tmpL, t0); + __ j(CALCULATE_DIFFERENCE); + __ bind(LOAD_LAST); + // Last 4 UTF-16 characters are already pre-loaded into tmp4 by compare_string_8_x_LU. + // No need to load it again + __ mv(tmpU, tmp4); + __ ld(tmpL, Address(strL)); + __ inflate_lo32(tmp3, tmpL); + __ mv(tmpL, tmp3); + __ xorr(tmp3, tmpU, tmpL); + __ beqz(tmp3, DONE); + + // Find the first different characters in the longwords and + // compute their difference. 
+ __ bind(CALCULATE_DIFFERENCE); + __ ctzc_bit(tmp4, tmp3); + __ srl(tmp1, tmp1, tmp4); + __ srl(tmp5, tmp5, tmp4); + __ andi(tmp1, tmp1, 0xFFFF); + __ andi(tmp5, tmp5, 0xFFFF); + __ sub(result, tmp1, tmp5); + __ bind(DONE); + __ pop_reg(spilled_regs, sp); + __ ret(); + return entry; + } + + /** + * Generate the "nmethod_entry_barrier" stub and return the pc at which it starts. + * + * The stub records the last Java frame, sets up a frame with enter(), reserves four + * words below it, saves the call-clobbered registers and calls + * BarrierSetNMethod::nmethod_stub_entry_barrier with one argument: sp + wordSize as + * computed right after enter(). The call result (x10) is copied to t0 before the + * registers are restored. If it is zero the stub leaves its frame and returns. + * Otherwise it reloads sp, fp, ra and a jump target from the four reserved words + * (in that order, starting at sp) and jumps to that target. + * NOTE(review): the reserved words are presumably filled in by the runtime callee - confirm. + */ + address generate_method_entry_barrier() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier"); + + Label deoptimize_label; + + address start = __ pc(); + + __ set_last_Java_frame(sp, fp, ra, t0); + + __ enter(); + __ add(t1, sp, wordSize); + + __ sub(sp, sp, 4 * wordSize); + + __ push_call_clobbered_registers(); + + __ mv(c_rarg0, t1); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetNMethod::nmethod_stub_entry_barrier), 1); + + __ reset_last_Java_frame(true); + + __ mv(t0, x10); + + __ pop_call_clobbered_registers(); + + __ bnez(t0, deoptimize_label); + + __ leave(); + __ ret(); + + __ BIND(deoptimize_label); + + __ ld(t0, Address(sp, 0)); + __ ld(fp, Address(sp, wordSize)); + __ ld(ra, Address(sp, wordSize * 2)); + __ ld(t1, Address(sp, wordSize * 3)); + + __ mv(sp, t0); + __ jr(t1); + + return start; + } + + // x10 = result + // x11 = str1 + // x12 = cnt1 + // x13 = str2 + // x14 = cnt2 + // x28 = tmp1 + // x29 = tmp2 + // x30 = tmp3 + // x7 = tmp4 + // x31 = tmp5 + address generate_compare_long_string_same_encoding(bool isLL) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", isLL ? + "compare_long_string_same_encoding LL" : "compare_long_string_same_encoding UU"); + address entry = __ pc(); + Label SMALL_LOOP, CHECK_LAST, DIFF2, TAIL, + LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF; + const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, + tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; + RegSet spilled_regs = RegSet::of(tmp4, tmp5); + + // cnt1/cnt2 contains amount of characters to compare.
cnt1 can be re-used + // update cnt2 counter with already loaded 8 bytes + __ sub(cnt2, cnt2, wordSize / (isLL ? 1 : 2)); + // update pointers, because of previous read + __ add(str1, str1, wordSize); + __ add(str2, str2, wordSize); + // less than 16 bytes left? + __ sub(cnt2, cnt2, isLL ? 16 : 8); + __ push_reg(spilled_regs, sp); + __ bltz(cnt2, TAIL); + __ bind(SMALL_LOOP); + compare_string_16_bytes_same(DIFF, DIFF2); + __ sub(cnt2, cnt2, isLL ? 16 : 8); + __ bgez(cnt2, SMALL_LOOP); + __ bind(TAIL); + __ addi(cnt2, cnt2, isLL ? 16 : 8); + __ beqz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); + __ sub(cnt2, cnt2, isLL ? 8 : 4); + __ blez(cnt2, CHECK_LAST); + __ xorr(tmp4, tmp1, tmp2); + __ bnez(tmp4, DIFF); + __ ld(tmp1, Address(str1)); + __ addi(str1, str1, 8); + __ ld(tmp2, Address(str2)); + __ addi(str2, str2, 8); + __ sub(cnt2, cnt2, isLL ? 8 : 4); + __ bind(CHECK_LAST); + if (!isLL) { + __ add(cnt2, cnt2, cnt2); // now in bytes + } + __ xorr(tmp4, tmp1, tmp2); + __ bnez(tmp4, DIFF); + __ add(str1, str1, cnt2); + __ ld(tmp5, Address(str1)); + __ add(str2, str2, cnt2); + __ ld(cnt1, Address(str2)); + __ xorr(tmp4, tmp5, cnt1); + __ beqz(tmp4, LENGTH_DIFF); + // Find the first different characters in the longwords and + // compute their difference. 
+ __ bind(DIFF2); + __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb + __ srl(tmp5, tmp5, tmp3); + __ srl(cnt1, cnt1, tmp3); + if (isLL) { + __ andi(tmp5, tmp5, 0xFF); + __ andi(cnt1, cnt1, 0xFF); + } else { + __ andi(tmp5, tmp5, 0xFFFF); + __ andi(cnt1, cnt1, 0xFFFF); + } + __ sub(result, tmp5, cnt1); + __ j(LENGTH_DIFF); + __ bind(DIFF); + __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb + __ srl(tmp1, tmp1, tmp3); + __ srl(tmp2, tmp2, tmp3); + if (isLL) { + __ andi(tmp1, tmp1, 0xFF); + __ andi(tmp2, tmp2, 0xFF); + } else { + __ andi(tmp1, tmp1, 0xFFFF); + __ andi(tmp2, tmp2, 0xFFFF); + } + __ sub(result, tmp1, tmp2); + __ j(LENGTH_DIFF); + __ bind(LAST_CHECK_AND_LENGTH_DIFF); + __ xorr(tmp4, tmp1, tmp2); + __ bnez(tmp4, DIFF); + __ bind(LENGTH_DIFF); + __ pop_reg(spilled_regs, sp); + __ ret(); + return entry; + } + + void generate_compare_long_strings() { + StubRoutines::riscv::_compare_long_string_LL = generate_compare_long_string_same_encoding(true); + StubRoutines::riscv::_compare_long_string_UU = generate_compare_long_string_same_encoding(false); + StubRoutines::riscv::_compare_long_string_LU = generate_compare_long_string_different_encoding(true); + StubRoutines::riscv::_compare_long_string_UL = generate_compare_long_string_different_encoding(false); + } + + // x10 result + // x11 src + // x12 src count + // x13 pattern + // x14 pattern count + address generate_string_indexof_linear(bool needle_isL, bool haystack_isL) + { + const char* stubName = needle_isL + ? (haystack_isL ? "indexof_linear_ll" : "indexof_linear_ul") + : "indexof_linear_uu"; + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", stubName); + address entry = __ pc(); + + int needle_chr_size = needle_isL ? 1 : 2; + int haystack_chr_size = haystack_isL ? 1 : 2; + int needle_chr_shift = needle_isL ? 0 : 1; + int haystack_chr_shift = haystack_isL ? 
0 : 1; + bool isL = needle_isL && haystack_isL; + // parameters + Register result = x10, haystack = x11, haystack_len = x12, needle = x13, needle_len = x14; + // temporary registers + Register mask1 = x20, match_mask = x21, first = x22, trailing_zeros = x23, mask2 = x24, tmp = x25; + // redefinitions + Register ch1 = x28, ch2 = x29; + RegSet spilled_regs = RegSet::range(x20, x25) + RegSet::range(x28, x29); + + __ push_reg(spilled_regs, sp); + + Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO, + L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED, + L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP, + L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH, + L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2, + L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH; + + __ ld(ch1, Address(needle)); + __ ld(ch2, Address(haystack)); + // src.length - pattern.length + __ sub(haystack_len, haystack_len, needle_len); + + // first is needle[0] + __ andi(first, ch1, needle_isL ? 0xFF : 0xFFFF, first); + uint64_t mask0101 = UCONST64(0x0101010101010101); + uint64_t mask0001 = UCONST64(0x0001000100010001); + __ mv(mask1, haystack_isL ? mask0101 : mask0001); + __ mul(first, first, mask1); + uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); + uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); + __ mv(mask2, haystack_isL ? 
mask7f7f : mask7fff); + if (needle_isL != haystack_isL) { + __ mv(tmp, ch1); + } + __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size - 1); + __ blez(haystack_len, L_SMALL); + + if (needle_isL != haystack_isL) { + __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); + } + // xorr, sub, orr, notr, andr + // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i] + // eg: + // first: aa aa aa aa aa aa aa aa + // ch2: aa aa li nx jd ka aa aa + // match_mask: 80 80 00 00 00 00 80 80 + __ compute_match_mask(ch2, first, match_mask, mask1, mask2); + + // search first char of needle, if success, goto L_HAS_ZERO; + __ bnez(match_mask, L_HAS_ZERO); + __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size); + __ add(result, result, wordSize / haystack_chr_size); + __ add(haystack, haystack, wordSize); + __ bltz(haystack_len, L_POST_LOOP); + + __ bind(L_LOOP); + __ ld(ch2, Address(haystack)); + __ compute_match_mask(ch2, first, match_mask, mask1, mask2); + __ bnez(match_mask, L_HAS_ZERO); + + __ bind(L_LOOP_PROCEED); + __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size); + __ add(haystack, haystack, wordSize); + __ add(result, result, wordSize / haystack_chr_size); + __ bgez(haystack_len, L_LOOP); + + __ bind(L_POST_LOOP); + __ mv(ch2, -wordSize / haystack_chr_size); + __ ble(haystack_len, ch2, NOMATCH); // no extra characters to check + __ ld(ch2, Address(haystack)); + __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); + __ neg(haystack_len, haystack_len); + __ xorr(ch2, first, ch2); + __ sub(match_mask, ch2, mask1); + __ orr(ch2, ch2, mask2); + __ mv(trailing_zeros, -1); // all bits set + __ j(L_SMALL_PROCEED); + + __ align(OptoLoopAlignment); + __ bind(L_SMALL); + __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); + __ neg(haystack_len, haystack_len); + if (needle_isL != haystack_isL) { + __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); + } + __ xorr(ch2, 
first, ch2); + __ sub(match_mask, ch2, mask1); + __ orr(ch2, ch2, mask2); + __ mv(trailing_zeros, -1); // all bits set + + __ bind(L_SMALL_PROCEED); + __ srl(trailing_zeros, trailing_zeros, haystack_len); // mask. zeroes on useless bits. + __ notr(ch2, ch2); + __ andr(match_mask, match_mask, ch2); + __ andr(match_mask, match_mask, trailing_zeros); // clear useless bits and check + __ beqz(match_mask, NOMATCH); + + __ bind(L_SMALL_HAS_ZERO_LOOP); + __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, ch2, tmp); // count trailing zeros + __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); + __ mv(ch2, wordSize / haystack_chr_size); + __ ble(needle_len, ch2, L_SMALL_CMP_LOOP_LAST_CMP2); + __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); + __ mv(trailing_zeros, wordSize / haystack_chr_size); + __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); + + __ bind(L_SMALL_CMP_LOOP); + __ shadd(first, trailing_zeros, needle, first, needle_chr_shift); + __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); + needle_isL ? __ lbu(first, Address(first)) : __ lhu(first, Address(first)); + haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); + __ add(trailing_zeros, trailing_zeros, 1); + __ bge(trailing_zeros, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); + __ beq(first, ch2, L_SMALL_CMP_LOOP); + + __ bind(L_SMALL_CMP_LOOP_NOMATCH); + __ beqz(match_mask, NOMATCH); + __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); + __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 
7 : 15); + __ add(result, result, 1); + __ add(haystack, haystack, haystack_chr_size); + __ j(L_SMALL_HAS_ZERO_LOOP); + + __ align(OptoLoopAlignment); + __ bind(L_SMALL_CMP_LOOP_LAST_CMP); + __ bne(first, ch2, L_SMALL_CMP_LOOP_NOMATCH); + __ j(DONE); + + __ align(OptoLoopAlignment); + __ bind(L_SMALL_CMP_LOOP_LAST_CMP2); + __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); + __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); + __ j(DONE); + + __ align(OptoLoopAlignment); + __ bind(L_HAS_ZERO); + __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); + __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); + __ slli(needle_len, needle_len, BitsPerByte * wordSize / 2); + __ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits) + __ sub(result, result, 1); // array index from 0, so result -= 1 + + __ bind(L_HAS_ZERO_LOOP); + __ mv(needle_len, wordSize / haystack_chr_size); + __ srli(ch2, haystack_len, BitsPerByte * wordSize / 2); + __ bge(needle_len, ch2, L_CMP_LOOP_LAST_CMP2); + // load next 8 bytes from haystack, and increase result index + __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); + __ add(result, result, 1); + __ mv(trailing_zeros, wordSize / haystack_chr_size); + __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); + + // compare one char + __ bind(L_CMP_LOOP); + __ shadd(needle_len, trailing_zeros, needle, needle_len, needle_chr_shift); + needle_isL ? __ lbu(needle_len, Address(needle_len)) : __ lhu(needle_len, Address(needle_len)); + __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); + haystack_isL ? 
__ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); + __ add(trailing_zeros, trailing_zeros, 1); // next char index + __ srli(tmp, haystack_len, BitsPerByte * wordSize / 2); + __ bge(trailing_zeros, tmp, L_CMP_LOOP_LAST_CMP); + __ beq(needle_len, ch2, L_CMP_LOOP); + + __ bind(L_CMP_LOOP_NOMATCH); + __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH); + __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index + __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); + __ add(haystack, haystack, haystack_chr_size); + __ j(L_HAS_ZERO_LOOP); + + __ align(OptoLoopAlignment); + __ bind(L_CMP_LOOP_LAST_CMP); + __ bne(needle_len, ch2, L_CMP_LOOP_NOMATCH); + __ j(DONE); + + __ align(OptoLoopAlignment); + __ bind(L_CMP_LOOP_LAST_CMP2); + __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); + __ add(result, result, 1); + __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); + __ j(DONE); + + __ align(OptoLoopAlignment); + __ bind(L_HAS_ZERO_LOOP_NOMATCH); + // 1) Restore "result" index. Index was wordSize/haystack_chr_size * N until + // L_HAS_ZERO block. Byte octet was analyzed in L_HAS_ZERO_LOOP, + // so, result was increased at max by wordSize/haystack_chr_size - 1, so, + // respective high bit wasn't changed. L_LOOP_PROCEED will increase + // result by analyzed characters value, so, we can just reset lower bits + // in result here. Clear 2 lower bits for UU/UL and 3 bits for LL + // 2) restore needle_len and haystack_len values from "compressed" haystack_len + // 3) advance haystack value to represent next haystack octet. result & 7/3 is + // index of last analyzed substring inside current octet. So, haystack is at + // respective start address. We need to advance it to next octet: the code + // below rewinds haystack to the start of the current octet and + // L_LOOP_PROCEED then adds wordSize. + __ andi(match_mask, result, wordSize / haystack_chr_size - 1); + __ srli(needle_len, haystack_len, BitsPerByte * wordSize / 2); + __ andi(result, result, haystack_isL ?
-8 : -4); + __ slli(tmp, match_mask, haystack_chr_shift); + __ sub(haystack, haystack, tmp); + __ addw(haystack_len, haystack_len, zr); + __ j(L_LOOP_PROCEED); + + __ align(OptoLoopAlignment); + __ bind(NOMATCH); + __ mv(result, -1); + + __ bind(DONE); + __ pop_reg(spilled_regs, sp); + __ ret(); + return entry; + } + + void generate_string_indexof_stubs() + { + StubRoutines::riscv::_string_indexof_linear_ll = generate_string_indexof_linear(true, true); + StubRoutines::riscv::_string_indexof_linear_uu = generate_string_indexof_linear(false, false); + StubRoutines::riscv::_string_indexof_linear_ul = generate_string_indexof_linear(true, false); + } + +#ifdef COMPILER2 + address generate_mulAdd() + { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "mulAdd"); + + address entry = __ pc(); + + const Register out = x10; + const Register in = x11; + const Register offset = x12; + const Register len = x13; + const Register k = x14; + const Register tmp = x28; + + BLOCK_COMMENT("Entry:"); + __ enter(); + __ mul_add(out, in, offset, len, k, tmp); + __ leave(); + __ ret(); + + return entry; + } + + /** + * Arguments: + * + * Input: + * c_rarg0 - x address + * c_rarg1 - x length + * c_rarg2 - y address + * c_rarg3 - y length + * c_rarg4 - z address + * c_rarg5 - z length + */ + address generate_multiplyToLen() + { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); + address entry = __ pc(); + + const Register x = x10; + const Register xlen = x11; + const Register y = x12; + const Register ylen = x13; + const Register z = x14; + const Register zlen = x15; + + const Register tmp1 = x16; + const Register tmp2 = x17; + const Register tmp3 = x7; + const Register tmp4 = x28; + const Register tmp5 = x29; + const Register tmp6 = x30; + const Register tmp7 = x31; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, 
tmp3, tmp4, tmp5, tmp6, tmp7); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(); + + return entry; + } + + address generate_squareToLen() + { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "squareToLen"); + address entry = __ pc(); + + const Register x = x10; + const Register xlen = x11; + const Register z = x12; + const Register zlen = x13; + const Register y = x14; // == x + const Register ylen = x15; // == xlen + + const Register tmp1 = x16; + const Register tmp2 = x17; + const Register tmp3 = x7; + const Register tmp4 = x28; + const Register tmp5 = x29; + const Register tmp6 = x30; + const Register tmp7 = x31; + + BLOCK_COMMENT("Entry:"); + __ enter(); + __ mv(y, x); + __ mv(ylen, xlen); + __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); + __ leave(); + __ ret(); + + return entry; + } + + // Arguments: + // + // Input: + // c_rarg0 - newArr address + // c_rarg1 - oldArr address + // c_rarg2 - newIdx + // c_rarg3 - shiftCount + // c_rarg4 - numIter + // + address generate_bigIntegerLeftShift() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker"); + address entry = __ pc(); + + Label loop, exit; + + Register newArr = c_rarg0; + Register oldArr = c_rarg1; + Register newIdx = c_rarg2; + Register shiftCount = c_rarg3; + Register numIter = c_rarg4; + + Register shiftRevCount = c_rarg5; + Register oldArrNext = t1; + + __ beqz(numIter, exit); + __ shadd(newArr, newIdx, newArr, t0, 2); + + __ li(shiftRevCount, 32); + __ sub(shiftRevCount, shiftRevCount, shiftCount); + + __ bind(loop); + __ addi(oldArrNext, oldArr, 4); + __ vsetvli(t0, numIter, Assembler::e32, Assembler::m4); + __ vle32_v(v0, oldArr); + __ vle32_v(v4, oldArrNext); + __ vsll_vx(v0, v0, shiftCount); + __ vsrl_vx(v4, v4, shiftRevCount); + __ vor_vv(v0, v0, v4); + __ vse32_v(v0, newArr); + __ sub(numIter, numIter, t0); + __ shadd(oldArr, t0, oldArr, t1, 2); + 
__ shadd(newArr, t0, newArr, t1, 2); + __ bnez(numIter, loop); + + __ bind(exit); + __ ret(); + + return entry; + } + + // Arguments: + // + // Input: + // c_rarg0 - newArr address + // c_rarg1 - oldArr address + // c_rarg2 - newIdx + // c_rarg3 - shiftCount + // c_rarg4 - numIter + // + address generate_bigIntegerRightShift() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker"); + address entry = __ pc(); + + Label loop, exit; + + Register newArr = c_rarg0; + Register oldArr = c_rarg1; + Register newIdx = c_rarg2; + Register shiftCount = c_rarg3; + Register numIter = c_rarg4; + Register idx = numIter; + + Register shiftRevCount = c_rarg5; + Register oldArrNext = c_rarg6; + Register newArrCur = t0; + Register oldArrCur = t1; + + __ beqz(idx, exit); + __ shadd(newArr, newIdx, newArr, t0, 2); + + __ li(shiftRevCount, 32); + __ sub(shiftRevCount, shiftRevCount, shiftCount); + + __ bind(loop); + __ vsetvli(t0, idx, Assembler::e32, Assembler::m4); + __ sub(idx, idx, t0); + __ shadd(oldArrNext, idx, oldArr, t1, 2); + __ shadd(newArrCur, idx, newArr, t1, 2); + __ addi(oldArrCur, oldArrNext, 4); + __ vle32_v(v0, oldArrCur); + __ vle32_v(v4, oldArrNext); + __ vsrl_vx(v0, v0, shiftCount); + __ vsll_vx(v4, v4, shiftRevCount); + __ vor_vv(v0, v0, v4); + __ vse32_v(v0, newArrCur); + __ bnez(idx, loop); + + __ bind(exit); + __ ret(); + + return entry; + } +#endif + +#ifdef COMPILER2 + class MontgomeryMultiplyGenerator : public MacroAssembler { + + Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn, + Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2, Ri, Rj; + + RegSet _toSave; + bool _squaring; + + public: + MontgomeryMultiplyGenerator (Assembler *as, bool squaring) + : MacroAssembler(as->code()), _squaring(squaring) { + + // Register allocation + + Register reg = c_rarg0; + Pa_base = reg; // Argument registers + if (squaring) { + Pb_base = Pa_base; + } else { + Pb_base = ++reg; + } + 
Pn_base = ++reg; + Rlen= ++reg; + inv = ++reg; + Pm_base = ++reg; + + // Working registers: + Ra = ++reg; // The current digit of a, b, n, and m. + Rb = ++reg; + Rm = ++reg; + Rn = ++reg; + + Pa = ++reg; // Pointers to the current/next digit of a, b, n, and m. + Pb = ++reg; + Pm = ++reg; + Pn = ++reg; + + tmp0 = ++reg; // Three registers which form a + tmp1 = ++reg; // triple-precision accumulator. + tmp2 = ++reg; + + Ri = x6; // Inner and outer loop indexes. + Rj = x7; + + Rhi_ab = x28; // Product registers: low and high parts + Rlo_ab = x29; // of a*b and m*n. + Rhi_mn = x30; + Rlo_mn = x31; + + // x18 and up are callee-saved. + _toSave = RegSet::range(x18, reg) + Pm_base; + } + + private: + void save_regs() { + push_reg(_toSave, sp); + } + + void restore_regs() { + pop_reg(_toSave, sp); + } + + template <typename T> + void unroll_2(Register count, T block) { + Label loop, end, odd; + beqz(count, end); + andi(t0, count, 0x1); + bnez(t0, odd); + align(16); + bind(loop); + (this->*block)(); + bind(odd); + (this->*block)(); + addi(count, count, -2); + bgtz(count, loop); + bind(end); + } + + template <typename T> + void unroll_2(Register count, T block, Register d, Register s, Register tmp) { + Label loop, end, odd; + beqz(count, end); + andi(tmp, count, 0x1); + bnez(tmp, odd); + align(16); + bind(loop); + (this->*block)(d, s, tmp); + bind(odd); + (this->*block)(d, s, tmp); + addi(count, count, -2); + bgtz(count, loop); + bind(end); + } + + void pre1(RegisterOrConstant i) { + block_comment("pre1"); + // Pa = Pa_base; + // Pb = Pb_base + i; + // Pm = Pm_base; + // Pn = Pn_base + i; + // Ra = *Pa; + // Rb = *Pb; + // Rm = *Pm; + // Rn = *Pn; + if (i.is_register()) { + slli(t0, i.as_register(), LogBytesPerWord); + } else { + mv(t0, i.as_constant()); + slli(t0, t0, LogBytesPerWord); + } + + mv(Pa, Pa_base); + add(Pb, Pb_base, t0); + mv(Pm, Pm_base); + add(Pn, Pn_base, t0); + + ld(Ra, Address(Pa)); + ld(Rb, Address(Pb)); + ld(Rm, Address(Pm)); + ld(Rn, Address(Pn)); + + // Zero the m*n result.
+ mv(Rhi_mn, zr); + mv(Rlo_mn, zr); + } + + // The core multiply-accumulate step of a Montgomery + // multiplication. The idea is to schedule operations as a + // pipeline so that instructions with long latencies (loads and + // multiplies) have time to complete before their results are + // used. This most benefits in-order implementations of the + // architecture but out-of-order ones also benefit. + void step() { + block_comment("step"); + // MACC(Ra, Rb, tmp0, tmp1, tmp2); + // Ra = *++Pa; + // Rb = *--Pb; + mulhu(Rhi_ab, Ra, Rb); + mul(Rlo_ab, Ra, Rb); + addi(Pa, Pa, wordSize); + ld(Ra, Address(Pa)); + addi(Pb, Pb, -wordSize); + ld(Rb, Address(Pb)); + acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n from the + // previous iteration. + // MACC(Rm, Rn, tmp0, tmp1, tmp2); + // Rm = *++Pm; + // Rn = *--Pn; + mulhu(Rhi_mn, Rm, Rn); + mul(Rlo_mn, Rm, Rn); + addi(Pm, Pm, wordSize); + ld(Rm, Address(Pm)); + addi(Pn, Pn, -wordSize); + ld(Rn, Address(Pn)); + acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); + } + + void post1() { + block_comment("post1"); + + // MACC(Ra, Rb, tmp0, tmp1, tmp2); + // Ra = *++Pa; + // Rb = *--Pb; + mulhu(Rhi_ab, Ra, Rb); + mul(Rlo_ab, Ra, Rb); + acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n + acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); + + // *Pm = Rm = tmp0 * inv; + mul(Rm, tmp0, inv); + sd(Rm, Address(Pm)); + + // MACC(Rm, Rn, tmp0, tmp1, tmp2); + // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; + mulhu(Rhi_mn, Rm, Rn); + +#ifndef PRODUCT + // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); + { + mul(Rlo_mn, Rm, Rn); + add(Rlo_mn, tmp0, Rlo_mn); + Label ok; + beqz(Rlo_mn, ok); + stop("broken Montgomery multiply"); + bind(ok); + } +#endif + // We have very carefully set things up so that + // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate + // the lower half of Rm * Rn because we know the result already: + // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff + // tmp0 != 0. 
So, rather than do a mul and a cad we just set + // the carry flag iff tmp0 is nonzero. + // + // mul(Rlo_mn, Rm, Rn); + // cad(zr, tmp0, Rlo_mn); + addi(t0, tmp0, -1); + sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero + cadc(tmp0, tmp1, Rhi_mn, t0); + adc(tmp1, tmp2, zr, t0); + mv(tmp2, zr); + } + + void pre2(Register i, Register len) { + block_comment("pre2"); + // Pa = Pa_base + i-len; + // Pb = Pb_base + len; + // Pm = Pm_base + i-len; + // Pn = Pn_base + len; + + sub(Rj, i, len); + // Rj == i-len + + // Ra as temp register + slli(Ra, Rj, LogBytesPerWord); + add(Pa, Pa_base, Ra); + add(Pm, Pm_base, Ra); + slli(Ra, len, LogBytesPerWord); + add(Pb, Pb_base, Ra); + add(Pn, Pn_base, Ra); + + // Ra = *++Pa; + // Rb = *--Pb; + // Rm = *++Pm; + // Rn = *--Pn; + add(Pa, Pa, wordSize); + ld(Ra, Address(Pa)); + add(Pb, Pb, -wordSize); + ld(Rb, Address(Pb)); + add(Pm, Pm, wordSize); + ld(Rm, Address(Pm)); + add(Pn, Pn, -wordSize); + ld(Rn, Address(Pn)); + + mv(Rhi_mn, zr); + mv(Rlo_mn, zr); + } + + void post2(Register i, Register len) { + block_comment("post2"); + sub(Rj, i, len); + + cad(tmp0, tmp0, Rlo_mn, t0); // The pending m*n, low part + + // As soon as we know the least significant digit of our result, + // store it. + // Pm_base[i-len] = tmp0; + // Rj as temp register + slli(Rj, Rj, LogBytesPerWord); + add(Rj, Pm_base, Rj); + sd(tmp0, Address(Rj)); + + // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; + cadc(tmp0, tmp1, Rhi_mn, t0); // The pending m*n, high part + adc(tmp1, tmp2, zr, t0); + mv(tmp2, zr); + } + + // A carry in tmp0 after Montgomery multiplication means that we + // should subtract multiples of n from our result in m. We'll + // keep doing that until there is no carry.
+ void normalize(Register len) { + block_comment("normalize"); + // while (tmp0) + // tmp0 = sub(Pm_base, Pn_base, tmp0, len); + Label loop, post, again; + Register cnt = tmp1, i = tmp2; // Re-use registers; we're done with them now + beqz(tmp0, post); { + bind(again); { + mv(i, zr); + mv(cnt, len); + slli(Rn, i, LogBytesPerWord); + add(Rm, Pm_base, Rn); + ld(Rm, Address(Rm)); + add(Rn, Pn_base, Rn); + ld(Rn, Address(Rn)); + li(t0, 1); // set carry flag, i.e. no borrow + align(16); + bind(loop); { + notr(Rn, Rn); + add(Rm, Rm, t0); + add(Rm, Rm, Rn); + sltu(t0, Rm, Rn); + slli(Rn, i, LogBytesPerWord); // Rn as temp register + add(Rn, Pm_base, Rn); + sd(Rm, Address(Rn)); + add(i, i, 1); + slli(Rn, i, LogBytesPerWord); + add(Rm, Pm_base, Rn); + ld(Rm, Address(Rm)); + add(Rn, Pn_base, Rn); + ld(Rn, Address(Rn)); + sub(cnt, cnt, 1); + } bnez(cnt, loop); + addi(tmp0, tmp0, -1); + add(tmp0, tmp0, t0); + } bnez(tmp0, again); + } bind(post); + } + + // Move memory at s to d, reversing words. + // Increments d to end of copied memory + // Destroys tmp1, tmp2 + // Preserves len + // Leaves s pointing to the address which was in d at start + void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { + assert(tmp1 < x28 && tmp2 < x28, "register corruption"); + + slli(tmp1, len, LogBytesPerWord); + add(s, s, tmp1); + mv(tmp1, len); + unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); + slli(tmp1, len, LogBytesPerWord); + sub(s, d, tmp1); + } + // [63...0] -> [31...0][63...32] + void reverse1(Register d, Register s, Register tmp) { + addi(s, s, -wordSize); + ld(tmp, Address(s)); + ror_imm(tmp, tmp, 32, t0); + sd(tmp, Address(d)); + addi(d, d, wordSize); + } + + void step_squaring() { + // An extra ACC + step(); + acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); + } + + void last_squaring(Register i) { + Label dont; + // if ((i & 1) == 0) { + andi(t0, i, 0x1); + bnez(t0, dont); { + // MACC(Ra, Rb, tmp0, tmp1, tmp2); + // Ra = *++Pa; + // Rb = *--Pb; 
+ mulhu(Rhi_ab, Ra, Rb); + mul(Rlo_ab, Ra, Rb); + acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); + } bind(dont); + } + + void extra_step_squaring() { + acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n + + // MACC(Rm, Rn, tmp0, tmp1, tmp2); + // Rm = *++Pm; + // Rn = *--Pn; + mulhu(Rhi_mn, Rm, Rn); + mul(Rlo_mn, Rm, Rn); + addi(Pm, Pm, wordSize); + ld(Rm, Address(Pm)); + addi(Pn, Pn, -wordSize); + ld(Rn, Address(Pn)); + } + + void post1_squaring() { + acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n + + // *Pm = Rm = tmp0 * inv; + mul(Rm, tmp0, inv); + sd(Rm, Address(Pm)); + + // MACC(Rm, Rn, tmp0, tmp1, tmp2); + // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; + mulhu(Rhi_mn, Rm, Rn); + +#ifndef PRODUCT + // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); + { + mul(Rlo_mn, Rm, Rn); + add(Rlo_mn, tmp0, Rlo_mn); + Label ok; + beqz(Rlo_mn, ok); { + stop("broken Montgomery multiply"); + } bind(ok); + } +#endif + // We have very carefully set things up so that + // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate + // the lower half of Rm * Rn because we know the result already: + // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff + // tmp0 != 0. So, rather than do a mul and a cad we just set + // the carry flag iff tmp0 is nonzero. + // + // mul(Rlo_mn, Rm, Rn); + // cad(zr, tmp, Rlo_mn); + addi(t0, tmp0, -1); + sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero + cadc(tmp0, tmp1, Rhi_mn, t0); + adc(tmp1, tmp2, zr, t0); + mv(tmp2, zr); + } + + // use t0 as carry + void acc(Register Rhi, Register Rlo, + Register tmp0, Register tmp1, Register tmp2) { + cad(tmp0, tmp0, Rlo, t0); + cadc(tmp1, tmp1, Rhi, t0); + adc(tmp2, tmp2, zr, t0); + } + + public: + /** + * Fast Montgomery multiplication. The derivation of the + * algorithm is in A Cryptographic Library for the Motorola + * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. 
+ * + * Arguments: + * + * Inputs for multiplication: + * c_rarg0 - int array elements a + * c_rarg1 - int array elements b + * c_rarg2 - int array elements n (the modulus) + * c_rarg3 - int length + * c_rarg4 - int inv + * c_rarg5 - int array elements m (the result) + * + * Inputs for squaring: + * c_rarg0 - int array elements a + * c_rarg1 - int array elements n (the modulus) + * c_rarg2 - int length + * c_rarg3 - int inv + * c_rarg4 - int array elements m (the result) + * + */ + address generate_multiply() { + Label argh, nothing; + bind(argh); + stop("MontgomeryMultiply total_allocation must be <= 8192"); + + align(CodeEntryAlignment); + address entry = pc(); + + beqz(Rlen, nothing); + + enter(); + + // Make room. + li(Ra, 512); + bgt(Rlen, Ra, argh); + slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); + sub(Ra, sp, Ra); + andi(sp, Ra, -2 * wordSize); + + srliw(Rlen, Rlen, 1); // length in longwords = len/2 + + { + // Copy input args, reversing as we go. We use Ra as a + // temporary variable. + reverse(Ra, Pa_base, Rlen, Ri, Rj); + if (!_squaring) + reverse(Ra, Pb_base, Rlen, Ri, Rj); + reverse(Ra, Pn_base, Rlen, Ri, Rj); + } + + // Push all call-saved registers and also Pm_base which we'll need + // at the end. 
+ save_regs(); + +#ifndef PRODUCT + // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + { + ld(Rn, Address(Pn_base)); + mul(Rlo_mn, Rn, inv); + li(t0, -1); + Label ok; + beq(Rlo_mn, t0, ok); + stop("broken inverse in Montgomery multiply"); + bind(ok); + } +#endif + + mv(Pm_base, Ra); + + mv(tmp0, zr); + mv(tmp1, zr); + mv(tmp2, zr); + + block_comment("for (int i = 0; i < len; i++) {"); + mv(Ri, zr); { + Label loop, end; + bge(Ri, Rlen, end); + + bind(loop); + pre1(Ri); + + block_comment(" for (j = i; j; j--) {"); { + mv(Rj, Ri); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step); + } block_comment(" } // j"); + + post1(); + addw(Ri, Ri, 1); + blt(Ri, Rlen, loop); + bind(end); + block_comment("} // i"); + } + + block_comment("for (int i = len; i < 2*len; i++) {"); + mv(Ri, Rlen); { + Label loop, end; + slli(t0, Rlen, 1); + bge(Ri, t0, end); + + bind(loop); + pre2(Ri, Rlen); + + block_comment(" for (j = len*2-i-1; j; j--) {"); { + slliw(Rj, Rlen, 1); + subw(Rj, Rj, Ri); + subw(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step); + } block_comment(" } // j"); + + post2(Ri, Rlen); + addw(Ri, Ri, 1); + slli(t0, Rlen, 1); + blt(Ri, t0, loop); + bind(end); + } + block_comment("} // i"); + + normalize(Rlen); + + mv(Ra, Pm_base); // Save Pm_base in Ra + restore_regs(); // Restore caller's Pm_base + + // Copy our result into caller's Pm_base + reverse(Pm_base, Ra, Rlen, Ri, Rj); + + leave(); + bind(nothing); + ret(); + + return entry; + } + + /** + * + * Arguments: + * + * Inputs: + * c_rarg0 - int array elements a + * c_rarg1 - int array elements n (the modulus) + * c_rarg2 - int length + * c_rarg3 - int inv + * c_rarg4 - int array elements m (the result) + * + */ + address generate_square() { + Label argh; + bind(argh); + stop("MontgomeryMultiply total_allocation must be <= 8192"); + + align(CodeEntryAlignment); + address entry = pc(); + + enter(); + + // Make room. 
+ li(Ra, 512); + bgt(Rlen, Ra, argh); + slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); + sub(Ra, sp, Ra); + andi(sp, Ra, -2 * wordSize); + + srliw(Rlen, Rlen, 1); // length in longwords = len/2 + + { + // Copy input args, reversing as we go. We use Ra as a + // temporary variable. + reverse(Ra, Pa_base, Rlen, Ri, Rj); + reverse(Ra, Pn_base, Rlen, Ri, Rj); + } + + // Push all call-saved registers and also Pm_base which we'll need + // at the end. + save_regs(); + + mv(Pm_base, Ra); + + mv(tmp0, zr); + mv(tmp1, zr); + mv(tmp2, zr); + + block_comment("for (int i = 0; i < len; i++) {"); + mv(Ri, zr); { + Label loop, end; + bind(loop); + bge(Ri, Rlen, end); + + pre1(Ri); + + block_comment("for (j = (i+1)/2; j; j--) {"); { + addi(Rj, Ri, 1); + srliw(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); + } block_comment(" } // j"); + + last_squaring(Ri); + + block_comment(" for (j = i/2; j; j--) {"); { + srliw(Rj, Ri, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); + } block_comment(" } // j"); + + post1_squaring(); + addi(Ri, Ri, 1); + blt(Ri, Rlen, loop); + + bind(end); + block_comment("} // i"); + } + + block_comment("for (int i = len; i < 2*len; i++) {"); + mv(Ri, Rlen); { + Label loop, end; + bind(loop); + slli(t0, Rlen, 1); + bge(Ri, t0, end); + + pre2(Ri, Rlen); + + block_comment(" for (j = (2*len-i-1)/2; j; j--) {"); { + slli(Rj, Rlen, 1); + sub(Rj, Rj, Ri); + sub(Rj, Rj, 1); + srliw(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); + } block_comment(" } // j"); + + last_squaring(Ri); + + block_comment(" for (j = (2*len-i)/2; j; j--) {"); { + slli(Rj, Rlen, 1); + sub(Rj, Rj, Ri); + srliw(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); + } block_comment(" } // j"); + + post2(Ri, Rlen); + addi(Ri, Ri, 1); + slli(t0, Rlen, 1); + blt(Ri, t0, loop); + + bind(end); + block_comment("} // i"); + } + + normalize(Rlen); + + mv(Ra, Pm_base); // Save Pm_base in Ra + restore_regs(); 
// Restore caller's Pm_base + + // Copy our result into caller's Pm_base + reverse(Pm_base, Ra, Rlen, Ri, Rj); + + leave(); + ret(); + + return entry; + } + }; +#endif // COMPILER2 + + // Continuation point for throwing of implicit exceptions that are + // not handled in the current activation. Fabricates an exception + // oop and initiates normal exception dispatching in this + // frame. Since we need to preserve callee-saved values (currently + // only for C2, but done for C1 as well) we need a callee-saved oop + // map and therefore have to make these stubs into RuntimeStubs + // rather than BufferBlobs. If the compiler needs all registers to + // be preserved between the fault point and the exception handler + // then it must assume responsibility for that in + // AbstractCompiler::continuation_for_implicit_null_exception or + // continuation_for_implicit_division_by_zero_exception. All other + // implicit exceptions (e.g., NullPointerException or + // AbstractMethodError on entry) are either at call sites or + // otherwise assume that stack unwinding will be initiated, so + // caller saved registers were assumed volatile in the compiler. + +#undef __ +#define __ masm-> + + address generate_throw_exception(const char* name, + address runtime_entry, + Register arg1 = noreg, + Register arg2 = noreg) { + // Information about frame layout at time of blocking runtime call. + // Note that we only have to preserve callee-saved registers since + // the compilers are responsible for supplying a continuation point + // if they expect all registers to be preserved. + // n.b. 
riscv asserts that frame::arg_reg_save_area_bytes == 0 + assert_cond(runtime_entry != NULL); + enum layout { + fp_off = 0, + fp_off2, + return_off, + return_off2, + framesize // inclusive of return address + }; + + const int insts_size = 512; + const int locs_size = 64; + + CodeBuffer code(name, insts_size, locs_size); + OopMapSet* oop_maps = new OopMapSet(); + MacroAssembler* masm = new MacroAssembler(&code); + assert_cond(oop_maps != NULL && masm != NULL); + + address start = __ pc(); + + // This is an inlined and slightly modified version of call_VM + // which has the ability to fetch the return PC out of + // thread-local storage and also sets up last_Java_sp slightly + // differently than the real call_VM + + __ enter(); // Save FP and RA before call + + assert(is_even(framesize / 2), "sp not 16-byte aligned"); + + // ra and fp are already in place + __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog + + int frame_complete = __ pc() - start; + + // Set up last_Java_sp and last_Java_fp + address the_pc = __ pc(); + __ set_last_Java_frame(sp, fp, the_pc, t0); + + // Call runtime + if (arg1 != noreg) { + assert(arg2 != c_rarg1, "clobbered"); + __ mv(c_rarg1, arg1); + } + if (arg2 != noreg) { + __ mv(c_rarg2, arg2); + } + __ mv(c_rarg0, xthread); + BLOCK_COMMENT("call runtime_entry"); + int32_t offset = 0; + __ movptr_with_offset(t0, runtime_entry, offset); + __ jalr(x1, t0, offset); + + // Generate oop map + OopMap* map = new OopMap(framesize, 0); + assert_cond(map != NULL); + + oop_maps->add_gc_map(the_pc - start, map); + + __ reset_last_Java_frame(true); + + __ leave(); + + // check for pending exceptions +#ifdef ASSERT + Label L; + __ ld(t0, Address(xthread, Thread::pending_exception_offset())); + __ bnez(t0, L); + __ should_not_reach_here(); + __ bind(L); +#endif // ASSERT + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + + // codeBlob framesize is in words (not VMRegImpl::slot_size) + RuntimeStub* stub = + 
RuntimeStub::new_runtime_stub(name, + &code, + frame_complete, + (framesize >> (LogBytesPerWord - LogBytesPerInt)), + oop_maps, false); + assert(stub != NULL, "create runtime stub fail!"); + return stub->entry_point(); + } + + // Initialization + void generate_initial() { + // Generate initial stubs and initializes the entry points + + // entry points that exist in all platforms Note: This is code + // that could be shared among different platforms - however the + // benefit seems to be smaller than the disadvantage of having a + // much more complicated generator structure. See also comment in + // stubRoutines.hpp. + + StubRoutines::_forward_exception_entry = generate_forward_exception(); + + StubRoutines::_call_stub_entry = + generate_call_stub(StubRoutines::_call_stub_return_address); + + // is referenced by megamorphic call + StubRoutines::_catch_exception_entry = generate_catch_exception(); + + // Build this early so it's available for the interpreter. + StubRoutines::_throw_StackOverflowError_entry = + generate_throw_exception("StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime::throw_StackOverflowError)); + StubRoutines::_throw_delayed_StackOverflowError_entry = + generate_throw_exception("delayed StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime::throw_delayed_StackOverflowError)); + // Safefetch stubs. 
+ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, + &StubRoutines::_safefetch32_fault_pc, + &StubRoutines::_safefetch32_continuation_pc); + generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, + &StubRoutines::_safefetchN_fault_pc, + &StubRoutines::_safefetchN_continuation_pc); + } + + void generate_all() { + // support for verify_oop (must happen after universe_init) + StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); + StubRoutines::_throw_AbstractMethodError_entry = + generate_throw_exception("AbstractMethodError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime:: + throw_AbstractMethodError)); + + StubRoutines::_throw_IncompatibleClassChangeError_entry = + generate_throw_exception("IncompatibleClassChangeError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime:: + throw_IncompatibleClassChangeError)); + + StubRoutines::_throw_NullPointerException_at_call_entry = + generate_throw_exception("NullPointerException at call throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime:: + throw_NullPointerException_at_call)); + // arraycopy stubs used by compilers + generate_arraycopy_stubs(); + +#ifdef COMPILER2 + if (UseMulAddIntrinsic) { + StubRoutines::_mulAdd = generate_mulAdd(); + } + + if (UseMultiplyToLenIntrinsic) { + StubRoutines::_multiplyToLen = generate_multiplyToLen(); + } + + if (UseSquareToLenIntrinsic) { + StubRoutines::_squareToLen = generate_squareToLen(); + } + + if (UseMontgomeryMultiplyIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); + MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); + StubRoutines::_montgomeryMultiply = g.generate_multiply(); + } + + if (UseMontgomerySquareIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); + MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); + StubRoutines::_montgomerySquare = g.generate_square(); + } + + if 
(UseRVVForBigIntegerShiftIntrinsics) { + StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift(); + StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift(); + } +#endif + + generate_compare_long_strings(); + + generate_string_indexof_stubs(); + + BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); + if (bs_nm != NULL) { + StubRoutines::riscv::_method_entry_barrier = generate_method_entry_barrier(); + } + + StubRoutines::riscv::set_completed(); + } + + public: + StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { + if (all) { + generate_all(); + } else { + generate_initial(); + } + } + + ~StubGenerator() {} +}; // end class declaration + +#define UCM_TABLE_MAX_ENTRIES 8 +void StubGenerator_generate(CodeBuffer* code, bool all) { + if (UnsafeCopyMemory::_table == NULL) { + UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); + } + + StubGenerator g(code, all); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "utilities/globalDefinitions.hpp" + +// Implementation of the platform-specific part of StubRoutines - for +// a description of how to extend it, see the stubRoutines.hpp file. + +address StubRoutines::riscv::_get_previous_sp_entry = NULL; + +address StubRoutines::riscv::_f2i_fixup = NULL; +address StubRoutines::riscv::_f2l_fixup = NULL; +address StubRoutines::riscv::_d2i_fixup = NULL; +address StubRoutines::riscv::_d2l_fixup = NULL; +address StubRoutines::riscv::_float_sign_mask = NULL; +address StubRoutines::riscv::_float_sign_flip = NULL; +address StubRoutines::riscv::_double_sign_mask = NULL; +address StubRoutines::riscv::_double_sign_flip = NULL; +address StubRoutines::riscv::_zero_blocks = NULL; +address StubRoutines::riscv::_compare_long_string_LL = NULL; +address StubRoutines::riscv::_compare_long_string_UU = NULL; +address StubRoutines::riscv::_compare_long_string_LU = NULL; +address StubRoutines::riscv::_compare_long_string_UL = NULL; +address StubRoutines::riscv::_string_indexof_linear_ll = NULL; +address StubRoutines::riscv::_string_indexof_linear_uu = NULL; +address StubRoutines::riscv::_string_indexof_linear_ul = NULL; +address StubRoutines::riscv::_large_byte_array_inflate = NULL; +address StubRoutines::riscv::_method_entry_barrier 
= NULL; + +bool StubRoutines::riscv::_completed = false; Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_STUBROUTINES_RISCV_HPP +#define CPU_RISCV_STUBROUTINES_RISCV_HPP + +// This file holds the platform specific parts of the StubRoutines +// definition. See stubRoutines.hpp for a description on how to +// extend it. 
+ +static bool returns_to_call_stub(address return_pc) { + return return_pc == _call_stub_return_address; +} + +enum platform_dependent_constants { + code_size1 = 19000, // simply increase if too small (assembler will crash if too small) + code_size2 = 28000 // simply increase if too small (assembler will crash if too small) +}; + +class riscv { + friend class StubGenerator; + + private: + static address _get_previous_sp_entry; + + static address _f2i_fixup; + static address _f2l_fixup; + static address _d2i_fixup; + static address _d2l_fixup; + + static address _float_sign_mask; + static address _float_sign_flip; + static address _double_sign_mask; + static address _double_sign_flip; + + static address _zero_blocks; + + static address _compare_long_string_LL; + static address _compare_long_string_LU; + static address _compare_long_string_UL; + static address _compare_long_string_UU; + static address _string_indexof_linear_ll; + static address _string_indexof_linear_uu; + static address _string_indexof_linear_ul; + static address _large_byte_array_inflate; + + static address _method_entry_barrier; + + static bool _completed; + + public: + + static address get_previous_sp_entry() { + return _get_previous_sp_entry; + } + + static address f2i_fixup() { + return _f2i_fixup; + } + + static address f2l_fixup() { + return _f2l_fixup; + } + + static address d2i_fixup() { + return _d2i_fixup; + } + + static address d2l_fixup() { + return _d2l_fixup; + } + + static address float_sign_mask() { + return _float_sign_mask; + } + + static address float_sign_flip() { + return _float_sign_flip; + } + + static address double_sign_mask() { + return _double_sign_mask; + } + + static address double_sign_flip() { + return _double_sign_flip; + } + + static address zero_blocks() { + return _zero_blocks; + } + + static address compare_long_string_LL() { + return _compare_long_string_LL; + } + + static address compare_long_string_LU() { + return _compare_long_string_LU; + } + + static 
address compare_long_string_UL() { + return _compare_long_string_UL; + } + + static address compare_long_string_UU() { + return _compare_long_string_UU; + } + + static address string_indexof_linear_ul() { + return _string_indexof_linear_ul; + } + + static address string_indexof_linear_ll() { + return _string_indexof_linear_ll; + } + + static address string_indexof_linear_uu() { + return _string_indexof_linear_uu; + } + + static address large_byte_array_inflate() { + return _large_byte_array_inflate; + } + + static address method_entry_barrier() { + return _method_entry_barrier; + } + + static bool complete() { + return _completed; + } + + static void set_completed() { + _completed = true; + } +}; + +#endif // CPU_RISCV_STUBROUTINES_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp @@ -0,0 +1,1767 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/bytecodeTracer.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateInterpreterGenerator.hpp" +#include "interpreter/templateTable.hpp" +#include "memory/resourceArea.hpp" +#include "oops/arrayOop.hpp" +#include "oops/method.hpp" +#include "oops/methodData.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/jniHandles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" +#include "utilities/powerOfTwo.hpp" +#include <sys/types.h> + +#ifndef PRODUCT +#include "oops/method.hpp" +#endif // !PRODUCT + +// Size of interpreter code. Increase if too small. Interpreter will +// fail with a guarantee ("not enough space for interpreter generation"); +// if too small. +// Run with +PrintInterpreter to get the VM to print out the size.
+// Max size with JVMTI +int TemplateInterpreter::InterpreterCodeSize = 256 * 1024; + +#define __ _masm-> + +//----------------------------------------------------------------------------- + +address TemplateInterpreterGenerator::generate_slow_signature_handler() { + address entry = __ pc(); + + __ andi(esp, esp, -16); + __ mv(c_rarg3, esp); + // xmethod + // xlocals + // c_rarg3: first stack arg - wordSize + // adjust sp + + __ addi(sp, c_rarg3, -18 * wordSize); + __ addi(sp, sp, -2 * wordSize); + __ sd(ra, Address(sp, 0)); + + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::slow_signature_handler), + xmethod, xlocals, c_rarg3); + + // x10: result handler + + // Stack layout: + // sp: return address <- sp + // 1 garbage + // 8 integer args (if static first is unused) + // 1 float/double identifiers + // 8 double args + // stack args <- esp + // garbage + // expression stack bottom + // bcp (NULL) + // ... + + // Restore ra + __ ld(ra, Address(sp, 0)); + __ addi(sp, sp , 2 * wordSize); + + // Do FP first so we can use c_rarg3 as temp + __ lwu(c_rarg3, Address(sp, 9 * wordSize)); // float/double identifiers + + for (int i = 0; i < Argument::n_float_register_parameters_c; i++) { + const FloatRegister r = g_FPArgReg[i]; + Label d, done; + + __ andi(t0, c_rarg3, 1UL << i); + __ bnez(t0, d); + __ flw(r, Address(sp, (10 + i) * wordSize)); + __ j(done); + __ bind(d); + __ fld(r, Address(sp, (10 + i) * wordSize)); + __ bind(done); + } + + // c_rarg0 contains the result from the call of + // InterpreterRuntime::slow_signature_handler so we don't touch it + // here. It will be loaded with the JNIEnv* later. 
+ for (int i = 1; i < Argument::n_int_register_parameters_c; i++) { + const Register rm = g_INTArgReg[i]; + __ ld(rm, Address(sp, i * wordSize)); + } + + __ addi(sp, sp, 18 * wordSize); + __ ret(); + + return entry; +} + +// Various method entries +address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { + // xmethod: Method* + // x30: sender sp + // esp: args + + if (!InlineIntrinsics) { + return NULL; // Generate a vanilla entry + } + + // These don't need a safepoint check because they aren't virtually + // callable. We won't enter these intrinsics from compiled code. + // If in the future we added an intrinsic which was virtually callable + // we'd have to worry about how to safepoint so that this code is used. + + // mathematical functions inlined by compiler + // (interpreter must provide identical implementation + // in order to avoid monotonicity bugs when switching + // from interpreter to compiler in the middle of some + // computation) + // + // stack: + // [ arg ] <-- esp + // [ arg ] + // retaddr in ra + + address fn = NULL; + address entry_point = NULL; + Register continuation = ra; + switch (kind) { + case Interpreter::java_lang_math_abs: + entry_point = __ pc(); + __ fld(f10, Address(esp)); + __ fabs_d(f10, f10); + __ mv(sp, x30); // Restore caller's SP + break; + case Interpreter::java_lang_math_sqrt: + entry_point = __ pc(); + __ fld(f10, Address(esp)); + __ fsqrt_d(f10, f10); + __ mv(sp, x30); + break; + case Interpreter::java_lang_math_sin : + entry_point = __ pc(); + __ fld(f10, Address(esp)); + __ mv(sp, x30); + __ mv(x9, ra); + continuation = x9; // The first callee-saved register + if (StubRoutines::dsin() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); + } + __ mv(t0, fn); + __ jalr(t0); + break; + case Interpreter::java_lang_math_cos : + entry_point = __ pc(); + __ fld(f10, Address(esp)); + __ mv(sp, x30); + __ 
mv(x9, ra); + continuation = x9; // The first callee-saved register + if (StubRoutines::dcos() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); + } + __ mv(t0, fn); + __ jalr(t0); + break; + case Interpreter::java_lang_math_tan : + entry_point = __ pc(); + __ fld(f10, Address(esp)); + __ mv(sp, x30); + __ mv(x9, ra); + continuation = x9; // The first callee-saved register + if (StubRoutines::dtan() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); + } + __ mv(t0, fn); + __ jalr(t0); + break; + case Interpreter::java_lang_math_log : + entry_point = __ pc(); + __ fld(f10, Address(esp)); + __ mv(sp, x30); + __ mv(x9, ra); + continuation = x9; // The first callee-saved register + if (StubRoutines::dlog() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); + } + __ mv(t0, fn); + __ jalr(t0); + break; + case Interpreter::java_lang_math_log10 : + entry_point = __ pc(); + __ fld(f10, Address(esp)); + __ mv(sp, x30); + __ mv(x9, ra); + continuation = x9; // The first callee-saved register + if (StubRoutines::dlog10() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); + } + __ mv(t0, fn); + __ jalr(t0); + break; + case Interpreter::java_lang_math_exp : + entry_point = __ pc(); + __ fld(f10, Address(esp)); + __ mv(sp, x30); + __ mv(x9, ra); + continuation = x9; // The first callee-saved register + if (StubRoutines::dexp() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); + } + __ mv(t0, fn); + __ jalr(t0); + break; + case Interpreter::java_lang_math_pow : + entry_point = __ pc(); + __ mv(x9, ra); + continuation = x9; + __ fld(f10, Address(esp, 2 * 
Interpreter::stackElementSize)); + __ fld(f11, Address(esp)); + __ mv(sp, x30); + if (StubRoutines::dpow() == NULL) { + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); + } else { + fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); + } + __ mv(t0, fn); + __ jalr(t0); + break; + case Interpreter::java_lang_math_fmaD : + if (UseFMA) { + entry_point = __ pc(); + __ fld(f10, Address(esp, 4 * Interpreter::stackElementSize)); + __ fld(f11, Address(esp, 2 * Interpreter::stackElementSize)); + __ fld(f12, Address(esp)); + __ fmadd_d(f10, f10, f11, f12); + __ mv(sp, x30); // Restore caller's SP + } + break; + case Interpreter::java_lang_math_fmaF : + if (UseFMA) { + entry_point = __ pc(); + __ flw(f10, Address(esp, 2 * Interpreter::stackElementSize)); + __ flw(f11, Address(esp, Interpreter::stackElementSize)); + __ flw(f12, Address(esp)); + __ fmadd_s(f10, f10, f11, f12); + __ mv(sp, x30); // Restore caller's SP + } + break; + default: + ; + } + if (entry_point != NULL) { + __ jr(continuation); + } + + return entry_point; +} + +// Abstract method entry +// Attempt to execute abstract method. Throw exception +address TemplateInterpreterGenerator::generate_abstract_entry(void) { + // xmethod: Method* + // x30: sender SP + + address entry_point = __ pc(); + + // abstract method entry + + // pop return address, reset last_sp to NULL + __ empty_expression_stack(); + __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) + __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) + + // throw exception + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_AbstractMethodErrorWithMethod), + xmethod); + // the call_VM checks for exception, so we should never return here. 
+ __ should_not_reach_here(); + + return entry_point; +} + +address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { + address entry = __ pc(); + +#ifdef ASSERT + { + Label L; + __ ld(t0, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); + __ mv(t1, sp); + // maximal sp for current fp (stack grows negative) + // check if frame is complete + __ bge(t0, t1, L); + __ stop ("interpreter frame not set up"); + __ bind(L); + } +#endif // ASSERT + // Restore bcp under the assumption that the current frame is still + // interpreted + __ restore_bcp(); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // throw exception + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); + return entry; +} + +address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { + address entry = __ pc(); + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // setup parameters + + // convention: expect aberrant index in register x11 + __ zero_extend(c_rarg2, x11, 32); + // convention: expect array in register x13 + __ mv(c_rarg1, x13); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + throw_ArrayIndexOutOfBoundsException), + c_rarg1, c_rarg2); + return entry; +} + +address TemplateInterpreterGenerator::generate_ClassCastException_handler() { + address entry = __ pc(); + + // object is at TOS + __ pop_reg(c_rarg1); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + throw_ClassCastException), + c_rarg1); + return entry; +} + +address TemplateInterpreterGenerator::generate_exception_handler_common( + const char* name, const char* message, bool pass_oop) { + assert(!pass_oop || message == 
NULL, "either oop or message but not both"); + address entry = __ pc(); + if (pass_oop) { + // object is at TOS + __ pop_reg(c_rarg2); + } + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // setup parameters + __ la(c_rarg1, Address((address)name)); + if (pass_oop) { + __ call_VM(x10, CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + create_klass_exception), + c_rarg1, c_rarg2); + } else { + // kind of lame ExternalAddress can't take NULL because + // external_word_Relocation will assert. + if (message != NULL) { + __ la(c_rarg2, Address((address)message)); + } else { + __ mv(c_rarg2, NULL_WORD); + } + __ call_VM(x10, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), + c_rarg1, c_rarg2); + } + // throw exception + __ j(address(Interpreter::throw_exception_entry())); + return entry; +} + +address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { + address entry = __ pc(); + + // Restore stack bottom in case i2c adjusted stack + __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + // and NULL it as marker that esp is now tos until next java call + __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + __ restore_bcp(); + __ restore_locals(); + __ restore_constant_pool_cache(); + __ get_method(xmethod); + + if (state == atos) { + Register obj = x10; + Register mdp = x11; + Register tmp = x12; + __ ld(mdp, Address(xmethod, Method::method_data_offset())); + __ profile_return_type(mdp, obj, tmp); + } + + // Pop N words from the stack + __ get_cache_and_index_at_bcp(x11, x12, 1, index_size); + __ ld(x11, Address(x11, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ andi(x11, x11, ConstantPoolCacheEntry::parameter_size_mask); + + __ shadd(esp, x11, esp, t0, 3); + + // Restore machine SP + __ ld(t0, Address(xmethod, Method::const_offset())); + __ 
lhu(t0, Address(t0, ConstMethod::max_stack_offset())); + __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2); + __ ld(t1, + Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); + __ slli(t0, t0, 3); + __ sub(t0, t1, t0); + __ andi(sp, t0, -16); + + __ check_and_handle_popframe(xthread); + __ check_and_handle_earlyret(xthread); + + __ get_dispatch(); + __ dispatch_next(state, step); + + return entry; +} + +address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, + int step, + address continuation) { + address entry = __ pc(); + __ restore_bcp(); + __ restore_locals(); + __ restore_constant_pool_cache(); + __ get_method(xmethod); + __ get_dispatch(); + + // Calculate stack limit + __ ld(t0, Address(xmethod, Method::const_offset())); + __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); + __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2); + __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); + __ slli(t0, t0, 3); + __ sub(t0, t1, t0); + __ andi(sp, t0, -16); + + // Restore expression stack pointer + __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + // NULL last_sp until next java call + __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + + // handle exceptions + { + Label L; + __ ld(t0, Address(xthread, Thread::pending_exception_offset())); + __ beqz(t0, L); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + + if (continuation == NULL) { + __ dispatch_next(state, step); + } else { + __ jump_to_entry(continuation); + } + return entry; +} + +address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) { + address entry = __ pc(); + if (type == T_OBJECT) { + // retrieve result from frame + __ ld(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); + // and verify it + __ verify_oop(x10); + } else 
{ + __ cast_primitive_type(type, x10); + } + + __ ret(); // return from result handler + return entry; +} + +address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, + address runtime_entry) { + assert_cond(runtime_entry != NULL); + address entry = __ pc(); + __ push(state); + __ call_VM(noreg, runtime_entry); + __ fence(0xf, 0xf); + __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); + return entry; +} + +// Helpers for commoning out cases in the various type of method entries. +// + + +// increment invocation count & check for overflow +// +// Note: checking for negative value instead of overflow +// so we have a 'sticky' overflow test +// +// xmethod: method +// +void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) { + Label done; + // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. + int increment = InvocationCounter::count_increment; + Label no_mdo; + if (ProfileInterpreter) { + // Are we profiling? 
+ __ ld(x10, Address(xmethod, Method::method_data_offset())); + __ beqz(x10, no_mdo); + // Increment counter in the MDO + const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); + __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); + __ j(done); + } + __ bind(no_mdo); + // Increment counter in MethodCounters + const Address invocation_counter(t1, + MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); + __ get_method_counters(xmethod, t1, done); + const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); + __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); + __ bind(done); +} + +void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { + __ mv(c_rarg1, zr); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), c_rarg1); + __ j(do_continue); +} + +// See if we've got enough room on the stack for locals plus overhead +// below JavaThread::stack_overflow_limit(). If not, throw a StackOverflowError +// without going through the signal handler, i.e., reserved and yellow zones +// will not be made usable. The shadow zone must suffice to handle the +// overflow. +// The expression stack grows down incrementally, so the normal guard +// page mechanism will work for that. +// +// NOTE: The additional locals are also always pushed (this wasn't +// obvious in generate_method_entry), so the guard should work for them +// too. 
+// +// Args: +// x13: number of additional locals this frame needs (what we must check) +// xmethod: Method* +// +// Kills: +// x10 +void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { + + // monitor entry size: see picture of stack set + // (generate_method_entry) and frame_riscv.hpp + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + // total overhead size: entry_size + (saved fp through expr stack + // bottom). be sure to change this if you add/subtract anything + // to/from the overhead area + const int overhead_size = + -(frame::interpreter_frame_initial_sp_offset * wordSize) + entry_size; + + const int page_size = os::vm_page_size(); + + Label after_frame_check; + + // see if the frame is greater than one page in size. If so, + // then we need to verify there is enough stack space remaining + // for the additional locals. + __ mv(t0, (page_size - overhead_size) / Interpreter::stackElementSize); + __ bleu(x13, t0, after_frame_check); + + // compute sp as if this were going to be the last frame on + // the stack before the red zone + + // locals + overhead, in bytes + __ mv(x10, overhead_size); + __ shadd(x10, x13, x10, t0, Interpreter::logStackElementSize); // 2 slots per parameter. + + const Address stack_limit(xthread, JavaThread::stack_overflow_limit_offset()); + __ ld(t0, stack_limit); + +#ifdef ASSERT + Label limit_okay; + // Verify that thread stack limit is non-zero. + __ bnez(t0, limit_okay); + __ stop("stack overflow limit is zero"); + __ bind(limit_okay); +#endif + + // Add stack limit to locals. + __ add(x10, x10, t0); + + // Check against the current stack bottom. + __ bgtu(sp, x10, after_frame_check); + + // Remove the incoming args, peeling the machine SP back to where it + // was in the caller. This is not strictly necessary, but unless we + // do so the stack frame may have a garbage FP; this ensures a + // correct call stack that we can always unwind. 
The ANDI should be + // unnecessary because the sender SP in x30 is always aligned, but + // it doesn't hurt. + __ andi(sp, x30, -16); + + // Note: the restored frame is not necessarily interpreted. + // Use the shared runtime version of the StackOverflowError. + assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); + __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry())); + + // all done with frame size check + __ bind(after_frame_check); +} + +// Allocate monitor and lock method (asm interpreter) +// +// Args: +// xmethod: Method* +// xlocals: locals +// +// Kills: +// x10 +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs) +// t0, t1 (temporary regs) +void TemplateInterpreterGenerator::lock_method() { + // synchronize method + const Address access_flags(xmethod, Method::access_flags_offset()); + const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + +#ifdef ASSERT + __ lwu(x10, access_flags); + __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method doesn't need synchronization", false); +#endif // ASSERT + + // get synchronization object + { + Label done; + __ lwu(x10, access_flags); + __ andi(t0, x10, JVM_ACC_STATIC); + // get receiver (assume this is frequent case) + __ ld(x10, Address(xlocals, Interpreter::local_offset_in_bytes(0))); + __ beqz(t0, done); + __ load_mirror(x10, xmethod); + +#ifdef ASSERT + { + Label L; + __ bnez(x10, L); + __ stop("synchronization object is NULL"); + __ bind(L); + } +#endif // ASSERT + + __ bind(done); + } + + // add space for monitor & lock + __ add(sp, sp, - entry_size); // add space for a monitor entry + __ add(esp, esp, - entry_size); + __ mv(t0, esp); + __ sd(t0, monitor_block_top); // set new monitor block top + // store object + __ sd(x10, Address(esp, BasicObjectLock::obj_offset_in_bytes())); + __ mv(c_rarg1, esp); // object address + 
__ lock_object(c_rarg1); +} + +// Generate a fixed interpreter frame. This is identical setup for +// interpreted methods and for native methods hence the shared code. +// +// Args: +// ra: return address +// xmethod: Method* +// xlocals: pointer to locals +// xcpool: cp cache +// stack_pointer: previous sp +void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { + // initialize fixed part of activation frame + if (native_call) { + __ add(esp, sp, - 14 * wordSize); + __ mv(xbcp, zr); + __ add(sp, sp, - 14 * wordSize); + // add 2 zero-initialized slots for native calls + __ sd(zr, Address(sp, 13 * wordSize)); + __ sd(zr, Address(sp, 12 * wordSize)); + } else { + __ add(esp, sp, - 12 * wordSize); + __ ld(t0, Address(xmethod, Method::const_offset())); // get ConstMethod + __ add(xbcp, t0, in_bytes(ConstMethod::codes_offset())); // get codebase + __ add(sp, sp, - 12 * wordSize); + } + __ sd(xbcp, Address(sp, wordSize)); + __ sd(esp, Address(sp, 0)); + + if (ProfileInterpreter) { + Label method_data_continue; + __ ld(t0, Address(xmethod, Method::method_data_offset())); + __ beqz(t0, method_data_continue); + __ la(t0, Address(t0, in_bytes(MethodData::data_offset()))); + __ bind(method_data_continue); + } + + __ sd(xmethod, Address(sp, 7 * wordSize)); + __ sd(ProfileInterpreter ? 
t0 : zr, Address(sp, 6 * wordSize)); + + // Get mirror and store it in the frame as GC root for this Method* + __ load_mirror(t2, xmethod); + __ sd(zr, Address(sp, 5 * wordSize)); + __ sd(t2, Address(sp, 4 * wordSize)); + + __ ld(xcpool, Address(xmethod, Method::const_offset())); + __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); + __ ld(xcpool, Address(xcpool, ConstantPool::cache_offset_in_bytes())); + __ sd(xcpool, Address(sp, 3 * wordSize)); + __ sd(xlocals, Address(sp, 2 * wordSize)); + + __ sd(ra, Address(sp, 11 * wordSize)); + __ sd(fp, Address(sp, 10 * wordSize)); + __ la(fp, Address(sp, 12 * wordSize)); // include ra & fp + + // set sender sp + // leave last_sp as null + __ sd(x30, Address(sp, 9 * wordSize)); + __ sd(zr, Address(sp, 8 * wordSize)); + + // Move SP out of the way + if (!native_call) { + __ ld(t0, Address(xmethod, Method::const_offset())); + __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); + __ add(t0, t0, frame::interpreter_frame_monitor_size() + 2); + __ slli(t0, t0, 3); + __ sub(t0, sp, t0); + __ andi(sp, t0, -16); + } +} + +// End of helpers + +// Various method entries +//------------------------------------------------------------------------------------------------------------------------ +// +// + +// Method entry for java.lang.ref.Reference.get. +address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { + // Code: _aload_0, _getfield, _areturn + // parameter size = 1 + // + // The code that gets generated by this routine is split into 2 parts: + // 1. The "intrinsified" code for G1 (or any SATB based GC), + // 2. The slow path - which is an expansion of the regular method entry. + // + // Notes:- + // * In the G1 code we do not check whether we need to block for + // a safepoint. If G1 is enabled then we must execute the specialized + // code for Reference.get (except when the Reference object is null) + // so that we can log the value in the referent field with an SATB + // update buffer. 
+ // If the code for the getfield template is modified so that the + // G1 pre-barrier code is executed when the current method is + // Reference.get() then going through the normal method entry + // will be fine. + // * The G1 code can, however, check the receiver object (the instance + // of java.lang.Reference) and jump to the slow path if null. If the + // Reference object is null then we obviously cannot fetch the referent + // and so we don't need to call the G1 pre-barrier. Thus we can use the + // regular method entry code to generate the NPE. + // + // This code is based on generate_accessor_entry. + // + // xmethod: Method* + // x30: senderSP must preserve for slow path, set SP to it on fast path + + // ra is live. It must be saved around calls. + + address entry = __ pc(); + + const int referent_offset = java_lang_ref_Reference::referent_offset(); + guarantee(referent_offset > 0, "referent offset not initialized"); + + Label slow_path; + const Register local_0 = c_rarg0; + // Check if local 0 != NULL + // If the receiver is null then it is OK to jump to the slow path. + __ ld(local_0, Address(esp, 0)); + __ beqz(local_0, slow_path); + + __ mv(x9, x30); // Move senderSP to a callee-saved register + + // Load the value of the referent field. 
+ const Address field_address(local_0, referent_offset); + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ t1, /*tmp2*/ t0); + + // areturn + __ andi(sp, x9, -16); // done with stack + __ ret(); + + // generate a vanilla interpreter entry as the slow path + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); + return entry; +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.update(int crc, int b) + */ +address TemplateInterpreterGenerator::generate_CRC32_update_entry() { + // TODO: Unimplemented generate_CRC32_update_entry + return 0; +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) + * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) + */ +address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + // TODO: Unimplemented generate_CRC32_updateBytes_entry + return 0; +} + +/** + * Method entry for intrinsic-candidate (non-native) methods: + * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) + * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) + * Unlike CRC32, CRC32C does not have any methods marked as native + * CRC32C also uses an "end" variable instead of the length variable CRC32 uses + */ +address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + // TODO: Unimplemented generate_CRC32C_updateBytes_entry + return 0; +} + +void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { + // Bang each page in the shadow zone. We can't assume it's been done for + // an interpreter frame with greater than a page of locals, so each page + // needs to be checked. Only true for non-native. 
+ const int page_size = os::vm_page_size(); + const int n_shadow_pages = ((int)StackOverflow::stack_shadow_zone_size()) / page_size; + const int start_page = native_call ? n_shadow_pages : 1; + for (int pages = start_page; pages <= n_shadow_pages; pages++) { + __ bang_stack_with_offset(pages*page_size); + } +} + +// Interpreter stub for calling a native method. (asm interpreter) +// This sets up a somewhat different looking stack for calling the +// native method than the typical interpreter frame setup. +address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + + // xmethod: Method* + // x30: sender sp + + address entry_point = __ pc(); + + const Address constMethod (xmethod, Method::const_offset()); + const Address access_flags (xmethod, Method::access_flags_offset()); + const Address size_of_parameters(x12, ConstMethod:: + size_of_parameters_offset()); + + // get parameter size (always needed) + __ ld(x12, constMethod); + __ load_unsigned_short(x12, size_of_parameters); + + // Native calls don't need the stack size check since they have no + // expression stack and the arguments are already on the stack and + // we only add a handful of words to the stack. 
+ + // xmethod: Method* + // x12: size of parameters + // x30: sender sp + + // for natives the size of locals is zero + + // compute beginning of parameters (xlocals) + __ shadd(xlocals, x12, esp, xlocals, 3); + __ addi(xlocals, xlocals, -wordSize); + + // Pull SP back to minimum size: this avoids holes in the stack + __ andi(sp, esp, -16); + + // initialize fixed part of activation frame + generate_fixed_frame(true); + + // make sure method is native & not abstract +#ifdef ASSERT + __ lwu(x10, access_flags); + __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute non-native method as native", false); + __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter"); +#endif + + // Since at this point in the method invocation the exception + // handler would try to exit the monitor of synchronized methods + // which hasn't been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation + // will check this flag. + + const Address do_not_unlock_if_synchronized(xthread, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + __ mv(t1, true); + __ sb(t1, do_not_unlock_if_synchronized); + + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow); + } + + Label continue_after_compile; + __ bind(continue_after_compile); + + bang_stack_shadow_pages(true); + + // reset the _do_not_unlock_if_synchronized flag + __ sb(zr, do_not_unlock_if_synchronized); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. 
+ if (synchronized) { + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + __ lwu(x10, access_flags); + __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization"); +#endif + } + + // start execution +#ifdef ASSERT + __ verify_frame_setup(); +#endif + + // jvmti support + __ notify_method_entry(); + + // work registers + const Register t = x18; + const Register result_handler = x19; + + // allocate space for parameters + __ ld(t, Address(xmethod, Method::const_offset())); + __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset())); + + __ slli(t, t, Interpreter::logStackElementSize); + __ sub(x30, esp, t); + __ andi(sp, x30, -16); + __ mv(esp, x30); + + // get signature handler + { + Label L; + __ ld(t, Address(xmethod, Method::signature_handler_offset())); + __ bnez(t, L); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::prepare_native_call), + xmethod); + __ ld(t, Address(xmethod, Method::signature_handler_offset())); + __ bind(L); + } + + // call signature handler + assert(InterpreterRuntime::SignatureHandlerGenerator::from() == xlocals, + "adjust this code"); + assert(InterpreterRuntime::SignatureHandlerGenerator::to() == sp, + "adjust this code"); + assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t0, + "adjust this code"); + + // The generated handlers do not touch xmethod (the method). + // However, large signatures cannot be cached and are generated + // each time here. The slow-path generator can do a GC on return, + // so we must reload it after the call. 
+ __ jalr(t); + __ get_method(xmethod); // slow path can do a GC, reload xmethod + + + // result handler is in x10 + // set result handler + __ mv(result_handler, x10); + // pass mirror handle if static call + { + Label L; + __ lwu(t, Address(xmethod, Method::access_flags_offset())); + __ andi(t0, t, JVM_ACC_STATIC); + __ beqz(t0, L); + // get mirror + __ load_mirror(t, xmethod); + // copy mirror into activation frame + __ sd(t, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); + // pass handle to mirror + __ addi(c_rarg1, fp, frame::interpreter_frame_oop_temp_offset * wordSize); + __ bind(L); + } + + // get native function entry point in x28 + { + Label L; + __ ld(x28, Address(xmethod, Method::native_function_offset())); + address unsatisfied = (SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); + __ mv(t1, unsatisfied); + __ ld(t1, t1); + __ bne(x28, t1, L); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::prepare_native_call), + xmethod); + __ get_method(xmethod); + __ ld(x28, Address(xmethod, Method::native_function_offset())); + __ bind(L); + } + + // pass JNIEnv + __ add(c_rarg0, xthread, in_bytes(JavaThread::jni_environment_offset())); + + // It is enough that the pc() points into the right code + // segment. It does not have to be the correct return pc. + Label native_return; + __ set_last_Java_frame(esp, fp, native_return, x30); + + // change thread state +#ifdef ASSERT + { + Label L; + __ lwu(t, Address(xthread, JavaThread::thread_state_offset())); + __ addi(t0, zr, (u1)_thread_in_Java); + __ beq(t, t0, L); + __ stop("Wrong thread state in native stub"); + __ bind(L); + } +#endif + + // Change state to native + __ la(t1, Address(xthread, JavaThread::thread_state_offset())); + __ mv(t0, _thread_in_native); + __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + __ sw(t0, Address(t1)); + + // Call the native method. 
+ __ jalr(x28); + __ bind(native_return); + __ get_method(xmethod); + // result potentially in x10 or f10 + + // make room for the pushes we're about to do + __ sub(t0, esp, 4 * wordSize); + __ andi(sp, t0, -16); + + // NOTE: The order of these pushes is known to frame::interpreter_frame_result + // in order to extract the result of a method call. If the order of these + // pushes change or anything else is added to the stack then the code in + // interpreter_frame_result must also change. + __ push(dtos); + __ push(ltos); + + // change thread state + // Force all preceding writes to be observed prior to thread state change + __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + + __ mv(t0, _thread_in_native_trans); + __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + + // Force this write out before the read below + __ membar(MacroAssembler::AnyAny); + + // check for safepoint operation in progress and/or pending suspend requests + { + Label L, Continue; + + // We need an acquire here to ensure that any subsequent load of the + // global SafepointSynchronize::_state flag is ordered after this load + // of the thread-local polling word. We don't want this poll to + // return false (i.e. not safepointing) and a later poll of the global + // SafepointSynchronize::_state spuriously to return true. + // + // This is to avoid a race when we're in a native->Java transition + // racing the code which wakes up from a safepoint. + __ safepoint_poll(L, true /* at_return */, true /* acquire */, false /* in_nmethod */); + __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); + __ beqz(t1, Continue); + __ bind(L); + + // Don't use call_VM as it will see a possible pending exception + // and forward it and never return here preventing us from + // clearing _last_native_pc down below. So we do a runtime call by + // hand. 
+ // + __ mv(c_rarg0, xthread); + __ mv(t1, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); + __ jalr(t1); + __ get_method(xmethod); + __ reinit_heapbase(); + __ bind(Continue); + } + + // change thread state + // Force all preceding writes to be observed prior to thread state change + __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + + __ mv(t0, _thread_in_Java); + __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + + // reset_last_Java_frame + __ reset_last_Java_frame(true); + + if (CheckJNICalls) { + // clear_pending_jni_exception_check + __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); + } + + // reset handle block + __ ld(t, Address(xthread, JavaThread::active_handles_offset())); + __ sd(zr, Address(t, JNIHandleBlock::top_offset_in_bytes())); + + // If result is an oop unbox and store it in frame where gc will see it + // and result handler will pick it up + + { + Label no_oop; + __ la(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); + __ bne(t, result_handler, no_oop); + // Unbox oop result, e.g. JNIHandles::resolve result. + __ pop(ltos); + __ resolve_jobject(x10, xthread, t); + __ sd(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); + // keep stack depth as expected by pushing oop which will eventually be discarded + __ push(ltos); + __ bind(no_oop); + } + + { + Label no_reguard; + __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); + __ addi(t1, zr, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); + __ bne(t0, t1, no_reguard); + + __ pusha(); // only save smashed registers + __ mv(c_rarg0, xthread); + __ mv(t1, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); + __ jalr(t1); + __ popa(); // only restore smashed registers + __ bind(no_reguard); + } + + // The method register is junk from after the thread_in_native transition + // until here. 
Also can't call_VM until the bcp has been + // restored. Need bcp for throwing exception below so get it now. + __ get_method(xmethod); + + // restore bcp to have legal interpreter frame, i.e., bci == 0 <=> + // xbcp == code_base() + __ ld(xbcp, Address(xmethod, Method::const_offset())); // get ConstMethod* + __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); // get codebase + // handle exceptions (exception handling will handle unlocking!) + { + Label L; + __ ld(t0, Address(xthread, Thread::pending_exception_offset())); + __ beqz(t0, L); + // Note: At some point we may want to unify this with the code + // used in call_VM_base(); i.e., we should use the + // StubRoutines::forward_exception code. For now this doesn't work + // here because the sp is not correctly set at this point. + __ MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + + // do unlocking if necessary + { + Label L; + __ lwu(t, Address(xmethod, Method::access_flags_offset())); + __ andi(t0, t, JVM_ACC_SYNCHRONIZED); + __ beqz(t0, L); + // the code below should be shared with interpreter macro + // assembler implementation + { + Label unlock; + // BasicObjectLock will be first in list, since this is a + // synchronized method. However, need to check that the object + // has not been unlocked by an explicit monitorexit bytecode. 
+ + // monitor expect in c_rarg1 for slow unlock path + __ la(c_rarg1, Address(fp, // address of first monitor + (intptr_t)(frame::interpreter_frame_initial_sp_offset * + wordSize - sizeof(BasicObjectLock)))); + + __ ld(t, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + __ bnez(t, unlock); + + // Entry already unlocked, need to throw exception + __ MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + __ bind(unlock); + __ unlock_object(c_rarg1); + } + __ bind(L); + } + + // jvmti support + // Note: This must happen _after_ handling/throwing any exceptions since + // the exception handler code notifies the runtime of method exits + // too. If this happens before, method entry/exit notifications are + // not properly paired (was bug - gri 11/22/99). + __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); + + __ pop(ltos); + __ pop(dtos); + + __ jalr(result_handler); + + // remove activation + __ ld(esp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp + // remove frame anchor + __ leave(); + + // restore sender sp + __ mv(sp, esp); + + __ ret(); + + if (inc_counter) { + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(continue_after_compile); + } + + return entry_point; +} + +// +// Generic interpreted method entry to (asm) interpreter +// +address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { + + // determine code generation flags + const bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + + // t0: sender sp + address entry_point = __ pc(); + + const Address constMethod(xmethod, Method::const_offset()); + const Address access_flags(xmethod, Method::access_flags_offset()); + const Address size_of_parameters(x13, + ConstMethod::size_of_parameters_offset()); + const Address 
size_of_locals(x13, ConstMethod::size_of_locals_offset()); + + // get parameter size (always needed) + // need to load the const method first + __ ld(x13, constMethod); + __ load_unsigned_short(x12, size_of_parameters); + + // x12: size of parameters + + __ load_unsigned_short(x13, size_of_locals); // get size of locals in words + __ sub(x13, x13, x12); // x13 = no. of additional locals + + // see if we've got enough room on the stack for locals plus overhead. + generate_stack_overflow_check(); + + // compute beginning of parameters (xlocals) + __ shadd(xlocals, x12, esp, t1, 3); + __ add(xlocals, xlocals, -wordSize); + + // Make room for additional locals + __ slli(t1, x13, 3); + __ sub(t0, esp, t1); + + // Padding between locals and fixed part of activation frame to ensure + // SP is always 16-byte aligned. + __ andi(sp, t0, -16); + + // x13 - # of additional locals + // allocate space for locals + // explicitly initialize locals + { + Label exit, loop; + __ blez(x13, exit); // do nothing if x13 <= 0 + __ bind(loop); + __ sd(zr, Address(t0)); + __ add(t0, t0, wordSize); + __ add(x13, x13, -1); // until everything initialized + __ bnez(x13, loop); + __ bind(exit); + } + + // And the base dispatch table + __ get_dispatch(); + + // initialize fixed part of activation frame + generate_fixed_frame(false); + + // make sure method is not native & not abstract +#ifdef ASSERT + __ lwu(x10, access_flags); + __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute native method as non-native"); + __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter"); +#endif + + // Since at this point in the method invocation the exception + // handler would try to exit the monitor of synchronized methods + // which hasn't been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation + // will check this flag. 
+ + const Address do_not_unlock_if_synchronized(xthread, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + __ mv(t1, true); + __ sb(t1, do_not_unlock_if_synchronized); + + Label no_mdp; + const Register mdp = x13; + __ ld(mdp, Address(xmethod, Method::method_data_offset())); + __ beqz(mdp, no_mdp); + __ add(mdp, mdp, in_bytes(MethodData::data_offset())); + __ profile_parameters_type(mdp, x11, x12, x14); // use x11, x12, x14 as tmp registers + __ bind(no_mdp); + + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow); + } + + Label continue_after_compile; + __ bind(continue_after_compile); + + bang_stack_shadow_pages(false); + + // reset the _do_not_unlock_if_synchronized flag + __ sb(zr, do_not_unlock_if_synchronized); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. 
+ if (synchronized) { + // Allocate monitor and lock method + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + __ lwu(x10, access_flags); + __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization"); +#endif + } + + // start execution +#ifdef ASSERT + __ verify_frame_setup(); +#endif + + // jvmti support + __ notify_method_entry(); + + __ dispatch_next(vtos); + + // invocation counter overflow + if (inc_counter) { + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(continue_after_compile); + } + + return entry_point; +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateInterpreterGenerator::generate_throw_exception() { + // Entry point in previous activation (i.e., if the caller was + // interpreted) + Interpreter::_rethrow_exception_entry = __ pc(); + // Restore sp to interpreter_frame_last_sp even though we are going + // to empty the expression stack for the exception processing. + __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + // x10: exception + // x13: return address/pc that threw exception + __ restore_bcp(); // xbcp points to call/send + __ restore_locals(); + __ restore_constant_pool_cache(); + __ reinit_heapbase(); // restore xheapbase as heapbase. + __ get_dispatch(); + + // Entry point for exceptions thrown within interpreter code + Interpreter::_throw_exception_entry = __ pc(); + // If we came here via a NullPointerException on the receiver of a + // method, xthread may be corrupt. 
+ __ get_method(xmethod); + // expression stack is undefined here + // x10: exception + // xbcp: exception bcp + __ verify_oop(x10); + __ mv(c_rarg1, x10); + + // expression stack must be empty before entering the VM in case of + // an exception + __ empty_expression_stack(); + // find exception handler address and preserve exception oop + __ call_VM(x13, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::exception_handler_for_exception), + c_rarg1); + + // Calculate stack limit + __ ld(t0, Address(xmethod, Method::const_offset())); + __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); + __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4); + __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); + __ slli(t0, t0, 3); + __ sub(t0, t1, t0); + __ andi(sp, t0, -16); + + // x10: exception handler entry point + // x13: preserved exception oop + // xbcp: bcp for exception handler + __ push_ptr(x13); // push exception which is now the only value on the stack + __ jr(x10); // jump to exception handler (may be _remove_activation_entry!) + + // If the exception is not handled in the current frame the frame is + // removed and the exception is rethrown (i.e. exception + // continuation is _rethrow_exception). + // + // Note: At this point the bci is still the bxi for the instruction + // which caused the exception and the expression stack is + // empty. Thus, for any VM calls at this point, GC will find a legal + // oop map (with empty expression stack). + + // + // JVMTI PopFrame support + // + + Interpreter::_remove_activation_preserving_args_entry = __ pc(); + __ empty_expression_stack(); + // Set the popframe_processing bit in pending_popframe_condition + // indicating that we are currently handling popframe, so that + // call_VMs that may happen later do not trigger new popframe + // handling cycles. 
+ __ lwu(x13, Address(xthread, JavaThread::popframe_condition_offset())); + __ ori(x13, x13, JavaThread::popframe_processing_bit); + __ sw(x13, Address(xthread, JavaThread::popframe_condition_offset())); + + { + // Check to see whether we are returning to a deoptimized frame. + // (The PopFrame call ensures that the caller of the popped frame is + // either interpreted or compiled and deoptimizes it if compiled.) + // In this case, we can't call dispatch_next() after the frame is + // popped, but instead must save the incoming arguments and restore + // them after deoptimization has occurred. + // + // Note that we don't compare the return PC against the + // deoptimization blob's unpack entry because of the presence of + // adapter frames in C2. + Label caller_not_deoptimized; + __ ld(c_rarg1, Address(fp, frame::return_addr_offset * wordSize)); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), c_rarg1); + __ bnez(x10, caller_not_deoptimized); + + // Compute size of arguments for saving when returning to + // deoptimized caller + __ get_method(x10); + __ ld(x10, Address(x10, Method::const_offset())); + __ load_unsigned_short(x10, Address(x10, in_bytes(ConstMethod:: + size_of_parameters_offset()))); + __ slli(x10, x10, Interpreter::logStackElementSize); + __ restore_locals(); + __ sub(xlocals, xlocals, x10); + __ add(xlocals, xlocals, wordSize); + // Save these arguments + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, + Deoptimization:: + popframe_preserve_args), + xthread, x10, xlocals); + + __ remove_activation(vtos, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false, + /* notify_jvmdi */ false); + + // Inform deoptimization that it is responsible for restoring + // these arguments + __ mv(t0, JavaThread::popframe_force_deopt_reexecution_bit); + __ sw(t0, Address(xthread, JavaThread::popframe_condition_offset())); + + // Continue in deoptimization handler + __ ret(); + + __ 
bind(caller_not_deoptimized); + } + + __ remove_activation(vtos, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false, + /* notify_jvmdi */ false); + + // Restore the last_sp and null it out + __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + + __ restore_bcp(); + __ restore_locals(); + __ restore_constant_pool_cache(); + __ get_method(xmethod); + __ get_dispatch(); + + // The method data pointer was incremented already during + // call profiling. We have to restore the mdp for the current bcp. + if (ProfileInterpreter) { + __ set_method_data_pointer_for_bcp(); + } + + // Clear the popframe condition flag + __ sw(zr, Address(xthread, JavaThread::popframe_condition_offset())); + assert(JavaThread::popframe_inactive == 0, "fix popframe_inactive"); + +#if INCLUDE_JVMTI + { + Label L_done; + + __ lbu(t0, Address(xbcp, 0)); + __ li(t1, Bytecodes::_invokestatic); + __ bne(t1, t0, L_done); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. + // Detect such a case in the InterpreterRuntime function and return the member name argument,or NULL. 
+ + __ ld(c_rarg0, Address(xlocals, 0)); + __ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null),c_rarg0, xmethod, xbcp); + + __ beqz(x10, L_done); + + __ sd(x10, Address(esp, 0)); + __ bind(L_done); + } +#endif // INCLUDE_JVMTI + + // Restore machine SP + __ ld(t0, Address(xmethod, Method::const_offset())); + __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); + __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4); + __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); + __ slliw(t0, t0, 3); + __ sub(t0, t1, t0); + __ andi(sp, t0, -16); + + __ dispatch_next(vtos); + // end of PopFrame support + + Interpreter::_remove_activation_entry = __ pc(); + + // preserve exception over this code sequence + __ pop_ptr(x10); + __ sd(x10, Address(xthread, JavaThread::vm_result_offset())); + // remove the activation (without doing throws on illegalMonitorExceptions) + __ remove_activation(vtos, false, true, false); + // restore exception + __ get_vm_result(x10, xthread); + + // In between activations - previous activation type unknown yet + // compute continuation point - the continuation point expects the + // following registers set up: + // + // x10: exception + // ra: return address/pc that threw exception + // sp: expression stack of caller + // fp: fp of caller + // FIXME: There's no point saving ra here because VM calls don't trash it + __ sub(sp, sp, 2 * wordSize); + __ sd(x10, Address(sp, 0)); // save exception + __ sd(ra, Address(sp, wordSize)); // save return address + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, + SharedRuntime::exception_handler_for_return_address), + xthread, ra); + __ mv(x11, x10); // save exception handler + __ ld(x10, Address(sp, 0)); // restore exception + __ ld(ra, Address(sp, wordSize)); // restore return address + __ add(sp, sp, 2 * wordSize); + // We might be returning to a deopt handler that expects x13 to + // contain the exception pc + __ mv(x13, ra); + // Note 
that an "issuing PC" is actually the next PC after the call + __ jr(x11); // jump to exception + // handler of caller +} + +// +// JVMTI ForceEarlyReturn support +// +address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { + address entry = __ pc(); + + __ restore_bcp(); + __ restore_locals(); + __ empty_expression_stack(); + __ load_earlyret_value(state); + + __ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); + Address cond_addr(t0, JvmtiThreadState::earlyret_state_offset()); + + // Clear the earlyret state + assert(JvmtiThreadState::earlyret_inactive == 0, "should be"); + __ sd(zr, cond_addr); + + __ remove_activation(state, + false, /* throw_monitor_exception */ + false, /* install_monitor_exception */ + true); /* notify_jvmdi */ + __ ret(); + + return entry; +} +// end of ForceEarlyReturn support + +//----------------------------------------------------------------------------- +// Helper for vtos entry point generation + +void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, + address& bep, + address& cep, + address& sep, + address& aep, + address& iep, + address& lep, + address& fep, + address& dep, + address& vep) { + assert(t != NULL && t->is_valid() && t->tos_in() == vtos, "illegal template"); + Label L; + aep = __ pc(); __ push_ptr(); __ j(L); + fep = __ pc(); __ push_f(); __ j(L); + dep = __ pc(); __ push_d(); __ j(L); + lep = __ pc(); __ push_l(); __ j(L); + bep = cep = sep = + iep = __ pc(); __ push_i(); + vep = __ pc(); + __ bind(L); + generate_and_dispatch(t); +} + +//----------------------------------------------------------------------------- + +// Non-product code +#ifndef PRODUCT +address TemplateInterpreterGenerator::generate_trace_code(TosState state) { + address entry = __ pc(); + + __ push_reg(ra); + __ push(state); + __ push_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); + __ mv(c_rarg2, x10); // Pass itos + __ call_VM(noreg, 
CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), c_rarg1, c_rarg2, c_rarg3); + __ pop_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); + __ pop(state); + __ pop_reg(ra); + __ ret(); // return from result handler + + return entry; +} + +void TemplateInterpreterGenerator::count_bytecode() { + __ push_reg(t0); + __ push_reg(x10); + __ mv(x10, (address) &BytecodeCounter::_counter_value); + __ li(t0, 1); + __ amoadd_d(zr, x10, t0, Assembler::aqrl); + __ pop_reg(x10); + __ pop_reg(t0); +} + +void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ; } + +void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ; } + +void TemplateInterpreterGenerator::trace_bytecode(Template* t) { + // Call a little run-time stub to avoid blow-up for each bytecode. + // The run-time runtime saves the right registers, depending on + // the tosca in-state for the given template. + + assert(Interpreter::trace_code(t->tos_in()) != NULL, "entry must have been generated"); + __ jal(Interpreter::trace_code(t->tos_in())); + __ reinit_heapbase(); +} + +void TemplateInterpreterGenerator::stop_interpreter_at() { + Label L; + __ push_reg(t0); + __ mv(t0, (address) &BytecodeCounter::_counter_value); + __ ld(t0, Address(t0)); + __ mv(t1, StopInterpreterAt); + __ bne(t0, t1, L); + __ ebreak(); + __ bind(L); + __ pop_reg(t0); +} + +#endif // !PRODUCT Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/templateTable_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/templateTable_riscv.cpp @@ -0,0 +1,3955 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "gc/shared/tlab_globals.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateTable.hpp" +#include "memory/universe.hpp" +#include "oops/method.hpp" +#include "oops/methodData.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "utilities/powerOfTwo.hpp" + +#define __ _masm-> + +// Address computation: local variables + +static inline Address iaddress(int n) { + return Address(xlocals, Interpreter::local_offset_in_bytes(n)); +} + +static inline Address laddress(int n) { + return iaddress(n + 1); +} + +static inline Address faddress(int 
n) { + return iaddress(n); +} + +static inline Address daddress(int n) { + return laddress(n); +} + +static inline Address aaddress(int n) { + return iaddress(n); +} + +static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { + assert_cond(_masm != NULL); + _masm->shadd(temp, r, xlocals, temp, 3); + return Address(temp, 0); +} + +static inline Address laddress(Register r, Register temp, + InterpreterMacroAssembler* _masm) { + assert_cond(_masm != NULL); + _masm->shadd(temp, r, xlocals, temp, 3); + return Address(temp, Interpreter::local_offset_in_bytes(1));; +} + +static inline Address faddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { + return iaddress(r, temp, _masm); +} + +static inline Address daddress(Register r, Register temp, + InterpreterMacroAssembler* _masm) { + return laddress(r, temp, _masm); +} + +static inline Address aaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { + return iaddress(r, temp, _masm); +} + +static inline Address at_rsp() { + return Address(esp, 0); +} + +// At top of Java expression stack which may be different than esp(). It +// isn't for category 1 objects. +static inline Address at_tos () { + return Address(esp, Interpreter::expr_offset_in_bytes(0)); +} + +static inline Address at_tos_p1() { + return Address(esp, Interpreter::expr_offset_in_bytes(1)); +} + +static inline Address at_tos_p2() { + return Address(esp, Interpreter::expr_offset_in_bytes(2)); +} + +static inline Address at_tos_p3() { + return Address(esp, Interpreter::expr_offset_in_bytes(3)); +} + +static inline Address at_tos_p4() { + return Address(esp, Interpreter::expr_offset_in_bytes(4)); +} + +static inline Address at_tos_p5() { + return Address(esp, Interpreter::expr_offset_in_bytes(5)); +} + +// Miscellaneous helper routines +// Store an oop (or NULL) at the Address described by obj. 
+// If val == noreg this means store a NULL +static void do_oop_store(InterpreterMacroAssembler* _masm, + Address dst, + Register val, + DecoratorSet decorators) { + assert(val == noreg || val == x10, "parameter is just for looks"); + assert_cond(_masm != NULL); + __ store_heap_oop(dst, val, x29, x11, decorators); +} + +static void do_oop_load(InterpreterMacroAssembler* _masm, + Address src, + Register dst, + DecoratorSet decorators) { + assert_cond(_masm != NULL); + __ load_heap_oop(dst, src, x7, x11, decorators); +} + +Address TemplateTable::at_bcp(int offset) { + assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); + return Address(xbcp, offset); +} + +void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, + Register temp_reg, bool load_bc_into_bc_reg/*=true*/, + int byte_no) +{ + if (!RewriteBytecodes) { return; } + Label L_patch_done; + + switch (bc) { + case Bytecodes::_fast_aputfield: // fall through + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_dputfield: // fall through + case Bytecodes::_fast_fputfield: // fall through + case Bytecodes::_fast_iputfield: // fall through + case Bytecodes::_fast_lputfield: // fall through + case Bytecodes::_fast_sputfield: { + // We skip bytecode quickening for putfield instructions when + // the put_code written to the constant pool cache is zero. + // This is required so that every execution of this instruction + // calls out to InterpreterRuntime::resolve_get_put to do + // additional, required work. 
+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + assert(load_bc_into_bc_reg, "we use bc_reg as temp"); + __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1); + __ mv(bc_reg, bc); + __ beqz(temp_reg, L_patch_done); + break; + } + default: + assert(byte_no == -1, "sanity"); + // the pair bytecodes have already done the load. + if (load_bc_into_bc_reg) { + __ mv(bc_reg, bc); + } + } + + if (JvmtiExport::can_post_breakpoint()) { + Label L_fast_patch; + // if a breakpoint is present we can't rewrite the stream directly + __ load_unsigned_byte(temp_reg, at_bcp(0)); + __ addi(temp_reg, temp_reg, -Bytecodes::_breakpoint); // temp_reg is temporary register. + __ bnez(temp_reg, L_fast_patch); + // Let breakpoint table handling rewrite to quicker bytecode + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), xmethod, xbcp, bc_reg); + __ j(L_patch_done); + __ bind(L_fast_patch); + } + +#ifdef ASSERT + Label L_okay; + __ load_unsigned_byte(temp_reg, at_bcp(0)); + __ beq(temp_reg, bc_reg, L_okay); + __ addi(temp_reg, temp_reg, -(int) Bytecodes::java_code(bc)); + __ beqz(temp_reg, L_okay); + __ stop("patching the wrong bytecode"); + __ bind(L_okay); +#endif + + // patch bytecode + __ sb(bc_reg, at_bcp(0)); + __ bind(L_patch_done); +} + +// Individual instructions + +void TemplateTable::nop() { + transition(vtos, vtos); + // nothing to do +} + +void TemplateTable::shouldnotreachhere() { + transition(vtos, vtos); + __ stop("should not reach here bytecode"); +} + +void TemplateTable::aconst_null() +{ + transition(vtos, atos); + __ mv(x10, zr); +} + +void TemplateTable::iconst(int value) +{ + transition(vtos, itos); + __ li(x10, value); +} + +void TemplateTable::lconst(int value) +{ + transition(vtos, ltos); + __ li(x10, value); +} + +void TemplateTable::fconst(int value) +{ + transition(vtos, ftos); + static float fBuf[2] = {1.0, 2.0}; + __ mv(t0, (intptr_t)fBuf); + switch (value) { + 
case 0: + __ fmv_w_x(f10, zr); + break; + case 1: + __ flw(f10, t0, 0); + break; + case 2: + __ flw(f10, t0, sizeof(float)); + break; + default: + ShouldNotReachHere(); + } +} + +void TemplateTable::dconst(int value) +{ + transition(vtos, dtos); + static double dBuf[2] = {1.0, 2.0}; + __ mv(t0, (intptr_t)dBuf); + switch (value) { + case 0: + __ fmv_d_x(f10, zr); + break; + case 1: + __ fld(f10, t0, 0); + break; + case 2: + __ fld(f10, t0, sizeof(double)); + break; + default: + ShouldNotReachHere(); + } +} + +void TemplateTable::bipush() +{ + transition(vtos, itos); + __ load_signed_byte(x10, at_bcp(1)); +} + +void TemplateTable::sipush() +{ + transition(vtos, itos); + __ load_unsigned_short(x10, at_bcp(1)); + __ revb_w_w(x10, x10); + __ sraiw(x10, x10, 16); +} + +void TemplateTable::ldc(bool wide) +{ + transition(vtos, vtos); + Label call_ldc, notFloat, notClass, notInt, Done; + + if (wide) { + __ get_unsigned_2_byte_index_at_bcp(x11, 1); + } else { + __ load_unsigned_byte(x11, at_bcp(1)); + } + __ get_cpool_and_tags(x12, x10); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type + __ addi(x13, x11, tags_offset); + __ add(x13, x10, x13); + __ membar(MacroAssembler::AnyAny); + __ lbu(x13, Address(x13, 0)); + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + + // unresolved class - get the resolved class + __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClass); + __ beq(x13, t1, call_ldc); + + // unresolved class in error state - call into runtime to throw the error + // from the first resolution attempt + __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClassInError); + __ beq(x13, t1, call_ldc); + + // resolved class - need to call vm to get java mirror of the class + __ mv(t1, (u1)JVM_CONSTANT_Class); + __ bne(x13, t1, notClass); + + __ bind(call_ldc); + __ mv(c_rarg1, wide); + call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1); + __ push_ptr(x10); + __ 
verify_oop(x10); + __ j(Done); + + __ bind(notClass); + __ mv(t1, (u1)JVM_CONSTANT_Float); + __ bne(x13, t1, notFloat); + + // ftos + __ shadd(x11, x11, x12, x11, 3); + __ flw(f10, Address(x11, base_offset)); + __ push_f(f10); + __ j(Done); + + __ bind(notFloat); + + __ mv(t1, (u1)JVM_CONSTANT_Integer); + __ bne(x13, t1, notInt); + + // itos + __ shadd(x11, x11, x12, x11, 3); + __ lw(x10, Address(x11, base_offset)); + __ push_i(x10); + __ j(Done); + + __ bind(notInt); + condy_helper(Done); + + __ bind(Done); +} + +// Fast path for caching oop constants. +void TemplateTable::fast_aldc(bool wide) +{ + transition(vtos, atos); + + const Register result = x10; + const Register tmp = x11; + const Register rarg = x12; + + const int index_size = wide ? sizeof(u2) : sizeof(u1); + + Label resolved; + + // We are resolved if the resolved reference cache entry contains a + // non-null object (String, MethodType, etc.) + assert_different_registers(result, tmp); + __ get_cache_index_at_bcp(tmp, 1, index_size); + __ load_resolved_reference_at_index(result, tmp); + __ bnez(result, resolved); + + const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + + // first time invocation - must resolve first + __ mv(rarg, (int)bytecode()); + __ call_VM(result, entry, rarg); + + __ bind(resolved); + + { // Check for the null sentinel. + // If we just called the VM, it already did the mapping for us, + // but it's harmless to retry. 
+ Label notNull; + + // Stash null_sentinel address to get its value later + int32_t offset = 0; + __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); + __ ld(tmp, Address(rarg, offset)); + __ resolve_oop_handle(tmp); + __ bne(result, tmp, notNull); + __ mv(result, zr); // NULL object reference + __ bind(notNull); + } + + if (VerifyOops) { + // Safe to call with 0 result + __ verify_oop(result); + } +} + +void TemplateTable::ldc2_w() +{ + transition(vtos, vtos); + Label notDouble, notLong, Done; + __ get_unsigned_2_byte_index_at_bcp(x10, 1); + + __ get_cpool_and_tags(x11, x12); + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array<u1>::base_offset_in_bytes(); + + // get type: load the tag byte at x12 + x10 + tags_offset + __ add(x12, x12, x10); + __ load_unsigned_byte(x12, Address(x12, tags_offset)); + __ mv(t1, JVM_CONSTANT_Double); + __ bne(x12, t1, notDouble); + + // dtos + __ shadd(x12, x10, x11, x12, 3); + __ fld(f10, Address(x12, base_offset)); + __ push_d(f10); + __ j(Done); + + __ bind(notDouble); + __ mv(t1, (int)JVM_CONSTANT_Long); + __ bne(x12, t1, notLong); + + // ltos + __ shadd(x10, x10, x11, x10, 3); + __ ld(x10, Address(x10, base_offset)); + __ push_l(x10); + __ j(Done); + + __ bind(notLong); + condy_helper(Done); + __ bind(Done); +} + +void TemplateTable::condy_helper(Label& Done) +{ + const Register obj = x10; + const Register rarg = x11; + const Register flags = x12; + const Register off = x13; + + const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + + __ mv(rarg, (int) bytecode()); + __ call_VM(obj, entry, rarg); + + __ get_vm_result_2(flags, xthread); + + // VMr = obj = base address to find primitive value to push + // VMr2 = flags = (tos, off) using format of CPCE::_flags + __ mv(off, flags); + __ mv(t0, ConstantPoolCacheEntry::field_index_mask); + __ andrw(off, off, t0); + + __ add(off, obj, off); + const Address field(off, 0); // base + R---->base + offset + + __ slli(flags, flags, XLEN -
(ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); + __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> flags:0~3 + + switch (bytecode()) { + case Bytecodes::_ldc: // fall through + case Bytecodes::_ldc_w: { + // tos in (itos, ftos, stos, btos, ctos, ztos) + Label notInt, notFloat, notShort, notByte, notChar, notBool; + __ mv(t1, itos); + __ bne(flags, t1, notInt); + // itos + __ lw(x10, field); + __ push(itos); + __ j(Done); + + __ bind(notInt); + __ mv(t1, ftos); + __ bne(flags, t1, notFloat); + // ftos + __ load_float(field); + __ push(ftos); + __ j(Done); + + __ bind(notFloat); + __ mv(t1, stos); + __ bne(flags, t1, notShort); + // stos + __ load_signed_short(x10, field); + __ push(stos); + __ j(Done); + + __ bind(notShort); + __ mv(t1, btos); + __ bne(flags, t1, notByte); + // btos + __ load_signed_byte(x10, field); + __ push(btos); + __ j(Done); + + __ bind(notByte); + __ mv(t1, ctos); + __ bne(flags, t1, notChar); + // ctos + __ load_unsigned_short(x10, field); + __ push(ctos); + __ j(Done); + + __ bind(notChar); + __ mv(t1, ztos); + __ bne(flags, t1, notBool); + // ztos + __ load_signed_byte(x10, field); + __ push(ztos); + __ j(Done); + + __ bind(notBool); + break; + } + + case Bytecodes::_ldc2_w: { + Label notLong, notDouble; + __ mv(t1, ltos); + __ bne(flags, t1, notLong); + // ltos + __ ld(x10, field); + __ push(ltos); + __ j(Done); + + __ bind(notLong); + __ mv(t1, dtos); + __ bne(flags, t1, notDouble); + // dtos + __ load_double(field); + __ push(dtos); + __ j(Done); + + __ bind(notDouble); + break; + } + + default: + ShouldNotReachHere(); + } + + __ stop("bad ldc/condy"); +} + +void TemplateTable::locals_index(Register reg, int offset) +{ + __ lbu(reg, at_bcp(offset)); + __ neg(reg, reg); +} + +void TemplateTable::iload() { + iload_internal(); +} + +void TemplateTable::nofast_iload() { + iload_internal(may_not_rewrite); +} + +void 
TemplateTable::iload_internal(RewriteControl rc) { + transition(vtos, itos); + if (RewriteFrequentPairs && rc == may_rewrite) { + Label rewrite, done; + const Register bc = x14; + + // get next bytecode + __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); + + // if _iload, wait to rewrite to iload2. We only want to rewrite the + // last two iloads in a pair. Comparing against fast_iload means that + // the next bytecode is neither an iload or a caload, and therefore + // an iload pair. + __ mv(t1, Bytecodes::_iload); + __ beq(x11, t1, done); + + // if _fast_iload rewrite to _fast_iload2 + __ mv(t1, Bytecodes::_fast_iload); + __ mv(bc, Bytecodes::_fast_iload2); + __ beq(x11, t1, rewrite); + + // if _caload rewrite to _fast_icaload + __ mv(t1, Bytecodes::_caload); + __ mv(bc, Bytecodes::_fast_icaload); + __ beq(x11, t1, rewrite); + + // else rewrite to _fast_iload + __ mv(bc, Bytecodes::_fast_iload); + + // rewrite + // bc: new bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_iload, bc, x11, false); + __ bind(done); + + } + + // do iload, get the local value into tos + locals_index(x11); + __ lw(x10, iaddress(x11, x10, _masm)); +} + +void TemplateTable::fast_iload2() +{ + transition(vtos, itos); + locals_index(x11); + __ lw(x10, iaddress(x11, x10, _masm)); + __ push(itos); + locals_index(x11, 3); + __ lw(x10, iaddress(x11, x10, _masm)); +} + +void TemplateTable::fast_iload() +{ + transition(vtos, itos); + locals_index(x11); + __ lw(x10, iaddress(x11, x10, _masm)); +} + +void TemplateTable::lload() +{ + transition(vtos, ltos); + __ lbu(x11, at_bcp(1)); + __ slli(x11, x11, LogBytesPerWord); + __ sub(x11, xlocals, x11); + __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1))); +} + +void TemplateTable::fload() +{ + transition(vtos, ftos); + locals_index(x11); + __ flw(f10, faddress(x11, t0, _masm)); +} + +void TemplateTable::dload() +{ + transition(vtos, dtos); + __ lbu(x11, at_bcp(1)); + __ slli(x11, x11, LogBytesPerWord); + 
__ sub(x11, xlocals, x11); + __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1))); +} + +void TemplateTable::aload() +{ + transition(vtos, atos); + locals_index(x11); + __ ld(x10, iaddress(x11, x10, _masm)); + +} + +void TemplateTable::locals_index_wide(Register reg) { + __ lhu(reg, at_bcp(2)); + __ revb_h_h_u(reg, reg); // reverse bytes in half-word and zero-extend + __ neg(reg, reg); +} + +void TemplateTable::wide_iload() { + transition(vtos, itos); + locals_index_wide(x11); + __ lw(x10, iaddress(x11, t0, _masm)); +} + +void TemplateTable::wide_lload() +{ + transition(vtos, ltos); + __ lhu(x11, at_bcp(2)); + __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend + __ slli(x11, x11, LogBytesPerWord); + __ sub(x11, xlocals, x11); + __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1))); +} + +void TemplateTable::wide_fload() +{ + transition(vtos, ftos); + locals_index_wide(x11); + __ flw(f10, faddress(x11, t0, _masm)); +} + +void TemplateTable::wide_dload() +{ + transition(vtos, dtos); + __ lhu(x11, at_bcp(2)); + __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend + __ slli(x11, x11, LogBytesPerWord); + __ sub(x11, xlocals, x11); + __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1))); +} + +void TemplateTable::wide_aload() +{ + transition(vtos, atos); + locals_index_wide(x11); + __ ld(x10, aaddress(x11, t0, _masm)); +} + +void TemplateTable::index_check(Register array, Register index) +{ + // destroys x11, t0 + // check array + __ null_check(array, arrayOopDesc::length_offset_in_bytes()); + // sign extend index for use by indexed load + // check index + const Register length = t0; + __ lwu(length, Address(array, arrayOopDesc::length_offset_in_bytes())); + if (index != x11) { + assert(x11 != array, "different registers"); + __ mv(x11, index); + } + Label ok; + __ addw(index, index, zr); + __ bltu(index, length, ok); + __ mv(x13, array); + __ mv(t0, 
Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); + __ jr(t0); + __ bind(ok); +} + +void TemplateTable::iaload() +{ + transition(itos, itos); + __ mv(x11, x10); + __ pop_ptr(x10); + // x10: array + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); + __ shadd(x10, x11, x10, t0, 2); + __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); + __ addw(x10, x10, zr); // signed extended +} + +void TemplateTable::laload() +{ + transition(itos, ltos); + __ mv(x11, x10); + __ pop_ptr(x10); + // x10: array + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); + __ shadd(x10, x11, x10, t0, 3); + __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::faload() +{ + transition(itos, ftos); + __ mv(x11, x10); + __ pop_ptr(x10); + // x10: array + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); + __ shadd(x10, x11, x10, t0, 2); + __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::daload() +{ + transition(itos, dtos); + __ mv(x11, x10); + __ pop_ptr(x10); + // x10: array + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); + __ shadd(x10, x11, x10, t0, 3); + __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::aaload() +{ + transition(itos, atos); + __ mv(x11, x10); + __ pop_ptr(x10); + // x10: array + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + __ shadd(x10, x11, x10, t0, LogBytesPerHeapOop); + do_oop_load(_masm, + Address(x10), + x10, + IS_ARRAY); +} + +void 
TemplateTable::baload() +{ + transition(itos, itos); + __ mv(x11, x10); + __ pop_ptr(x10); + // x10: array + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); + __ shadd(x10, x11, x10, t0, 0); + __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::caload() +{ + transition(itos, itos); + __ mv(x11, x10); + __ pop_ptr(x10); + // x10: array + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); + __ shadd(x10, x11, x10, t0, 1); + __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +// iload followed by caload frequent pair +void TemplateTable::fast_icaload() +{ + transition(vtos, itos); + // load index out of locals + locals_index(x12); + __ lw(x11, iaddress(x12, x11, _masm)); + __ pop_ptr(x10); + + // x10: array + // x11: index + index_check(x10, x11); // leaves index in x11, kills t0 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // addi, max imm is 2^11 + __ shadd(x10, x11, x10, t0, 1); + __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::saload() +{ + transition(itos, itos); + __ mv(x11, x10); + __ pop_ptr(x10); + // x10: array + // x11: index + index_check(x10, x11); // leaves index in x11, kills t0 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_SHORT) >> 1); + __ shadd(x10, x11, x10, t0, 1); + __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::iload(int n) +{ + transition(vtos, itos); + __ lw(x10, iaddress(n)); +} + +void TemplateTable::lload(int n) +{ + transition(vtos, ltos); + __ ld(x10, laddress(n)); +} + +void TemplateTable::fload(int n) +{ + transition(vtos, ftos); + __ flw(f10, faddress(n)); +} + +void TemplateTable::dload(int n) +{ + transition(vtos, dtos); + __ fld(f10, 
daddress(n)); +} + +void TemplateTable::aload(int n) +{ + transition(vtos, atos); + __ ld(x10, iaddress(n)); +} + +void TemplateTable::aload_0() { + aload_0_internal(); +} + +void TemplateTable::nofast_aload_0() { + aload_0_internal(may_not_rewrite); +} + +void TemplateTable::aload_0_internal(RewriteControl rc) { + // According to bytecode histograms, the pairs: + // + // _aload_0, _fast_igetfield + // _aload_0, _fast_agetfield + // _aload_0, _fast_fgetfield + // + // occur frequently. If RewriteFrequentPairs is set, the (slow) + // _aload_0 bytecode checks if the next bytecode is either + // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then + // rewrites the current bytecode into a pair bytecode; otherwise it + // rewrites the current bytecode into _fast_aload_0 that doesn't do + // the pair check anymore. + // + // Note: If the next bytecode is _getfield, the rewrite must be + // delayed, otherwise we may miss an opportunity for a pair. + // + // Also rewrite frequent pairs + // aload_0, aload_1 + // aload_0, iload_1 + // These bytecodes with a small amount of code are most profitable + // to rewrite + if (RewriteFrequentPairs && rc == may_rewrite) { + Label rewrite, done; + const Register bc = x14; + + // get next bytecode + __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); + + // if _getfield then wait with rewrite + __ mv(t1, Bytecodes::Bytecodes::_getfield); + __ beq(x11, t1, done); + + // if _igetfield then rewrite to _fast_iaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ mv(t1, Bytecodes::_fast_igetfield); + __ mv(bc, Bytecodes::_fast_iaccess_0); + __ beq(x11, t1, rewrite); + + // if _agetfield then rewrite to _fast_aaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ mv(t1, Bytecodes::_fast_agetfield); + __ mv(bc, Bytecodes::_fast_aaccess_0); + __ beq(x11, t1, rewrite); 
+ + // if _fgetfield then rewrite to _fast_faccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ mv(t1, Bytecodes::_fast_fgetfield); + __ mv(bc, Bytecodes::_fast_faccess_0); + __ beq(x11, t1, rewrite); + + // else rewrite to _fast_aload0 + assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ mv(bc, Bytecodes::Bytecodes::_fast_aload_0); + + // rewrite + // bc: new bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_aload_0, bc, x11, false); + + __ bind(done); + } + + // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop). + aload(0); +} + +void TemplateTable::istore() +{ + transition(itos, vtos); + locals_index(x11); + __ sw(x10, iaddress(x11, t0, _masm)); +} + +void TemplateTable::lstore() +{ + transition(ltos, vtos); + locals_index(x11); + __ sd(x10, laddress(x11, t0, _masm)); +} + +void TemplateTable::fstore() { + transition(ftos, vtos); + locals_index(x11); + __ fsw(f10, iaddress(x11, t0, _masm)); +} + +void TemplateTable::dstore() { + transition(dtos, vtos); + locals_index(x11); + __ fsd(f10, daddress(x11, t0, _masm)); +} + +void TemplateTable::astore() +{ + transition(vtos, vtos); + __ pop_ptr(x10); + locals_index(x11); + __ sd(x10, aaddress(x11, t0, _masm)); +} + +void TemplateTable::wide_istore() { + transition(vtos, vtos); + __ pop_i(); + locals_index_wide(x11); + __ sw(x10, iaddress(x11, t0, _masm)); +} + +void TemplateTable::wide_lstore() { + transition(vtos, vtos); + __ pop_l(); + locals_index_wide(x11); + __ sd(x10, laddress(x11, t0, _masm)); +} + +void TemplateTable::wide_fstore() { + transition(vtos, vtos); + __ pop_f(); + locals_index_wide(x11); + __ fsw(f10, faddress(x11, t0, _masm)); +} + +void TemplateTable::wide_dstore() { + transition(vtos, vtos); + __ pop_d(); + locals_index_wide(x11); + __ fsd(f10, daddress(x11, t0, _masm)); +} + +void TemplateTable::wide_astore() { 
+ transition(vtos, vtos); + __ pop_ptr(x10); + locals_index_wide(x11); + __ sd(x10, aaddress(x11, t0, _masm)); +} + +void TemplateTable::iastore() { + transition(itos, vtos); + __ pop_i(x11); + __ pop_ptr(x13); + // x10: value + // x11: index + // x13: array + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); + __ shadd(t0, x11, x13, t0, 2); + __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); +} + +void TemplateTable::lastore() { + transition(ltos, vtos); + __ pop_i(x11); + __ pop_ptr(x13); + // x10: value + // x11: index + // x13: array + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); + __ shadd(t0, x11, x13, t0, 3); + __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); +} + +void TemplateTable::fastore() { + transition(ftos, vtos); + __ pop_i(x11); + __ pop_ptr(x13); + // f10: value + // x11: index + // x13: array + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); + __ shadd(t0, x11, x13, t0, 2); + __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg); +} + +void TemplateTable::dastore() { + transition(dtos, vtos); + __ pop_i(x11); + __ pop_ptr(x13); + // f10: value + // x11: index + // x13: array + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); + __ shadd(t0, x11, x13, t0, 3); + __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg); +} + +void TemplateTable::aastore() { + Label is_null, ok_is_subtype, done; + transition(vtos, vtos); + // stack: ..., array, index, value + __ ld(x10, at_tos()); // value + __ ld(x12, at_tos_p1()); // index + __ ld(x13, at_tos_p2()); // array + + index_check(x13, x12); // kills x11 + __ add(x14, x12, 
arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + __ shadd(x14, x14, x13, x14, LogBytesPerHeapOop); + + Address element_address(x14, 0); + + // do array store check - check for NULL value first + __ beqz(x10, is_null); + + // Move subklass into x11 + __ load_klass(x11, x10); + // Move superklass into x10 + __ load_klass(x10, x13); + __ ld(x10, Address(x10, + ObjArrayKlass::element_klass_offset())); + // Compress array + index * oopSize + 12 into a single register. Frees x12. + + // Generate subtype check. Blows x12, x15 + // Superklass in x10. Subklass in x11. + __ gen_subtype_check(x11, ok_is_subtype); //todo + + // Come here on failure + // object is at TOS + __ j(Interpreter::_throw_ArrayStoreException_entry); + + // Come here on success + __ bind(ok_is_subtype); + + // Get the value we will store + __ ld(x10, at_tos()); + // Now store using the appropriate barrier + do_oop_store(_masm, element_address, x10, IS_ARRAY); + __ j(done); + + // Have a NULL in x10, x13=array, x12=index. Store NULL at ary[idx] + __ bind(is_null); + __ profile_null_seen(x12); + + // Store a NULL + do_oop_store(_masm, element_address, noreg, IS_ARRAY); + + // Pop stack arguments + __ bind(done); + __ add(esp, esp, 3 * Interpreter::stackElementSize); + +} + +void TemplateTable::bastore() +{ + transition(itos, vtos); + __ pop_i(x11); + __ pop_ptr(x13); + // x10: value + // x11: index + // x13: array + index_check(x13, x11); // prefer index in x11 + + // Need to check whether array is boolean or byte + // since both types share the bastore bytecode. 
+ __ load_klass(x12, x13); + __ lwu(x12, Address(x12, Klass::layout_helper_offset())); + Label L_skip; + __ andi(t0, x12, Klass::layout_helper_boolean_diffbit()); + __ beqz(t0, L_skip); + __ andi(x10, x10, 1); // if it is a T_BOOLEAN array, mask the stored value to 0/1 + __ bind(L_skip); + + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); + + __ add(x11, x13, x11); + __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(x11, 0), x10, noreg, noreg); +} + +void TemplateTable::castore() +{ + transition(itos, vtos); + __ pop_i(x11); + __ pop_ptr(x13); + // x10: value + // x11: index + // x13: array + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); + __ shadd(t0, x11, x13, t0, 1); + __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); +} + +void TemplateTable::sastore() +{ + castore(); +} + +void TemplateTable::istore(int n) +{ + transition(itos, vtos); + __ sd(x10, iaddress(n)); +} + +void TemplateTable::lstore(int n) +{ + transition(ltos, vtos); + __ sd(x10, laddress(n)); +} + +void TemplateTable::fstore(int n) +{ + transition(ftos, vtos); + __ fsw(f10, faddress(n)); +} + +void TemplateTable::dstore(int n) +{ + transition(dtos, vtos); + __ fsd(f10, daddress(n)); +} + +void TemplateTable::astore(int n) +{ + transition(vtos, vtos); + __ pop_ptr(x10); + __ sd(x10, iaddress(n)); +} + +void TemplateTable::pop() +{ + transition(vtos, vtos); + __ addi(esp, esp, Interpreter::stackElementSize); +} + +void TemplateTable::pop2() +{ + transition(vtos, vtos); + __ addi(esp, esp, 2 * Interpreter::stackElementSize); +} + +void TemplateTable::dup() +{ + transition(vtos, vtos); + __ ld(x10, Address(esp, 0)); + __ push_reg(x10); + // stack: ..., a, a +} + +void TemplateTable::dup_x1() +{ + transition(vtos, vtos); + // stack: ..., a, b + __ ld(x10, at_tos()); // load b + __ ld(x12, at_tos_p1()); // load a + __ sd(x10, at_tos_p1()); // store b + __ sd(x12, at_tos()); // 
store a + __ push_reg(x10); // push b + // stack: ..., b, a, b +} + +void TemplateTable::dup_x2() +{ + transition(vtos, vtos); + // stack: ..., a, b, c + __ ld(x10, at_tos()); // load c + __ ld(x12, at_tos_p2()); // load a + __ sd(x10, at_tos_p2()); // store c in a + __ push_reg(x10); // push c + // stack: ..., c, b, c, c + __ ld(x10, at_tos_p2()); // load b + __ sd(x12, at_tos_p2()); // store a in b + // stack: ..., c, a, c, c + __ sd(x10, at_tos_p1()); // store b in c + // stack: ..., c, a, b, c +} + +void TemplateTable::dup2() +{ + transition(vtos, vtos); + // stack: ..., a, b + __ ld(x10, at_tos_p1()); // load a + __ push_reg(x10); // push a + __ ld(x10, at_tos_p1()); // load b + __ push_reg(x10); // push b + // stack: ..., a, b, a, b +} + +void TemplateTable::dup2_x1() +{ + transition(vtos, vtos); + // stack: ..., a, b, c + __ ld(x12, at_tos()); // load c + __ ld(x10, at_tos_p1()); // load b + __ push_reg(x10); // push b + __ push_reg(x12); // push c + // stack: ..., a, b, c, b, c + __ sd(x12, at_tos_p3()); // store c in b + // stack: ..., a, c, c, b, c + __ ld(x12, at_tos_p4()); // load a + __ sd(x12, at_tos_p2()); // store a in 2nd c + // stack: ..., a, c, a, b, c + __ sd(x10, at_tos_p4()); // store b in a + // stack: ..., b, c, a, b, c +} + +void TemplateTable::dup2_x2() +{ + transition(vtos, vtos); + // stack: ..., a, b, c, d + __ ld(x12, at_tos()); // load d + __ ld(x10, at_tos_p1()); // load c + __ push_reg(x10); // push c + __ push_reg(x12); // push d + // stack: ..., a, b, c, d, c, d + __ ld(x10, at_tos_p4()); // load b + __ sd(x10, at_tos_p2()); // store b in d + __ sd(x12, at_tos_p4()); // store d in b + // stack: ..., a, d, c, b, c, d + __ ld(x12, at_tos_p5()); // load a + __ ld(x10, at_tos_p3()); // load c + __ sd(x12, at_tos_p3()); // store a in c + __ sd(x10, at_tos_p5()); // store c in a + // stack: ..., c, d, a, b, c, d +} + +void TemplateTable::swap() +{ + transition(vtos, vtos); + // stack: ..., a, b + __ ld(x12, at_tos_p1()); // load a + __ 
ld(x10, at_tos()); // load b + __ sd(x12, at_tos()); // store a in b + __ sd(x10, at_tos_p1()); // store b in a + // stack: ..., b, a +} + +void TemplateTable::iop2(Operation op) +{ + transition(itos, itos); + // x10 <== x11 op x10 + __ pop_i(x11); + switch (op) { + case add : __ addw(x10, x11, x10); break; + case sub : __ subw(x10, x11, x10); break; + case mul : __ mulw(x10, x11, x10); break; + case _and : __ andrw(x10, x11, x10); break; + case _or : __ orrw(x10, x11, x10); break; + case _xor : __ xorrw(x10, x11, x10); break; + case shl : __ sllw(x10, x11, x10); break; + case shr : __ sraw(x10, x11, x10); break; + case ushr : __ srlw(x10, x11, x10); break; + default : ShouldNotReachHere(); + } +} + +void TemplateTable::lop2(Operation op) +{ + transition(ltos, ltos); + // x10 <== x11 op x10 + __ pop_l(x11); + switch (op) { + case add : __ add(x10, x11, x10); break; + case sub : __ sub(x10, x11, x10); break; + case mul : __ mul(x10, x11, x10); break; + case _and : __ andr(x10, x11, x10); break; + case _or : __ orr(x10, x11, x10); break; + case _xor : __ xorr(x10, x11, x10); break; + default : ShouldNotReachHere(); + } +} + +void TemplateTable::idiv() +{ + transition(itos, itos); + // explicitly check for div0 + Label no_div0; + __ bnez(x10, no_div0); + __ mv(t0, Interpreter::_throw_ArithmeticException_entry); + __ jr(t0); + __ bind(no_div0); + __ pop_i(x11); + // x10 <== x11 idiv x10 + __ corrected_idivl(x10, x11, x10, /* want_remainder */ false); +} + +void TemplateTable::irem() +{ + transition(itos, itos); + // explicitly check for div0 + Label no_div0; + __ bnez(x10, no_div0); + __ mv(t0, Interpreter::_throw_ArithmeticException_entry); + __ jr(t0); + __ bind(no_div0); + __ pop_i(x11); + // x10 <== x11 irem x10 + __ corrected_idivl(x10, x11, x10, /* want_remainder */ true); +} + +void TemplateTable::lmul() +{ + transition(ltos, ltos); + __ pop_l(x11); + __ mul(x10, x10, x11); +} + +void TemplateTable::ldiv() +{ + transition(ltos, ltos); + // explicitly check for 
div0 + Label no_div0; + __ bnez(x10, no_div0); + __ mv(t0, Interpreter::_throw_ArithmeticException_entry); + __ jr(t0); + __ bind(no_div0); + __ pop_l(x11); + // x10 <== x11 ldiv x10 + __ corrected_idivq(x10, x11, x10, /* want_remainder */ false); +} + +void TemplateTable::lrem() +{ + transition(ltos, ltos); + // explicitly check for div0 + Label no_div0; + __ bnez(x10, no_div0); + __ mv(t0, Interpreter::_throw_ArithmeticException_entry); + __ jr(t0); + __ bind(no_div0); + __ pop_l(x11); + // x10 <== x11 lrem x10 + __ corrected_idivq(x10, x11, x10, /* want_remainder */ true); +} + +void TemplateTable::lshl() +{ + transition(itos, ltos); + // shift count is in x10 + __ pop_l(x11); + __ sll(x10, x11, x10); +} + +void TemplateTable::lshr() +{ + transition(itos, ltos); + // shift count is in x10 + __ pop_l(x11); + __ sra(x10, x11, x10); +} + +void TemplateTable::lushr() +{ + transition(itos, ltos); + // shift count is in x10 + __ pop_l(x11); + __ srl(x10, x11, x10); +} + +void TemplateTable::fop2(Operation op) +{ + transition(ftos, ftos); + switch (op) { + case add: + __ pop_f(f11); + __ fadd_s(f10, f11, f10); + break; + case sub: + __ pop_f(f11); + __ fsub_s(f10, f11, f10); + break; + case mul: + __ pop_f(f11); + __ fmul_s(f10, f11, f10); + break; + case div: + __ pop_f(f11); + __ fdiv_s(f10, f11, f10); + break; + case rem: + __ fmv_s(f11, f10); + __ pop_f(f10); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem)); + break; + default: + ShouldNotReachHere(); + } +} + +void TemplateTable::dop2(Operation op) +{ + transition(dtos, dtos); + switch (op) { + case add: + __ pop_d(f11); + __ fadd_d(f10, f11, f10); + break; + case sub: + __ pop_d(f11); + __ fsub_d(f10, f11, f10); + break; + case mul: + __ pop_d(f11); + __ fmul_d(f10, f11, f10); + break; + case div: + __ pop_d(f11); + __ fdiv_d(f10, f11, f10); + break; + case rem: + __ fmv_d(f11, f10); + __ pop_d(f10); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem)); + break; + default: + 
ShouldNotReachHere(); + } +} + +void TemplateTable::ineg() +{ + transition(itos, itos); + __ negw(x10, x10); +} + +void TemplateTable::lneg() +{ + transition(ltos, ltos); + __ neg(x10, x10); +} + +void TemplateTable::fneg() +{ + transition(ftos, ftos); + __ fneg_s(f10, f10); +} + +void TemplateTable::dneg() +{ + transition(dtos, dtos); + __ fneg_d(f10, f10); +} + +void TemplateTable::iinc() +{ + transition(vtos, vtos); + __ load_signed_byte(x11, at_bcp(2)); // get constant + locals_index(x12); + __ ld(x10, iaddress(x12, x10, _masm)); + __ addw(x10, x10, x11); + __ sd(x10, iaddress(x12, t0, _masm)); +} + +void TemplateTable::wide_iinc() +{ + transition(vtos, vtos); + __ lwu(x11, at_bcp(2)); // get constant and index + __ revb_h_w_u(x11, x11); // reverse bytes in half-word (32bit) and zero-extend + __ zero_extend(x12, x11, 16); + __ neg(x12, x12); + __ slli(x11, x11, 32); + __ srai(x11, x11, 48); + __ ld(x10, iaddress(x12, t0, _masm)); + __ addw(x10, x10, x11); + __ sd(x10, iaddress(x12, t0, _masm)); +} + +void TemplateTable::convert() +{ + // Checking +#ifdef ASSERT + { + TosState tos_in = ilgl; + TosState tos_out = ilgl; + switch (bytecode()) { + case Bytecodes::_i2l: // fall through + case Bytecodes::_i2f: // fall through + case Bytecodes::_i2d: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_in = itos; break; + case Bytecodes::_l2i: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_l2d: tos_in = ltos; break; + case Bytecodes::_f2i: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_f2d: tos_in = ftos; break; + case Bytecodes::_d2i: // fall through + case Bytecodes::_d2l: // fall through + case Bytecodes::_d2f: tos_in = dtos; break; + default : ShouldNotReachHere(); + } + switch (bytecode()) { + case Bytecodes::_l2i: // fall through + case Bytecodes::_f2i: // fall through + case Bytecodes::_d2i: // fall through + case Bytecodes::_i2b: // 
fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_out = itos; break; + case Bytecodes::_i2l: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_d2l: tos_out = ltos; break; + case Bytecodes::_i2f: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_d2f: tos_out = ftos; break; + case Bytecodes::_i2d: // fall through + case Bytecodes::_l2d: // fall through + case Bytecodes::_f2d: tos_out = dtos; break; + default : ShouldNotReachHere(); + } + transition(tos_in, tos_out); + } +#endif // ASSERT + + // Conversion + switch (bytecode()) { + case Bytecodes::_i2l: + __ sign_extend(x10, x10, 32); + break; + case Bytecodes::_i2f: + __ fcvt_s_w(f10, x10); + break; + case Bytecodes::_i2d: + __ fcvt_d_w(f10, x10); + break; + case Bytecodes::_i2b: + __ sign_extend(x10, x10, 8); + break; + case Bytecodes::_i2c: + __ zero_extend(x10, x10, 16); + break; + case Bytecodes::_i2s: + __ sign_extend(x10, x10, 16); + break; + case Bytecodes::_l2i: + __ addw(x10, x10, zr); + break; + case Bytecodes::_l2f: + __ fcvt_s_l(f10, x10); + break; + case Bytecodes::_l2d: + __ fcvt_d_l(f10, x10); + break; + case Bytecodes::_f2i: + __ fcvt_w_s_safe(x10, f10); + break; + case Bytecodes::_f2l: + __ fcvt_l_s_safe(x10, f10); + break; + case Bytecodes::_f2d: + __ fcvt_d_s(f10, f10); + break; + case Bytecodes::_d2i: + __ fcvt_w_d_safe(x10, f10); + break; + case Bytecodes::_d2l: + __ fcvt_l_d_safe(x10, f10); + break; + case Bytecodes::_d2f: + __ fcvt_s_d(f10, f10); + break; + default: + ShouldNotReachHere(); + } +} + +void TemplateTable::lcmp() +{ + transition(ltos, itos); + __ pop_l(x11); + __ cmp_l2i(t0, x11, x10); + __ mv(x10, t0); +} + +void TemplateTable::float_cmp(bool is_float, int unordered_result) +{ + // For instruction feq, flt and fle, the result is 0 if either operand is NaN + if (is_float) { + __ pop_f(f11); + // if unordered_result < 0: + // we want -1 for unordered or less than, 0 for equal and 1 for + // greater 
than. + // else: + // we want -1 for less than, 0 for equal and 1 for unordered or + // greater than. + // f11 primary, f10 secondary + __ float_compare(x10, f11, f10, unordered_result); + } else { + __ pop_d(f11); + // if unordered_result < 0: + // we want -1 for unordered or less than, 0 for equal and 1 for + // greater than. + // else: + // we want -1 for less than, 0 for equal and 1 for unordered or + // greater than. + // f11 primary, f10 secondary + __ double_compare(x10, f11, f10, unordered_result); + } +} + +void TemplateTable::branch(bool is_jsr, bool is_wide) +{ + // We might be moving to a safepoint. The thread which calls + // Interpreter::notice_safepoints() will effectively flush its cache + // when it makes a system call, but we need to do something to + // ensure that we see the changed dispatch table. + __ membar(MacroAssembler::LoadLoad); + + __ profile_taken_branch(x10, x11); + const ByteSize be_offset = MethodCounters::backedge_counter_offset() + + InvocationCounter::counter_offset(); + const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset(); + + // load branch displacement + if (!is_wide) { + __ lhu(x12, at_bcp(1)); + __ revb_h_h(x12, x12); // reverse bytes in half-word and sign-extend + } else { + __ lwu(x12, at_bcp(1)); + __ revb_w_w(x12, x12); // reverse bytes in word and sign-extend + } + + // Handle all the JSR stuff here, then exit. + // It's much shorter and cleaner than intermingling with the non-JSR + // normal-branch stuff occurring below. + + if (is_jsr) { + // compute return address as bci + __ ld(t1, Address(xmethod, Method::const_offset())); + __ add(t1, t1, + in_bytes(ConstMethod::codes_offset()) - (is_wide ? 
5 : 3)); + __ sub(x11, xbcp, t1); + __ push_i(x11); + // Adjust the bcp by the displacement in x12 (16-bit, or 32-bit when is_wide) + __ add(xbcp, xbcp, x12); + __ load_unsigned_byte(t0, Address(xbcp, 0)); + // load the next target bytecode into t0, it is the argument of dispatch_only + __ dispatch_only(vtos, /*generate_poll*/true); + return; + } + + // Normal (non-jsr) branch handling + + // Adjust the bcp by the displacement in x12 + __ add(xbcp, xbcp, x12); + + assert(UseLoopCounter || !UseOnStackReplacement, + "on-stack-replacement requires loop counters"); + Label backedge_counter_overflow; + Label dispatch; + if (UseLoopCounter) { + // increment backedge counter for backward branches + // x10: MDO + // x11: MDO bumped taken-count + // x12: target offset + __ bgtz(x12, dispatch); // count only if backward branch + + // check if MethodCounters exists + Label has_counters; + __ ld(t0, Address(xmethod, Method::method_counters_offset())); + __ bnez(t0, has_counters); + __ push_reg(x10); + __ push_reg(x11); + __ push_reg(x12); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::build_method_counters), xmethod); + __ pop_reg(x12); + __ pop_reg(x11); + __ pop_reg(x10); + __ ld(t0, Address(xmethod, Method::method_counters_offset())); + __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory + __ bind(has_counters); + + Label no_mdo; + int increment = InvocationCounter::count_increment; + if (ProfileInterpreter) { + // Are we profiling? + __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); + __ beqz(x11, no_mdo); + // Increment the MDO backedge counter + const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); + __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, + x10, t0, false, + UseOnStackReplacement ?
&backedge_counter_overflow : &dispatch); + __ j(dispatch); + } + __ bind(no_mdo); + // Increment backedge counter in MethodCounters* + __ ld(t0, Address(xmethod, Method::method_counters_offset())); + const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); + __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, + x10, t1, false, + UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); + __ bind(dispatch); + } + + // Pre-load the next target bytecode into t0 + __ load_unsigned_byte(t0, Address(xbcp, 0)); + + // continue with the bytecode @ target + // t0: target bytecode + // xbcp: target bcp + __ dispatch_only(vtos, /*generate_poll*/true); + + if (UseLoopCounter && UseOnStackReplacement) { + // backedge counter overflow + __ bind(backedge_counter_overflow); + __ neg(x12, x12); + __ add(x12, x12, xbcp); // x12 = xbcp - offset, i.e. the bcp of the branch + // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), + x12); + __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode + + // x10: osr nmethod (osr ok) or NULL (osr not possible) + // x11: target bytecode + // x12: temporary + __ beqz(x10, dispatch); // test result -- no osr if null + // nmethod may have been invalidated (VM may block upon call_VM return) + __ lbu(x12, Address(x10, nmethod::state_offset())); + if (nmethod::in_use != 0) { + __ sub(x12, x12, nmethod::in_use); + } + __ bnez(x12, dispatch); + + // We have the address of an on stack replacement routine in x10 + // We need to prepare to execute the OSR method. First we must + // migrate the locals and monitors off of the stack.
+ + __ mv(x9, x10); // save the nmethod + + call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + + // x10 is OSR buffer, move it to expected parameter location + __ mv(j_rarg0, x10); + + // remove activation + // get sender esp + __ ld(esp, + Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); + // remove frame anchor + __ leave(); + // Ensure compiled code always sees stack at proper alignment + __ andi(sp, esp, -16); + + // and begin the OSR nmethod + __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); + __ jr(t0); + } +} + +void TemplateTable::if_0cmp(Condition cc) +{ + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + + __ addw(x10, x10, zr); + switch (cc) { + case equal: + __ bnez(x10, not_taken); + break; + case not_equal: + __ beqz(x10, not_taken); + break; + case less: + __ bgez(x10, not_taken); + break; + case less_equal: + __ bgtz(x10, not_taken); + break; + case greater: + __ blez(x10, not_taken); + break; + case greater_equal: + __ bltz(x10, not_taken); + break; + default: + break; + } + + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(x10); +} + +void TemplateTable::if_icmp(Condition cc) +{ + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + __ pop_i(x11); + __ addw(x10, x10, zr); + switch (cc) { + case equal: + __ bne(x11, x10, not_taken); + break; + case not_equal: + __ beq(x11, x10, not_taken); + break; + case less: + __ bge(x11, x10, not_taken); + break; + case less_equal: + __ bgt(x11, x10, not_taken); + break; + case greater: + __ ble(x11, x10, not_taken); + break; + case greater_equal: + __ blt(x11, x10, not_taken); + break; + default: + break; + } + + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(x10); +} + +void TemplateTable::if_nullcmp(Condition cc) +{ + transition(atos, vtos); + // assume 
branch is more often taken than not (loops use backward branches) + Label not_taken; + if (cc == equal) { + __ bnez(x10, not_taken); + } else { + __ beqz(x10, not_taken); + } + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(x10); +} + +void TemplateTable::if_acmp(Condition cc) +{ + transition(atos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + __ pop_ptr(x11); + + if (cc == equal) { + __ bne(x11, x10, not_taken); + } else if (cc == not_equal) { + __ beq(x11, x10, not_taken); + } + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(x10); +} + +void TemplateTable::ret() { + transition(vtos, vtos); + // We might be moving to a safepoint. The thread which calls + // Interpreter::notice_safepoints() will effectively flush its cache + // when it makes a system call, but we need to do something to + // ensure that we see the changed dispatch table. + __ membar(MacroAssembler::LoadLoad); + + locals_index(x11); + __ ld(x11, aaddress(x11, t1, _masm)); // get return bci, compute return bcp + __ profile_ret(x11, x12); + __ ld(xbcp, Address(xmethod, Method::const_offset())); + __ add(xbcp, xbcp, x11); + __ addi(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); + __ dispatch_next(vtos, 0, /*generate_poll*/true); +} + +void TemplateTable::wide_ret() { + transition(vtos, vtos); + locals_index_wide(x11); + __ ld(x11, aaddress(x11, t0, _masm)); // get return bci, compute return bcp + __ profile_ret(x11, x12); + __ ld(xbcp, Address(xmethod, Method::const_offset())); + __ add(xbcp, xbcp, x11); + __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); + __ dispatch_next(vtos, 0, /*generate_poll*/true); +} + +void TemplateTable::tableswitch() { + Label default_case, continue_execution; + transition(itos, vtos); + // align xbcp + __ la(x11, at_bcp(BytesPerInt)); + __ andi(x11, x11, -BytesPerInt); + // load lo & hi + __ lwu(x12, Address(x11, BytesPerInt)); + __ lwu(x13, 
Address(x11, 2 * BytesPerInt)); + __ revb_w_w(x12, x12); // reverse bytes in word (32bit) and sign-extend + __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend + // check against lo & hi + __ blt(x10, x12, default_case); + __ bgt(x10, x13, default_case); + // lookup dispatch offset + __ subw(x10, x10, x12); + __ shadd(x13, x10, x11, t0, 2); + __ lwu(x13, Address(x13, 3 * BytesPerInt)); + __ profile_switch_case(x10, x11, x12); + // continue execution + __ bind(continue_execution); + __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend + __ add(xbcp, xbcp, x13); + __ load_unsigned_byte(t0, Address(xbcp)); + __ dispatch_only(vtos, /*generate_poll*/true); + // handle default + __ bind(default_case); + __ profile_switch_default(x10); + __ lwu(x13, Address(x11, 0)); + __ j(continue_execution); +} + +void TemplateTable::lookupswitch() { + transition(itos, itos); + __ stop("lookupswitch bytecode should have been rewritten"); +} + +void TemplateTable::fast_linearswitch() { + transition(itos, vtos); + Label loop_entry, loop, found, continue_execution; + // bswap x10 so we can avoid bswapping the table entries + __ revb_w_w(x10, x10); // reverse bytes in word (32bit) and sign-extend + // align xbcp + __ la(x9, at_bcp(BytesPerInt)); // btw: should be able to get rid of + // this instruction (change offsets + // below) + __ andi(x9, x9, -BytesPerInt); + // set counter + __ lwu(x11, Address(x9, BytesPerInt)); + __ revb_w(x11, x11); + __ j(loop_entry); + // table search + __ bind(loop); + __ shadd(t0, x11, x9, t0, 3); + __ lw(t0, Address(t0, 2 * BytesPerInt)); + __ beq(x10, t0, found); + __ bind(loop_entry); + __ addi(x11, x11, -1); + __ bgez(x11, loop); + // default case + __ profile_switch_default(x10); + __ lwu(x13, Address(x9, 0)); + __ j(continue_execution); + // entry found -> get offset + __ bind(found); + __ shadd(t0, x11, x9, t0, 3); + __ lwu(x13, Address(t0, 3 * BytesPerInt)); + __ profile_switch_case(x11, x10, x9); + // continue 
execution + __ bind(continue_execution); + __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend + __ add(xbcp, xbcp, x13); + __ lbu(t0, Address(xbcp, 0)); + __ dispatch_only(vtos, /*generate_poll*/true); +} + +void TemplateTable::fast_binaryswitch() { + transition(itos, vtos); + // Implementation using the following core algorithm: + // + // int binary_search(int key, LookupswitchPair* array, int n) + // binary_search start: + // #Binary search according to "Methodik des Programmierens" by + // # Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. + // int i = 0; + // int j = n; + // while (i + 1 < j) do + // # invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) + // # with Q: for all i: 0 <= i < n: key < a[i] + // # where a stands for the array and assuming that the (inexisting) + // # element a[n] is infinitely big. + // int h = (i + j) >> 1 + // # i < h < j + // if (key < array[h].fast_match()) + // then [j = h] + // else [i = h] + // end + // # R: a[i] <= key < a[i+1] or Q + // # (i.e., if key is within array, i is the correct index) + // return i + // binary_search end + + + // Register allocation + const Register key = x10; // already set (tosca) + const Register array = x11; + const Register i = x12; + const Register j = x13; + const Register h = x14; + const Register temp = x15; + + // Find array start + __ la(array, at_bcp(3 * BytesPerInt)); // btw: should be able to + // get rid of this + // instruction (change + // offsets below) + __ andi(array, array, -BytesPerInt); + + // Initialize i & j + __ mv(i, zr); // i = 0 + __ lwu(j, Address(array, -BytesPerInt)); // j = length(array) + + // Convert j into native byteordering + __ revb_w(j, j); + + // And start + Label entry; + __ j(entry); + + // binary search loop + { + Label loop; + __ bind(loop); + __ addw(h, i, j); // h = i + j + __ srliw(h, h, 1); // h = (i + j) >> 1 + // if [key < array[h].fast_match()] + // then [j = h] + // else [i = h] + // Convert 
array[h].match to native byte-ordering before compare + __ shadd(temp, h, array, temp, 3); + __ ld(temp, Address(temp, 0)); + __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend + + Label L_done, L_greater; + __ bge(key, temp, L_greater); + // if [key < array[h].fast_match()] then j = h + __ mv(j, h); + __ j(L_done); + __ bind(L_greater); + // if [key >= array[h].fast_match()] then i = h + __ mv(i, h); + __ bind(L_done); + + // while [i + 1 < j] + __ bind(entry); + __ addiw(h, i, 1); // i + 1 + __ blt(h, j, loop); // i + 1 < j + } + + // end of binary search, result index is i (must check again!) + Label default_case; + // Convert array[i].match to native byte-ordering before compare + __ shadd(temp, i, array, temp, 3); + __ ld(temp, Address(temp, 0)); + __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend + __ bne(key, temp, default_case); + + // entry found -> j = offset + __ shadd(temp, i, array, temp, 3); + __ lwu(j, Address(temp, BytesPerInt)); + __ profile_switch_case(i, key, array); + __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend + + __ add(temp, xbcp, j); + __ load_unsigned_byte(t0, Address(temp, 0)); + + __ add(xbcp, xbcp, j); + __ la(xbcp, Address(xbcp, 0)); + __ dispatch_only(vtos, /*generate_poll*/true); + + // default case -> j = default offset + __ bind(default_case); + __ profile_switch_default(i); + __ lwu(j, Address(array, -2 * BytesPerInt)); + __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend + + __ add(temp, xbcp, j); + __ load_unsigned_byte(t0, Address(temp, 0)); + + __ add(xbcp, xbcp, j); + __ la(xbcp, Address(xbcp, 0)); + __ dispatch_only(vtos, /*generate_poll*/true); +} + +void TemplateTable::_return(TosState state) +{ + transition(state, state); + assert(_desc->calls_vm(), + "inconsistent calls_vm information"); // call in remove_activation + + if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { + assert(state == vtos, "only valid state"); + + __ 
ld(c_rarg1, aaddress(0)); + __ load_klass(x13, c_rarg1); + __ lwu(x13, Address(x13, Klass::access_flags_offset())); + Label skip_register_finalizer; + __ andi(t0, x13, JVM_ACC_HAS_FINALIZER); + __ beqz(t0, skip_register_finalizer); + + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1); + + __ bind(skip_register_finalizer); + } + + // Issue a StoreStore barrier after all stores but before return + // from any constructor for any class with a final field. We don't + // know if this is a finalizer, so we always do so. + if (_desc->bytecode() == Bytecodes::_return) { + __ membar(MacroAssembler::StoreStore); + } + + // Narrow result if state is itos but result type is smaller. + // Need to narrow in the return bytecode rather than in generate_return_entry + // since compiled code callers expect the result to already be narrowed. + if (state == itos) { + __ narrow(x10); + } + + __ remove_activation(state); + __ ret(); +} + + +// ---------------------------------------------------------------------------- +// Volatile variables demand their effects be made known to all CPUs +// in order. Store buffers on most chips allow reads & writes to +// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode +// without some kind of memory barrier (i.e., it's not sufficient that +// the interpreter does not reorder volatile references, the hardware +// also must not reorder them). +// +// According to the new Java Memory Model (JMM): +// (1) All volatiles are serialized with respect to each other. ALSO reads & +// writes act as acquire & release, so: +// (2) A read cannot let unrelated NON-volatile memory refs that +// happen after the read float up to before the read. It's OK for +// non-volatile memory refs that happen before the volatile read to +// float down below it. +// (3) Similarly, a volatile write cannot let unrelated NON-volatile +// memory refs that happen BEFORE the write float down to after the +// write.
It's OK for non-volatile memory refs that happen after the +// volatile write to float up before it. +// +// We only put in barriers around volatile refs (they are expensive), +// not _between_ memory refs (that would require us to track the +// flavor of the previous memory refs). Requirements (2) and (3) +// require some barriers before volatile stores and after volatile +// loads. These nearly cover requirement (1) but miss the +// volatile-store-volatile-load case. This final case is placed after +// volatile-stores although it could just as well go before +// volatile-loads. + +void TemplateTable::resolve_cache_and_index(int byte_no, + Register Rcache, + Register index, + size_t index_size) { + const Register temp = x9; + assert_different_registers(Rcache, index, temp); + + Label resolved, clinit_barrier_slow; + + Bytecodes::Code code = bytecode(); + switch (code) { + case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; + case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; + default: break; + } + + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); + __ mv(t0, (int) code); + __ beq(temp, t0, resolved); + + // resolve first time through + // Class initialization barrier slow path lands here as well. + __ bind(clinit_barrier_slow); + + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); + __ mv(temp, (int) code); + __ call_VM(noreg, entry, temp); + + // Update registers with resolved info + __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); + // n.b. 
unlike x86 Rcache is now rcpool plus the indexed offset + // so all clients of this method must be modified accordingly + __ bind(resolved); + + // Class initialization barrier for static methods + if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { + __ load_resolved_method_at_index(byte_no, temp, Rcache); + __ load_method_holder(temp, temp); + __ clinit_barrier(temp, t0, NULL, &clinit_barrier_slow); + } +} + +// The Rcache and index registers must be set before call +// n.b. unlike x86 cache already includes the index offset +void TemplateTable::load_field_cp_cache_entry(Register obj, + Register cache, + Register index, + Register off, + Register flags, + bool is_static = false) { + assert_different_registers(cache, index, flags, off); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + // Field offset + __ ld(off, Address(cache, in_bytes(cp_base_offset + + ConstantPoolCacheEntry::f2_offset()))); + // Flags + __ lwu(flags, Address(cache, in_bytes(cp_base_offset + + ConstantPoolCacheEntry::flags_offset()))); + + // klass overwrite register + if (is_static) { + __ ld(obj, Address(cache, in_bytes(cp_base_offset + + ConstantPoolCacheEntry::f1_offset()))); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ ld(obj, Address(obj, mirror_offset)); + __ resolve_oop_handle(obj); + } +} + +void TemplateTable::load_invoke_cp_cache_entry(int byte_no, + Register method, + Register itable_index, + Register flags, + bool is_invokevirtual, + bool is_invokevfinal, /*unused*/ + bool is_invokedynamic) { + // setup registers + const Register cache = t1; + const Register index = x14; + assert_different_registers(method, flags); + assert_different_registers(method, cache, index); + assert_different_registers(itable_index, flags); + assert_different_registers(itable_index, cache, index); + // determine constant pool cache field offsets + assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag
redundant"); + const int method_offset = in_bytes(ConstantPoolCache::base_offset() + + (is_invokevirtual ? + ConstantPoolCacheEntry::f2_offset() : + ConstantPoolCacheEntry::f1_offset())); + const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()); + // access constant pool cache fields + const int index_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()); + + const size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2)); + resolve_cache_and_index(byte_no, cache, index, index_size); + __ ld(method, Address(cache, method_offset)); + + if (itable_index != noreg) { + __ ld(itable_index, Address(cache, index_offset)); + } + __ lwu(flags, Address(cache, flags_offset)); +} + +// The registers cache and index are expected to be set before call. +// Correct values of the cache and index registers are preserved. +void TemplateTable::jvmti_post_field_access(Register cache, Register index, + bool is_static, bool has_tos) { + // do the JVMTI work here to avoid disturbing the register state below + // We use c_rarg registers here because we want to use the register used in + // the call to the VM + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the time to call into the VM.
+ Label L1; + assert_different_registers(cache, index, x10); + int32_t offset = 0; + __ la_patchable(t0, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), offset); + __ lwu(x10, Address(t0, offset)); + + __ beqz(x10, L1); + + __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1); + __ la(c_rarg2, Address(c_rarg2, in_bytes(ConstantPoolCache::base_offset()))); + + if (is_static) { + __ mv(c_rarg1, zr); // NULL object reference + } else { + __ ld(c_rarg1, at_tos()); // get object pointer without popping it + __ verify_oop(c_rarg1); + } + // c_rarg1: object pointer or NULL + // c_rarg2: cache entry pointer + // c_rarg3: jvalue object on the stack + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_access), + c_rarg1, c_rarg2, c_rarg3); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +void TemplateTable::pop_and_check_object(Register r) +{ + __ pop_ptr(r); + __ null_check(r); // for field access must check obj. + __ verify_oop(r); +} + +void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) +{ + const Register cache = x12; + const Register index = x13; + const Register obj = x14; + const Register off = x9; + const Register flags = x10; + const Register raw_flags = x16; + const Register bc = x14; // uses same reg as obj, so don't mix them + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_access(cache, index, is_static, false); + load_field_cp_cache_entry(obj, cache, index, off, raw_flags, is_static); + + if (!is_static) { + // obj is on the stack + pop_and_check_object(obj); + } + + __ add(off, obj, off); + const Address field(off); + + Label Done, notByte, notBool, notInt, notShort, notChar, + notLong, notFloat, notObj, notDouble; + + __ slli(flags, raw_flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + + ConstantPoolCacheEntry::tos_state_bits)); + __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); + + 
assert(btos == 0, "change code, btos != 0"); + __ bnez(flags, notByte); + + // Don't rewrite getstatic, only getfield + if (is_static) { + rc = may_not_rewrite; + } + + // btos + __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg); + __ push(btos); + // Rewrite bytecode to be faster + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11); + } + __ j(Done); + + __ bind(notByte); + __ sub(t0, flags, (u1)ztos); + __ bnez(t0, notBool); + + // ztos (same code as btos) + __ access_load_at(T_BOOLEAN, IN_HEAP, x10, field, noreg, noreg); + __ push(ztos); + // Rewrite bytecode to be faster + if (rc == may_rewrite) { + // uses btos rewriting, no truncating to t/f bit is needed for getfield + patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11); + } + __ j(Done); + + __ bind(notBool); + __ sub(t0, flags, (u1)atos); + __ bnez(t0, notObj); + // atos + do_oop_load(_masm, field, x10, IN_HEAP); + __ push(atos); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, bc, x11); + } + __ j(Done); + + __ bind(notObj); + __ sub(t0, flags, (u1)itos); + __ bnez(t0, notInt); + // itos + __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg); + __ addw(x10, x10, zr); // sign-extend the loaded int + __ push(itos); + // Rewrite bytecode to be faster + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_igetfield, bc, x11); + } + __ j(Done); + + __ bind(notInt); + __ sub(t0, flags, (u1)ctos); + __ bnez(t0, notChar); + // ctos + __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg); + __ push(ctos); + // Rewrite bytecode to be faster + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_cgetfield, bc, x11); + } + __ j(Done); + + __ bind(notChar); + __ sub(t0, flags, (u1)stos); + __ bnez(t0, notShort); + // stos + __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg); + __ push(stos); + // Rewrite bytecode to be faster + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_sgetfield, bc, x11); + } + __ j(Done); + + __
bind(notShort); + __ sub(t0, flags, (u1)ltos); + __ bnez(t0, notLong); + // ltos + __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg); + __ push(ltos); + // Rewrite bytecode to be faster + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_lgetfield, bc, x11); + } + __ j(Done); + + __ bind(notLong); + __ sub(t0, flags, (u1)ftos); + __ bnez(t0, notFloat); + // ftos + __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); + __ push(ftos); + // Rewrite bytecode to be faster + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_fgetfield, bc, x11); + } + __ j(Done); + + __ bind(notFloat); +#ifdef ASSERT + __ sub(t0, flags, (u1)dtos); + __ bnez(t0, notDouble); +#endif + // dtos + __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); + __ push(dtos); + // Rewrite bytecode to be faster + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_dgetfield, bc, x11); + } +#ifdef ASSERT + __ j(Done); + + __ bind(notDouble); + __ stop("Bad state"); +#endif + + __ bind(Done); + + Label notVolatile; + __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift); + __ beqz(t0, notVolatile); + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + __ bind(notVolatile); +} + +void TemplateTable::getfield(int byte_no) +{ + getfield_or_static(byte_no, false); +} + +void TemplateTable::nofast_getfield(int byte_no) { + getfield_or_static(byte_no, false, may_not_rewrite); +} + +void TemplateTable::getstatic(int byte_no) +{ + getfield_or_static(byte_no, true); +} + +// The registers cache and index are expected to be set before call. +// The function may destroy various registers, just not the cache and index registers.
+void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { + transition(vtos, vtos); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. + Label L1; + assert_different_registers(cache, index, x10); + int32_t offset = 0; + __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset); + __ lwu(x10, Address(t0, offset)); + __ beqz(x10, L1); + + __ get_cache_and_index_at_bcp(c_rarg2, t0, 1); + + if (is_static) { + // Life is simple. Null out the object pointer. + __ mv(c_rarg1, zr); + } else { + // Life is harder. The stack holds the value on top, followed by + // the object. We don't know the size of the value, though; it + // could be one or two words depending on its type. As a result, + // we must find the type to determine where the object is. 
+ __ lwu(c_rarg3, Address(c_rarg2, + in_bytes(cp_base_offset + + ConstantPoolCacheEntry::flags_offset()))); + __ srli(c_rarg3, c_rarg3, ConstantPoolCacheEntry::tos_state_shift); + ConstantPoolCacheEntry::verify_tos_state_shift(); + Label nope2, done, ok; + __ ld(c_rarg1, at_tos_p1()); // initially assume a one word jvalue + __ sub(t0, c_rarg3, ltos); + __ beqz(t0, ok); + __ sub(t0, c_rarg3, dtos); + __ bnez(t0, nope2); + __ bind(ok); + __ ld(c_rarg1, at_tos_p2()); // ltos (two word jvalue); + __ bind(nope2); + } + // cache entry pointer + __ add(c_rarg2, c_rarg2, in_bytes(cp_base_offset)); + // object (tos) + __ mv(c_rarg3, esp); + // c_rarg1: object pointer set up above (NULL if static) + // c_rarg2: cache entry pointer + // c_rarg3: jvalue object on the stack + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + c_rarg1, c_rarg2, c_rarg3); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { + transition(vtos, vtos); + + const Register cache = x12; + const Register index = x13; + const Register obj = x12; + const Register off = x9; + const Register flags = x10; + const Register bc = x14; + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_mod(cache, index, is_static); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + Label Done; + __ mv(x15, flags); + + { + Label notVolatile; + __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift); + __ beqz(t0, notVolatile); + __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); + __ bind(notVolatile); + } + + Label notByte, notBool, notInt, notShort, notChar, + notLong, notFloat, notObj, notDouble; + + __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + + ConstantPoolCacheEntry::tos_state_bits)); + __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); + + 
assert(btos == 0, "change code, btos != 0"); + __ bnez(flags, notByte); + + // Don't rewrite putstatic, only putfield + if (is_static) { + rc = may_not_rewrite; + } + + // btos + { + __ pop(btos); + // field address + if (!is_static) { + pop_and_check_object(obj); + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); // off register is reused as a temporary register. + __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_bputfield, bc, x11, true, byte_no); + } + __ j(Done); + } + + __ bind(notByte); + __ sub(t0, flags, (u1)ztos); + __ bnez(t0, notBool); + + // ztos + { + __ pop(ztos); + // field address + if (!is_static) { + pop_and_check_object(obj); + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); + __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_zputfield, bc, x11, true, byte_no); + } + __ j(Done); + } + + __ bind(notBool); + __ sub(t0, flags, (u1)atos); + __ bnez(t0, notObj); + + // atos + { + __ pop(atos); + // field address + if (!is_static) { + pop_and_check_object(obj); + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); + // Store into the field + do_oop_store(_masm, field, x10, IN_HEAP); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, x11, true, byte_no); + } + __ j(Done); + } + + __ bind(notObj); + __ sub(t0, flags, (u1)itos); + __ bnez(t0, notInt); + + // itos + { + __ pop(itos); + // field address + if (!is_static) { + pop_and_check_object(obj); + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+ const Address field(off, 0); + __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_iputfield, bc, x11, true, byte_no); + } + __ j(Done); + } + + __ bind(notInt); + __ sub(t0, flags, (u1)ctos); + __ bnez(t0, notChar); + + // ctos + { + __ pop(ctos); + // field address + if (!is_static) { + pop_and_check_object(obj); + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); + __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_cputfield, bc, x11, true, byte_no); + } + __ j(Done); + } + + __ bind(notChar); + __ sub(t0, flags, (u1)stos); + __ bnez(t0, notShort); + + // stos + { + __ pop(stos); + // field address + if (!is_static) { + pop_and_check_object(obj); + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); + __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_sputfield, bc, x11, true, byte_no); + } + __ j(Done); + } + + __ bind(notShort); + __ sub(t0, flags, (u1)ltos); + __ bnez(t0, notLong); + + // ltos + { + __ pop(ltos); + // field address + if (!is_static) { + pop_and_check_object(obj); + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); + __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_lputfield, bc, x11, true, byte_no); + } + __ j(Done); + } + + __ bind(notLong); + __ sub(t0, flags, (u1)ftos); + __ bnez(t0, notFloat); + + // ftos + { + __ pop(ftos); + // field address + if (!is_static) { + pop_and_check_object(obj); + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
+ const Address field(off, 0); + __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_fputfield, bc, x11, true, byte_no); + } + __ j(Done); + } + + __ bind(notFloat); +#ifdef ASSERT + __ sub(t0, flags, (u1)dtos); + __ bnez(t0, notDouble); +#endif + + // dtos + { + __ pop(dtos); + // field address + if (!is_static) { + pop_and_check_object(obj); + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); + __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_dputfield, bc, x11, true, byte_no); + } + } + +#ifdef ASSERT + __ j(Done); + + __ bind(notDouble); + __ stop("Bad state"); +#endif + + __ bind(Done); + + { + Label notVolatile; + __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift); + __ beqz(t0, notVolatile); + __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); + __ bind(notVolatile); + } +} + +void TemplateTable::putfield(int byte_no) +{ + putfield_or_static(byte_no, false); +} + +void TemplateTable::nofast_putfield(int byte_no) { + putfield_or_static(byte_no, false, may_not_rewrite); +} + +void TemplateTable::putstatic(int byte_no) { + putfield_or_static(byte_no, true); +} + +void TemplateTable::jvmti_post_fast_field_mod() +{ + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. + Label L2; + int32_t offset = 0; + __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset); + __ lwu(c_rarg3, Address(t0, offset)); + __ beqz(c_rarg3, L2); + __ pop_ptr(x9); // copy the object pointer from tos + __ verify_oop(x9); + __ push_ptr(x9); // put the object pointer back on tos + // Save tos values before call_VM() clobbers them. 
Since we have + // to do it for every data type, we use the saved values as the + // jvalue object. + switch (bytecode()) { // load values into the jvalue object + case Bytecodes::_fast_aputfield: __ push_ptr(x10); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ push_i(x10); break; + case Bytecodes::_fast_dputfield: __ push_d(); break; + case Bytecodes::_fast_fputfield: __ push_f(); break; + case Bytecodes::_fast_lputfield: __ push_l(x10); break; + + default: + ShouldNotReachHere(); + } + __ mv(c_rarg3, esp); // points to jvalue on the stack + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(c_rarg2, x10, 1); + __ verify_oop(x9); + // x9: object pointer copied above + // c_rarg2: cache entry pointer + // c_rarg3: jvalue object on the stack + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + x9, c_rarg2, c_rarg3); + + switch (bytecode()) { // restore tos values + case Bytecodes::_fast_aputfield: __ pop_ptr(x10); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ pop_i(x10); break; + case Bytecodes::_fast_dputfield: __ pop_d(); break; + case Bytecodes::_fast_fputfield: __ pop_f(); break; + case Bytecodes::_fast_lputfield: __ pop_l(x10); break; + default: break; + } + __ bind(L2); + } +} + +void TemplateTable::fast_storefield(TosState state) +{ + transition(state, vtos); + + ByteSize base = ConstantPoolCache::base_offset(); + + jvmti_post_fast_field_mod(); + + // access constant pool cache + __ get_cache_and_index_at_bcp(x12, x11, 1); + + // Must prevent reordering of the following cp cache loads with 
bytecode load + __ membar(MacroAssembler::LoadLoad); + + // test for volatile with x13 + __ lwu(x13, Address(x12, in_bytes(base + + ConstantPoolCacheEntry::flags_offset()))); + + // replace index with field offset from cache entry + __ ld(x11, Address(x12, in_bytes(base + ConstantPoolCacheEntry::f2_offset()))); + + { + Label notVolatile; + __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); + __ beqz(t0, notVolatile); + __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); + __ bind(notVolatile); + } + + // Get object from stack + pop_and_check_object(x12); + + // field address + __ add(x11, x12, x11); + const Address field(x11, 0); + + // access field + switch (bytecode()) { + case Bytecodes::_fast_aputfield: + do_oop_store(_masm, field, x10, IN_HEAP); + break; + case Bytecodes::_fast_lputfield: + __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_iputfield: + __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_zputfield: + __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_bputfield: + __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_sputfield: + __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_cputfield: + __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_fputfield: + __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg); + break; + case Bytecodes::_fast_dputfield: + __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); + __ beqz(t0, notVolatile); + __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); + __ bind(notVolatile); + } +} + +void 
TemplateTable::fast_accessfield(TosState state) +{ + transition(atos, state); + // Do the JVMTI work here to avoid disturbing the register state below + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the time to call into the VM. + Label L1; + int32_t offset = 0; + __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_access_count_addr()), offset); + __ lwu(x12, Address(t0, offset)); + __ beqz(x12, L1); + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(c_rarg2, t1, 1); + __ verify_oop(x10); + __ push_ptr(x10); // save object pointer before call_VM() clobbers it + __ mv(c_rarg1, x10); + // c_rarg1: object pointer copied above + // c_rarg2: cache entry pointer + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_access), + c_rarg1, c_rarg2); + __ pop_ptr(x10); // restore object pointer + __ bind(L1); + } + + // access constant pool cache + __ get_cache_and_index_at_bcp(x12, x11, 1); + + // Must prevent reordering of the following cp cache loads with bytecode load + __ membar(MacroAssembler::LoadLoad); + + __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()))); + __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()))); + + // x10: object + __ verify_oop(x10); + __ null_check(x10); + __ add(x11, x10, x11); + const Address field(x11, 0); + + // access field + switch (bytecode()) { + case Bytecodes::_fast_agetfield: + do_oop_load(_masm, field, x10, IN_HEAP); + __ verify_oop(x10); + break; + case Bytecodes::_fast_lgetfield: + __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg); + break; + case Bytecodes::_fast_igetfield: + __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg); + __ addw(x10, x10, zr); // signed extended + break; + case Bytecodes::_fast_bgetfield: + __ access_load_at(T_BYTE, IN_HEAP, x10, field, 
noreg, noreg); + break; + case Bytecodes::_fast_sgetfield: + __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg); + break; + case Bytecodes::_fast_cgetfield: + __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg); + break; + case Bytecodes::_fast_fgetfield: + __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); + break; + case Bytecodes::_fast_dgetfield: + __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); + break; + default: + ShouldNotReachHere(); + } + { + Label notVolatile; + __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); + __ beqz(t0, notVolatile); + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + __ bind(notVolatile); + } +} + +void TemplateTable::fast_xaccess(TosState state) +{ + transition(vtos, state); + + // get receiver + __ ld(x10, aaddress(0)); + // access constant pool cache + __ get_cache_and_index_at_bcp(x12, x13, 2); + __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()))); + + // make sure exception is reported in correct bcp range (getfield is + // next instruction) + __ addi(xbcp, xbcp, 1); + __ null_check(x10); + switch (state) { + case itos: + __ add(x10, x10, x11); + __ access_load_at(T_INT, IN_HEAP, x10, Address(x10, 0), noreg, noreg); + __ addw(x10, x10, zr); // signed extended + break; + case atos: + __ add(x10, x10, x11); + do_oop_load(_masm, Address(x10, 0), x10, IN_HEAP); + __ verify_oop(x10); + break; + case ftos: + __ add(x10, x10, x11); + __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, Address(x10), noreg, noreg); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()))); + __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); + __ beqz(t0, notVolatile); + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + __ 
bind(notVolatile); + } + + __ sub(xbcp, xbcp, 1); +} + +//----------------------------------------------------------------------------- +// Calls + +void TemplateTable::prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index, // itable index, MethodType, etc. + Register recv, // if caller wants to see it + Register flags // if caller wants to test it + ) { + // determine flags + const Bytecodes::Code code = bytecode(); + const bool is_invokeinterface = code == Bytecodes::_invokeinterface; + const bool is_invokedynamic = code == Bytecodes::_invokedynamic; + const bool is_invokehandle = code == Bytecodes::_invokehandle; + const bool is_invokevirtual = code == Bytecodes::_invokevirtual; + const bool is_invokespecial = code == Bytecodes::_invokespecial; + const bool load_receiver = (recv != noreg); + const bool save_flags = (flags != noreg); + assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), ""); + assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); + assert(flags == noreg || flags == x13, ""); + assert(recv == noreg || recv == x12, ""); + + // setup registers & access constant pool cache + if (recv == noreg) { + recv = x12; + } + if (flags == noreg) { + flags = x13; + } + assert_different_registers(method, index, recv, flags); + + // save 'interpreter return address' + __ save_bcp(); + + load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); + + // maybe push appendix to arguments (just before return address) + if (is_invokedynamic || is_invokehandle) { + Label L_no_push; + __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::has_appendix_shift); + __ beqz(t0, L_no_push); + // Push the appendix as a trailing parameter. + // This must be done before we get the receiver, + // since the parameter_size includes it. 
+ __ push_reg(x9); + __ mv(x9, index); + __ load_resolved_reference_at_index(index, x9); + __ pop_reg(x9); + __ push_reg(index); // push appendix (MethodType, CallSite, etc.) + __ bind(L_no_push); + } + + // load receiver if needed (note: no return address pushed yet) + if (load_receiver) { + __ andi(recv, flags, ConstantPoolCacheEntry::parameter_size_mask); // parameter_size_mask = 1 << 8 + __ shadd(t0, recv, esp, t0, 3); + __ ld(recv, Address(t0, -Interpreter::expr_offset_in_bytes(1))); + __ verify_oop(recv); + } + + // compute return type + __ slli(t1, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); + __ srli(t1, t1, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> t1:0~3 + + // load return address + { + const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code); + __ mv(t0, table_addr); + __ shadd(t0, t1, t0, t1, 3); + __ ld(ra, Address(t0, 0)); + } +} + +void TemplateTable::invokevirtual_helper(Register index, + Register recv, + Register flags) +{ + // Uses temporary registers x10, x13 + assert_different_registers(index, recv, x10, x13); + // Test for an invoke of a final method + Label notFinal; + __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::is_vfinal_shift); + __ beqz(t0, notFinal); + + const Register method = index; // method must be xmethod + assert(method == xmethod, "Method must be xmethod for interpreter calling convention"); + + // do the call - the index is actually the method to call + // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method* + + // It's final, need a null check here! 
+ __ null_check(recv); + + // profile this call + __ profile_final_call(x10); + __ profile_arguments_type(x10, method, x14, true); + + __ jump_from_interpreted(method); + + __ bind(notFinal); + + // get receiver klass + __ null_check(recv, oopDesc::klass_offset_in_bytes()); + __ load_klass(x10, recv); + + // profile this call + __ profile_virtual_call(x10, xlocals, x13); + + // get target Method & entry point + __ lookup_virtual_method(x10, index, method); + __ profile_arguments_type(x13, method, x14, true); + __ jump_from_interpreted(method); +} + +void TemplateTable::invokevirtual(int byte_no) +{ + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + + prepare_invoke(byte_no, xmethod, noreg, x12, x13); + + // xmethod: index (actually a Method*) + // x12: receiver + // x13: flags + + invokevirtual_helper(xmethod, x12, x13); +} + +void TemplateTable::invokespecial(int byte_no) +{ + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + prepare_invoke(byte_no, xmethod, noreg, // get f1 Method* + x12); // get receiver also for null check + __ verify_oop(x12); + __ null_check(x12); + // do the call + __ profile_call(x10); + __ profile_arguments_type(x10, xmethod, xbcp, false); + __ jump_from_interpreted(xmethod); +} + +void TemplateTable::invokestatic(int byte_no) +{ + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this arugment"); + + prepare_invoke(byte_no, xmethod); // get f1 Method* + // do the call + __ profile_call(x10); + __ profile_arguments_type(x10, xmethod, x14, false); + __ jump_from_interpreted(xmethod); +} + +void TemplateTable::fast_invokevfinal(int byte_no) +{ + __ call_Unimplemented(); +} + +void TemplateTable::invokeinterface(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + prepare_invoke(byte_no, x10, xmethod, // get f1 Klass*, f2 Method* + x12, x13); // recv, flags + + // x10: interface klass (from f1) + // xmethod: method (from f2) + // x12: 
receiver + // x13: flags + + // First check for Object case, then private interface method, + // then regular interface method. + + // Special case of invokeinterface called for virtual method of + // java.lang.Object. See cpCache.cpp for details + Label notObjectMethod; + __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_forced_virtual_shift); + __ beqz(t0, notObjectMethod); + + invokevirtual_helper(xmethod, x12, x13); + __ bind(notObjectMethod); + + Label no_such_interface; + + // Check for private method invocation - indicated by vfinal + Label notVFinal; + __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_vfinal_shift); + __ beqz(t0, notVFinal); + + // Check receiver klass into x13 - also a null check + __ null_check(x12, oopDesc::klass_offset_in_bytes()); + __ load_klass(x13, x12); + + Label subtype; + __ check_klass_subtype(x13, x10, x14, subtype); + // If we get here the typecheck failed + __ j(no_such_interface); + __ bind(subtype); + + __ profile_final_call(x10); + __ profile_arguments_type(x10, xmethod, x14, true); + __ jump_from_interpreted(xmethod); + + __ bind(notVFinal); + + // Get receiver klass into x13 - also a null check + __ restore_locals(); + __ null_check(x12, oopDesc::klass_offset_in_bytes()); + __ load_klass(x13, x12); + + Label no_such_method; + + // Preserve method for the throw_AbstractMethodErrorVerbose. + __ mv(x28, xmethod); + // Receiver subtype check against REFC. + // Superklass in x10. Subklass in x13. Blows t1, x30 + __ lookup_interface_method(// inputs: rec. class, interface, itable index + x13, x10, noreg, + // outputs: scan temp. reg, scan temp. 
reg + t1, x30, + no_such_interface, + /*return_method=*/false); + + // profile this call + __ profile_virtual_call(x13, x30, x9); + + // Get declaring interface class from method, and itable index + __ load_method_holder(x10, xmethod); + __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); + __ subw(xmethod, xmethod, Method::itable_index_max); + __ negw(xmethod, xmethod); + + // Preserve recvKlass for throw_AbstractMethodErrorVerbose + __ mv(xlocals, x13); + __ lookup_interface_method(// inputs: rec. class, interface, itable index + xlocals, x10, xmethod, + // outputs: method, scan temp. reg + xmethod, x30, + no_such_interface); + + // xmethod: Method to call + // x12: receiver + // Check for abstract method error + // Note: This should be done more efficiently via a throw_abstract_method_error + // interpreter entry point and a conditional jump to it in case of a null + // method. + __ beqz(xmethod, no_such_method); + + __ profile_arguments_type(x13, xmethod, x30, true); + + // do the call + // x12: receiver + // xmethod: Method + __ jump_from_interpreted(xmethod); + __ should_not_reach_here(); + + // exception handling code follows ... + // note: must restore interpreter registers to canonical + // state for exception handling to work correctly! + + __ bind(no_such_method); + // throw exception + __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) + __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) + // Pass arguments for generating a verbose error message. + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), x13, x28); + // the call_VM checks for exception, so we should never return here. 
+ __ should_not_reach_here(); + + __ bind(no_such_interface); + // throw exception + __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) + __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) + // Pass arguments for generating a verbose error message. + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), x13, x10); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + return; +} + +void TemplateTable::invokehandle(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + prepare_invoke(byte_no, xmethod, x10, x12); + __ verify_method_ptr(x12); + __ verify_oop(x12); + __ null_check(x12); + + // FIXME: profile the LambdaForm also + + // x30 is safe to use here as a temp reg because it is about to + // be clobbered by jump_from_interpreted(). + __ profile_final_call(x30); + __ profile_arguments_type(x30, xmethod, x14, true); + + __ jump_from_interpreted(xmethod); +} + +void TemplateTable::invokedynamic(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + prepare_invoke(byte_no, xmethod, x10); + + // x10: CallSite object (from cpool->resolved_references[]) + // xmethod: MH.linkToCallSite method (from f2) + + // Note: x10_callsite is already pushed by prepare_invoke + + // %%% should make a type profile for any invokedynamic that takes a ref argument + // profile this call + __ profile_call(xbcp); + __ profile_arguments_type(x13, xmethod, x30, false); + + __ verify_oop(x10); + + __ jump_from_interpreted(xmethod); +} + +//----------------------------------------------------------------------------- +// Allocation + +void TemplateTable::_new() { + transition(vtos, atos); + + __ get_unsigned_2_byte_index_at_bcp(x13, 1); + Label slow_case; + Label done; + Label initialize_header; + Label initialize_object; // including clearing the 
fields + + __ get_cpool_and_tags(x14, x10); + // Make sure the class we're about to instantiate has been resolved. + // This is done before loading InstanceKlass to be consistent with the order + // how Constant Pool is updated (see ConstantPool::klass_at_put) + const int tags_offset = Array::base_offset_in_bytes(); + __ add(t0, x10, x13); + __ la(t0, Address(t0, tags_offset)); + __ membar(MacroAssembler::AnyAny); + __ lbu(t0, t0); + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + __ sub(t1, t0, (u1)JVM_CONSTANT_Class); + __ bnez(t1, slow_case); + + // get InstanceKlass + __ load_resolved_klass_at_offset(x14, x13, x14, t0); + + // make sure klass is initialized & doesn't have finalizer + // make sure klass is fully initialized + __ lbu(t0, Address(x14, InstanceKlass::init_state_offset())); + __ sub(t1, t0, (u1)InstanceKlass::fully_initialized); + __ bnez(t1, slow_case); + + // get instance_size in InstanceKlass (scaled to a count of bytes) + __ lwu(x13, Address(x14, Klass::layout_helper_offset())); + // test to see if it has a finalizer or is malformed in some way + __ andi(t0, x13, Klass::_lh_instance_slow_path_bit); + __ bnez(t0, slow_case); + + // Allocate the instance: + // If TLAB is enabled: + // Try to allocate in the TLAB. + // If fails, go to the slow path. + // Else If inline contiguous allocations are enabled: + // Try to allocate in eden. + // If fails due to heap end, go to slow path + // + // If TLAB is enabled OR inline contiguous is enabled: + // Initialize the allocation. + // Exit. + // Go to slow path. + const bool allow_shared_alloc = Universe::heap()->supports_inline_contig_alloc(); + + if (UseTLAB) { + __ tlab_allocate(x10, x13, 0, noreg, x11, slow_case); + + if (ZeroTLAB) { + // the fields have been already cleared + __ j(initialize_header); + } else { + // initialize both the header and fields + __ j(initialize_object); + } + } else { + // Allocation in the shared Eden, if allowed. 
+ // + // x13: instance size in bytes + if (allow_shared_alloc) { + __ eden_allocate(x10, x13, 0, x28, slow_case); + } + } + + // If UseTLAB or allow_shared_alloc is true, the object is created above and + // needs to be initialized. Otherwise, skip and go to the slow path. + if (UseTLAB || allow_shared_alloc) { + // The object is initialized before the header. If the object size is + // zero, go directly to the header initialization. + __ bind(initialize_object); + __ sub(x13, x13, sizeof(oopDesc)); + __ beqz(x13, initialize_header); + + // Initialize object fields + { + __ add(x12, x10, sizeof(oopDesc)); + Label loop; + __ bind(loop); + __ sd(zr, Address(x12)); + __ add(x12, x12, BytesPerLong); + __ sub(x13, x13, BytesPerLong); + __ bnez(x13, loop); + } + + // initialize object header only. + __ bind(initialize_header); + if (UseBiasedLocking) { + __ ld(t0, Address(x14, Klass::prototype_header_offset())); + } else { + __ mv(t0, (intptr_t)markWord::prototype().value()); + } + __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); + __ store_klass_gap(x10, zr); // zero klass gap for compressed oops + __ store_klass(x10, x14); // store klass last + + { + SkipIfEqual skip(_masm, &DTraceAllocProbes, false); + // Trigger dtrace event for fastpath + __ push(atos); // save the return value + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), x10); + __ pop(atos); // restore the return value + } + __ j(done); + } + + // slow case + __ bind(slow_case); + __ get_constant_pool(c_rarg1); + __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); + call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2); + __ verify_oop(x10); + + // continue + __ bind(done); + // Must prevent reordering of stores for object initialization with stores that publish the new object. 
+ __ membar(MacroAssembler::StoreStore); +} + +void TemplateTable::newarray() { + transition(itos, atos); + __ load_unsigned_byte(c_rarg1, at_bcp(1)); + __ mv(c_rarg2, x10); + call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), + c_rarg1, c_rarg2); + // Must prevent reordering of stores for object initialization with stores that publish the new object. + __ membar(MacroAssembler::StoreStore); +} + +void TemplateTable::anewarray() { + transition(itos, atos); + __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); + __ get_constant_pool(c_rarg1); + __ mv(c_rarg3, x10); + call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), + c_rarg1, c_rarg2, c_rarg3); + // Must prevent reordering of stores for object initialization with stores that publish the new object. + __ membar(MacroAssembler::StoreStore); +} + +void TemplateTable::arraylength() { + transition(atos, itos); + __ null_check(x10, arrayOopDesc::length_offset_in_bytes()); + __ lwu(x10, Address(x10, arrayOopDesc::length_offset_in_bytes())); +} + +void TemplateTable::checkcast() +{ + transition(atos, atos); + Label done, is_null, ok_is_subtype, quicked, resolved; + __ beqz(x10, is_null); + + // Get cpool & tags index + __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array + __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index + // See if bytecode has already been quicked + __ add(t0, x13, Array::base_offset_in_bytes()); + __ add(x11, t0, x9); + __ membar(MacroAssembler::AnyAny); + __ lbu(x11, x11); + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + __ sub(t0, x11, (u1)JVM_CONSTANT_Class); + __ beqz(t0, quicked); + + __ push(atos); // save receiver for result, and for GC + call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + // vm_result_2 has metadata result + __ get_vm_result_2(x10, xthread); + __ pop_reg(x13); // restore receiver + __ j(resolved); + + // Get superklass in x10 and subklass in x13 + __ bind(quicked); + __ mv(x13, 
x10); // Save object in x13; x10 needed for subtype check + __ load_resolved_klass_at_offset(x12, x9, x10, t0); // x10 = klass + + __ bind(resolved); + __ load_klass(x9, x13); + + // Generate subtype check. Blows x12, x15. Object in x13. + // Superklass in x10. Subklass in x9. + __ gen_subtype_check(x9, ok_is_subtype); + + // Come here on failure + __ push_reg(x13); + // object is at TOS + __ j(Interpreter::_throw_ClassCastException_entry); + + // Come here on success + __ bind(ok_is_subtype); + __ mv(x10, x13); // Restore object from x13 into x10 + + // Collect counts on whether this test sees NULLs a lot or not. + if (ProfileInterpreter) { + __ j(done); + __ bind(is_null); + __ profile_null_seen(x12); + } else { + __ bind(is_null); // same as 'done' + } + __ bind(done); +} + +void TemplateTable::instanceof() { + transition(atos, itos); + Label done, is_null, ok_is_subtype, quicked, resolved; + __ beqz(x10, is_null); + + // Get cpool & tags index + __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array + __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index + // See if bytecode has already been quicked + __ add(t0, x13, Array::base_offset_in_bytes()); + __ add(x11, t0, x9); + __ membar(MacroAssembler::AnyAny); + __ lbu(x11, x11); + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + __ sub(t0, x11, (u1)JVM_CONSTANT_Class); + __ beqz(t0, quicked); + + __ push(atos); // save receiver for result, and for GC + call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + // vm_result_2 has metadata result + __ get_vm_result_2(x10, xthread); + __ pop_reg(x13); // restore receiver + __ verify_oop(x13); + __ load_klass(x13, x13); + __ j(resolved); + + // Get superklass in x10 and subklass in x13 + __ bind(quicked); + __ load_klass(x13, x10); + __ load_resolved_klass_at_offset(x12, x9, x10, t0); + + __ bind(resolved); + + // Generate subtype check. Blows x12, x15 + // Superklass in x10. Subklass in x13. 
+ __ gen_subtype_check(x13, ok_is_subtype); + + // Come here on failure + __ mv(x10, zr); + __ j(done); + // Come here on success + __ bind(ok_is_subtype); + __ li(x10, 1); + + // Collect counts on whether this test sees NULLs a lot or not. + if (ProfileInterpreter) { + __ j(done); + __ bind(is_null); + __ profile_null_seen(x12); + } else { + __ bind(is_null); // same as 'done' + } + __ bind(done); + // x10 = 0: obj == NULL or obj is not an instanceof the specified klass + // x10 = 1: obj != NULL and obj is an instanceof the specified klass +} + +//----------------------------------------------------------------------------- +// Breakpoints +void TemplateTable::_breakpoint() { + // Note: We get here even if we are single stepping.. + // jbug insists on setting breakpoints at every bytecode + // even if we are in single step mode. + + transition(vtos, vtos); + + // get the unpatched byte code + __ get_method(c_rarg1); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::get_original_bytecode_at), + c_rarg1, xbcp); + __ mv(x9, x10); + + // post the breakpoint event + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), + xmethod, xbcp); + + // complete the execution of original bytecode + __ mv(t0, x9); + __ dispatch_only_normal(vtos); +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateTable::athrow() { + transition(atos, vtos); + __ null_check(x10); + __ j(Interpreter::throw_exception_entry()); +} + +//----------------------------------------------------------------------------- +// Synchronization +// +// Note: monitorenter & exit are symmetric routines; which is reflected +// in the assembly code structure as well +// +// Stack layout: +// +// [expressions ] <--- esp = expression stack top +// .. +// [expressions ] +// [monitor entry] <--- monitor block top = expression stack bot +// .. +// [monitor entry] +// [frame data ] <--- monitor block bot +// ... 
+// [saved fp ] <--- fp +void TemplateTable::monitorenter() +{ + transition(atos, vtos); + + // check for NULL object + __ null_check(x10); + + const Address monitor_block_top( + fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); + const Address monitor_block_bot( + fp, frame::interpreter_frame_initial_sp_offset * wordSize); + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + Label allocated; + + // initialize entry pointer + __ mv(c_rarg1, zr); // points to free slot or NULL + + // find a free slot in the monitor block (result in c_rarg1) + { + Label entry, loop, exit, notUsed; + __ ld(c_rarg3, monitor_block_top); // points to current entry, + // starting with top-most entry + __ la(c_rarg2, monitor_block_bot); // points to word before bottom + + __ j(entry); + + __ bind(loop); + // check if current entry is used + // if not used then remember entry in c_rarg1 + __ ld(t0, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes())); + __ bnez(t0, notUsed); + __ mv(c_rarg1, c_rarg3); + __ bind(notUsed); + // check if current entry is for same object + // if same object then stop searching + __ beq(x10, t0, exit); + // otherwise advance to next entry + __ add(c_rarg3, c_rarg3, entry_size); + __ bind(entry); + // check if bottom reached + // if not at bottom then check this entry + __ bne(c_rarg3, c_rarg2, loop); + __ bind(exit); + } + + __ bnez(c_rarg1, allocated); // check if a slot has been found and + // if found, continue with that on + + // allocate one if there's no free slot + { + Label entry, loop; + // 1. 
compute new pointers // esp: old expression stack top + __ ld(c_rarg1, monitor_block_bot); // c_rarg1: old expression stack bottom + __ sub(esp, esp, entry_size); // move expression stack top + __ sub(c_rarg1, c_rarg1, entry_size); // move expression stack bottom + __ mv(c_rarg3, esp); // set start value for copy loop + __ sd(c_rarg1, monitor_block_bot); // set new monitor block bottom + __ sub(sp, sp, entry_size); // make room for the monitor + + __ j(entry); + // 2. move expression stack contents + __ bind(loop); + __ ld(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack + // word from old location + __ sd(c_rarg2, Address(c_rarg3, 0)); // and store it at new location + __ add(c_rarg3, c_rarg3, wordSize); // advance to next word + __ bind(entry); + __ bne(c_rarg3, c_rarg1, loop); // check if bottom reached.if not at bottom + // then copy next word + } + + // call run-time routine + // c_rarg1: points to monitor entry + __ bind(allocated); + + // Increment bcp to point to the next bytecode, so exception + // handling for async. exceptions work correctly. + // The object has already been poped from the stack, so the + // expression stack looks correct. + __ addi(xbcp, xbcp, 1); + + // store object + __ sd(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + __ lock_object(c_rarg1); + + // check to make sure this monitor doesn't cause stack overflow after locking + __ save_bcp(); // in case of exception + __ generate_stack_overflow_check(0); + + // The bcp has already been incremented. Just need to dispatch to + // next instruction. 
+ __ dispatch_next(vtos); +} + +void TemplateTable::monitorexit() +{ + transition(atos, vtos); + + // check for NULL object + __ null_check(x10); + + const Address monitor_block_top( + fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); + const Address monitor_block_bot( + fp, frame::interpreter_frame_initial_sp_offset * wordSize); + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + Label found; + + // find matching slot + { + Label entry, loop; + __ ld(c_rarg1, monitor_block_top); // points to current entry, + // starting with top-most entry + __ la(c_rarg2, monitor_block_bot); // points to word before bottom + // of monitor block + __ j(entry); + + __ bind(loop); + // check if current entry is for same object + __ ld(t0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + // if same object then stop searching + __ beq(x10, t0, found); + // otherwise advance to next entry + __ add(c_rarg1, c_rarg1, entry_size); + __ bind(entry); + // check if bottom reached + // if not at bottom then check this entry + __ bne(c_rarg1, c_rarg2, loop); + } + + // error handling. 
Unlocking was not block-structured + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + // call run-time routine + __ bind(found); + __ push_ptr(x10); // make sure object is on stack (contract with oopMaps) + __ unlock_object(c_rarg1); + __ pop_ptr(x10); // discard object +} + +// Wide instructions +void TemplateTable::wide() +{ + __ load_unsigned_byte(x9, at_bcp(1)); + __ mv(t0, (address)Interpreter::_wentry_point); + __ shadd(t0, x9, t0, t1, 3); + __ ld(t0, Address(t0)); + __ jr(t0); +} + +// Multi arrays +void TemplateTable::multianewarray() { + transition(vtos, atos); + __ load_unsigned_byte(x10, at_bcp(3)); // get number of dimensions + // last dim is on top of stack; we want address of first one: + // first_addr = last_addr + (ndims - 1) * wordSize + __ shadd(c_rarg1, x10, esp, c_rarg1, 3); + __ sub(c_rarg1, c_rarg1, wordSize); + call_VM(x10, + CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), + c_rarg1); + __ load_unsigned_byte(x11, at_bcp(3)); + __ shadd(esp, x11, esp, t0, 3); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/templateTable_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/templateTable_riscv.hpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_TEMPLATETABLE_RISCV_HPP +#define CPU_RISCV_TEMPLATETABLE_RISCV_HPP + +static void prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index = noreg, // itable index, MethodType, etc. + Register recv = noreg, // if caller wants to see it + Register flags = noreg // if caller wants to test it + ); +static void invokevirtual_helper(Register index, Register recv, + Register flags); + +// Helpers +static void index_check(Register array, Register index); + +#endif // CPU_RISCV_TEMPLATETABLE_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "prims/universalNativeInvoker.hpp" +#include "utilities/debug.hpp" + +address ProgrammableInvoker::generate_adapter(jobject jabi, jobject jlayout) { + Unimplemented(); + return nullptr; +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "prims/universalUpcallHandler.hpp" +#include "utilities/debug.hpp" + +address ProgrammableUpcallHandler::generate_upcall_stub(jobject jrec, jobject jabi, jobject jlayout) { + Unimplemented(); + return nullptr; +} + +address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) { + ShouldNotCallThis(); + return nullptr; +} + +bool ProgrammableUpcallHandler::supports_optimized_upcalls() { + return false; +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/vmStructs_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/vmStructs_riscv.hpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_VMSTRUCTS_RISCV_HPP +#define CPU_RISCV_VMSTRUCTS_RISCV_HPP + +// These are the CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. + +#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) + +#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) + +#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#endif // CPU_RISCV_VMSTRUCTS_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "memory/allocation.hpp"
+#include "memory/allocation.inline.hpp"
+#include "runtime/os.inline.hpp"
+#include "vm_version_ext_riscv.hpp"
+
+// VM_Version_Ext statics (all zero until initialize_cpu_information() has run)
+int VM_Version_Ext::_no_of_threads = 0;
+int VM_Version_Ext::_no_of_cores = 0;
+int VM_Version_Ext::_no_of_sockets = 0;
+bool VM_Version_Ext::_initialized = false;
+char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0};
+char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0};
+
+
+void VM_Version_Ext::initialize_cpu_information(void) { // fills the statics above on the first call only
+  // do nothing if cpu info has been initialized
+  if (_initialized) {
+    return;
+  }
+
+  _no_of_cores = os::processor_count();
+  _no_of_threads = _no_of_cores; // threads are reported as equal to the processor count
+  _no_of_sockets = _no_of_cores; // sockets are reported as equal to the processor count as well
+  snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64");
+  snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); // name followed by the features string
+  _initialized = true;
+}
+
+int VM_Version_Ext::number_of_threads(void) { // initializes lazily, then returns the cached count
+  initialize_cpu_information();
+  return _no_of_threads;
+}
+
+int VM_Version_Ext::number_of_cores(void) { // initializes lazily, then returns the cached count
+  initialize_cpu_information();
+  return _no_of_cores;
+}
+
+int VM_Version_Ext::number_of_sockets(void) { // initializes lazily, then returns the cached count
+  initialize_cpu_information();
+  return _no_of_sockets;
+}
+
+const char* VM_Version_Ext::cpu_name(void) { // returns a newly allocated copy of _cpu_name, or NULL
+  initialize_cpu_information();
+  char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing);
+  if (NULL == tmp) {
+    return NULL; // allocation failed; nothing was copied
+  }
+  strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); // source and destination have the same capacity
+  return tmp; // NOTE(review): presumably the caller releases this buffer - confirm
+}
+
+const char* VM_Version_Ext::cpu_description(void) { // returns a newly allocated copy of _cpu_desc, or NULL
+  initialize_cpu_information();
+  char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing);
+  if (NULL == tmp) {
+    return NULL; // allocation failed; nothing was copied
+  }
+  strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); // source and destination have the same capacity
+  return tmp; // NOTE(review): presumably the caller releases this buffer - confirm
+}
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_VM_VERSION_EXT_RISCV_HPP
+#define CPU_RISCV_VM_VERSION_EXT_RISCV_HPP
+
+#include "runtime/vm_version.hpp"
+#include "utilities/macros.hpp"
+
+class VM_Version_Ext : public VM_Version { // processor-count and CPU name/description queries layered on VM_Version
+ private:
+  static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; // capacity of _cpu_name in bytes
+  static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; // capacity of _cpu_desc in bytes
+
+  static int _no_of_threads; // cached by initialize_cpu_information()
+  static int _no_of_cores; // cached by initialize_cpu_information()
+  static int _no_of_sockets; // cached by initialize_cpu_information()
+  static bool _initialized; // true once the fields above and below have been filled
+  static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE];
+  static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE];
+
+ public:
+  static int number_of_threads(void);
+  static int number_of_cores(void);
+  static int number_of_sockets(void);
+
+  static const char* cpu_name(void); // newly allocated copy of _cpu_name, or NULL if allocation fails
+  static const char* cpu_description(void); // newly allocated copy of _cpu_desc, or NULL if allocation fails
+  static void initialize_cpu_information(void); // returns immediately once _initialized is set
+
+};
+
+#endif // CPU_RISCV_VM_VERSION_EXT_RISCV_HPP
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/vm_version_riscv.cpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/vm_version_riscv.cpp
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/java.hpp" +#include "runtime/os.hpp" +#include "runtime/vm_version.hpp" +#include "utilities/formatBuffer.hpp" +#include "utilities/macros.hpp" + +#include OS_HEADER_INLINE(os) + +const char* VM_Version::_uarch = ""; +uint32_t VM_Version::_initial_vector_length = 0; + +void VM_Version::initialize() { + get_os_cpu_info(); + + if (FLAG_IS_DEFAULT(UseFMA)) { + FLAG_SET_DEFAULT(UseFMA, true); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0); + } + + if (UseAES || UseAESIntrinsics) { + if (UseAES && !FLAG_IS_DEFAULT(UseAES)) { + warning("AES instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseAES, false); + } + if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { + warning("AES intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + } + + if (UseAESCTRIntrinsics) { + warning("AES/CTR intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); + } + + if (UseSHA) { + warning("SHA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA, false); + } + + if (UseSHA1Intrinsics) { + warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + } + + if (UseSHA256Intrinsics) { + warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this 
CPU."); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + } + + if (UseSHA512Intrinsics) { + warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + + if (UseSHA3Intrinsics) { + warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); + } + + if (UseCRC32Intrinsics) { + warning("CRC32 intrinsics are not available on this CPU."); + FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); + } + + if (UseCRC32CIntrinsics) { + warning("CRC32C intrinsics are not available on this CPU."); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + + if (UseMD5Intrinsics) { + warning("MD5 intrinsics are not available on this CPU."); + FLAG_SET_DEFAULT(UseMD5Intrinsics, false); + } + + if (UseRVV) { + if (!(_features & CPU_V)) { + warning("RVV is not supported on this CPU"); + FLAG_SET_DEFAULT(UseRVV, false); + } else { + // read vector length from vector CSR vlenb + _initial_vector_length = get_current_vector_length(); + } + } + + if (UseRVB && !(_features & CPU_B)) { + warning("RVB is not supported on this CPU"); + FLAG_SET_DEFAULT(UseRVB, false); + } + + if (UseRVC && !(_features & CPU_C)) { + warning("RVC is not supported on this CPU"); + FLAG_SET_DEFAULT(UseRVC, false); + } + + if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) { + FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); + } + + if (UseRVB) { + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { + FLAG_SET_DEFAULT(UsePopCountInstruction, true); + } + } else { + FLAG_SET_DEFAULT(UsePopCountInstruction, false); + } + + char buf[512]; + buf[0] = '\0'; + if (_uarch != NULL && strcmp(_uarch, "") != 0) snprintf(buf, sizeof(buf), "%s,", _uarch); + strcat(buf, "rv64"); +#define ADD_FEATURE_IF_SUPPORTED(id, name, bit) if (_features & CPU_##id) strcat(buf, name); + CPU_FEATURE_FLAGS(ADD_FEATURE_IF_SUPPORTED) +#undef ADD_FEATURE_IF_SUPPORTED + + 
_features_string = os::strdup(buf); + +#ifdef COMPILER2 + c2_initialize(); +#endif // COMPILER2 + + UNSUPPORTED_OPTION(CriticalJNINatives); +} + +#ifdef COMPILER2 +void VM_Version::c2_initialize() { + if (UseCMoveUnconditionally) { + FLAG_SET_DEFAULT(UseCMoveUnconditionally, false); + } + + if (ConditionalMoveLimit > 0) { + FLAG_SET_DEFAULT(ConditionalMoveLimit, 0); + } + + if (!UseRVV) { + FLAG_SET_DEFAULT(SpecialEncodeISOArray, false); + } + + if (!UseRVV && MaxVectorSize) { + FLAG_SET_DEFAULT(MaxVectorSize, 0); + } + + if (!UseRVV) { + FLAG_SET_DEFAULT(UseRVVForBigIntegerShiftIntrinsics, false); + } + + if (UseRVV) { + if (FLAG_IS_DEFAULT(MaxVectorSize)) { + MaxVectorSize = _initial_vector_length; + } else if (MaxVectorSize < 16) { + warning("RVV does not support vector length less than 16 bytes. Disabling RVV."); + UseRVV = false; + } else if (is_power_of_2(MaxVectorSize)) { + if (MaxVectorSize > _initial_vector_length) { + warning("Current system only supports max RVV vector length %d. 
Set MaxVectorSize to %d", + _initial_vector_length, _initial_vector_length); + } + MaxVectorSize = _initial_vector_length; + } else { + vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize)); + } + } + + // disable prefetch + if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); + } + + if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { + FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); + } + + if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { + FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true); + } + + if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { + FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true); + } + + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { + FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true); + } + + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); + } +} +#endif // COMPILER2 Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/vm_version_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/vm_version_riscv.hpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_VM_VERSION_RISCV_HPP
+#define CPU_RISCV_VM_VERSION_RISCV_HPP
+
+#include "runtime/abstract_vm_version.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/globals_extension.hpp"
+#include "utilities/sizes.hpp"
+
+class VM_Version : public Abstract_VM_Version { // RISC-V CPU feature detection and flag setup
+#ifdef COMPILER2
+private:
+  static void c2_initialize(); // C2-only flag adjustments; called at the end of initialize()
+#endif // COMPILER2
+
+protected:
+  static const char* _uarch; // prepended to the features string by initialize() when non-empty
+  static uint32_t _initial_vector_length; // set by initialize() when UseRVV is on and CPU_V is present
+  static void get_os_cpu_info(); // first call made by initialize(); defined outside this header
+  static uint32_t get_current_vector_length(); // per initialize(): reads the length from vector CSR vlenb
+
+public:
+  // Initialization
+  static void initialize();
+
+  constexpr static bool supports_stack_watermark_barrier() { return true; }
+
+  enum Feature_Flag { // one CPU_<id> mask per extension; each bit index is the letter's position (a=0 ... v=21)
+#define CPU_FEATURE_FLAGS(decl) \
+    decl(I, "i", 8) \
+    decl(M, "m", 12) \
+    decl(A, "a", 0) \
+    decl(F, "f", 5) \
+    decl(D, "d", 3) \
+    decl(C, "c", 2) \
+    decl(V, "v", 21) \
+    decl(B, "b", 1)
+
+#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit),
+    CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
+#undef DECLARE_CPU_FEATURE_FLAG
+  };
+};
+
+#endif // CPU_RISCV_VM_VERSION_RISCV_HPP
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/vmreg_riscv.cpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/vmreg_riscv.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "code/vmreg.hpp"
+
+void VMRegImpl::set_regName() { // fills regName[] slot by slot: GPRs, then FPRs, then vector registers
+  int i = 0; // running slot index shared by all loops below
+  Register reg = ::as_Register(0);
+  for ( ; i < ConcreteRegisterImpl::max_gpr ; ) {
+    for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) {
+      regName[i++] = reg->name(); // every slot of a register carries that register's name
+    }
+    reg = reg->successor();
+  }
+
+  FloatRegister freg = ::as_FloatRegister(0);
+  for ( ; i < ConcreteRegisterImpl::max_fpr ; ) {
+    for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) {
+      regName[i++] = freg->name(); // use the float cursor; 'reg' has already run past the last GPR
+    }
+    freg = freg->successor();
+  }
+
+  VectorRegister vreg = ::as_VectorRegister(0);
+  for ( ; i < ConcreteRegisterImpl::max_vpr ; ) {
+    for (int j = 0 ; j < VectorRegisterImpl::max_slots_per_register ; j++) {
+      regName[i++] = vreg->name(); // use the vector cursor, for the same reason
+    }
+    vreg = vreg->successor();
+  }
+
+  for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) { // remaining slots get a placeholder
+    regName[i] = "NON-GPR-FPR-VPR";
+  }
+}
+
+VMReg VMRegImpl::vmStorageToVMReg(int type, int index) { // not implemented on this port
+  Unimplemented();
+  return VMRegImpl::Bad();
+}
Index: 
jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/vmreg_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/vmreg_riscv.hpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#ifndef CPU_RISCV_VMREG_RISCV_HPP
+#define CPU_RISCV_VMREG_RISCV_HPP
+
+inline bool is_Register() { // GPR slots are [0, max_gpr); the unsigned compare also rejects negative values
+  return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr;
+}
+
+inline bool is_FloatRegister() { // FPR slots are [max_gpr, max_fpr)
+  return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr;
+}
+
+inline bool is_VectorRegister() { // vector slots are [max_fpr, max_vpr)
+  return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr;
+}
+
+inline Register as_Register() { // slot index divided by slots-per-register is the register number
+  assert(is_Register(), "must be");
+  return ::as_Register(value() / RegisterImpl::max_slots_per_register);
+}
+
+inline FloatRegister as_FloatRegister() { // asserts an even slot in the FPR range
+  assert(is_FloatRegister() && is_even(value()), "must be");
+  return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) /
+                            FloatRegisterImpl::max_slots_per_register); // rebase to the first FPR slot
+}
+
+inline VectorRegister as_VectorRegister() { // asserts a vector-range slot whose low bits under (slots-1) are zero
+  assert(is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be");
+  return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) /
+                             VectorRegisterImpl::max_slots_per_register); // rebase to the first vector slot
+}
+
+inline bool is_concrete() { // true when this slot is the first slot of its register
+  assert(is_reg(), "must be");
+  if (is_VectorRegister()) {
+    int base = value() - ConcreteRegisterImpl::max_fpr; // slot offset within the vector range
+    return (base % VectorRegisterImpl::max_slots_per_register) == 0;
+  } else {
+    return is_even(value()); // non-vector registers: only even slots count as concrete
+  }
+}
+
+#endif // CPU_RISCV_VMREG_RISCV_HPP
Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp
===================================================================
--- /dev/null
+++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP +#define CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP + +inline VMReg RegisterImpl::as_VMReg() { + if (this == noreg) { + return VMRegImpl::Bad(); + } + return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); +} + +inline VMReg FloatRegisterImpl::as_VMReg() { + return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + + ConcreteRegisterImpl::max_gpr); +} + +inline VMReg VectorRegisterImpl::as_VMReg() { + return VMRegImpl::as_VMReg((encoding() * VectorRegisterImpl::max_slots_per_register) + + ConcreteRegisterImpl::max_fpr); +} + +#endif // CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "assembler_riscv.inline.hpp" +#include "code/vtableStubs.hpp" +#include "interp_masm_riscv.hpp" +#include "memory/resourceArea.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/instanceKlass.hpp" +#include "oops/klassVtable.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_riscv.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +// machine-dependent part of VtableStubs: create VtableStub of correct size and +// initialize its code + +#define __ masm-> + +#ifndef PRODUCT +extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); +#endif + +VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. 
+ const int stub_code_length = code_size_limit(true); + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); + // Can be NULL if there is no free space in the code cache. + if (s == NULL) { + return NULL; + } + + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. + address start_pc = NULL; + int slop_bytes = 0; + int slop_delta = 0; + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + assert_cond(masm != NULL); + +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + __ la(t2, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); + __ add_memory_int64(Address(t2), 1); + } +#endif + + // get receiver (need to skip return address on top of stack) + assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); + + // get receiver klass + address npe_addr = __ pc(); + __ load_klass(t2, j_rarg0); + +#ifndef PRODUCT + if (DebugVtables) { + Label L; + start_pc = __ pc(); + + // check offset vs vtable length + __ lwu(t0, Address(t2, Klass::vtable_length_offset())); + __ mvw(t1, vtable_index * vtableEntry::size()); + __ bgt(t0, t1, L); + __ enter(); + __ mv(x12, vtable_index); + + __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, x12); + const ptrdiff_t estimate = 256; + const ptrdiff_t codesize = __ pc() - start_pc; + slop_delta = estimate - codesize; // call_VM varies in length, depending on data + slop_bytes += slop_delta; + assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); + + __ leave(); + __ bind(L); + } +#endif // PRODUCT + + start_pc = __ pc(); + __ lookup_virtual_method(t2, vtable_index, xmethod); + // lookup_virtual_method generates + // 4 instructions 
(maximum value encountered in normal case):li(lui + addiw) + add + ld + // 1 instruction (best case):ld * 1 + slop_delta = 16 - (int)(__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + +#ifndef PRODUCT + if (DebugVtables) { + Label L; + __ beqz(xmethod, L); + __ ld(t0, Address(xmethod, Method::from_compiled_offset())); + __ bnez(t0, L); + __ stop("Vtable entry is NULL"); + __ bind(L); + } +#endif // PRODUCT + + // t2: receiver klass + // xmethod: Method* + // j_rarg0: receiver + address ame_addr = __ pc(); + __ ld(t0, Address(xmethod, Method::from_compiled_offset())); + __ jr(t0); + + masm->flush(); + bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0); + + return s; +} + +VtableStub* VtableStubs::create_itable_stub(int itable_index) { + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = code_size_limit(false); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); + // Can be NULL if there is no free space in the code cache. + if (s == NULL) { + return NULL; + } + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs.
+ address start_pc = NULL; + int slop_bytes = 0; + int slop_delta = 0; + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + assert_cond(masm != NULL); + +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + __ la(x18, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); + __ add_memory_int64(Address(x18), 1); + } +#endif + + // get receiver (need to skip return address on top of stack) + assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); + + // Entry arguments: + // t1: CompiledICHolder + // j_rarg0: Receiver + + // This stub is called from compiled code which has no callee-saved registers, + // so all registers except arguments are free at this point. + const Register recv_klass_reg = x18; + const Register holder_klass_reg = x19; // declaring interface klass (DECC) + const Register resolved_klass_reg = xmethod; // resolved interface klass (REFC) + const Register temp_reg = x28; + const Register temp_reg2 = x29; + const Register icholder_reg = t1; + + Label L_no_such_interface; + + __ ld(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset())); + __ ld(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset())); + + start_pc = __ pc(); + + // get receiver klass (also an implicit null-check) + address npe_addr = __ pc(); + __ load_klass(recv_klass_reg, j_rarg0); + + // Receiver subtype check against REFC. + __ lookup_interface_method(// inputs: rec. class, interface + recv_klass_reg, resolved_klass_reg, noreg, + // outputs: scan temp. reg1, scan temp. reg2 + temp_reg2, temp_reg, + L_no_such_interface, + /*return_method=*/false); + + const ptrdiff_t typecheckSize = __ pc() - start_pc; + start_pc = __ pc(); + + // Get selected method from declaring class and itable index + __ lookup_interface_method(// inputs: rec.
class, interface, itable index + recv_klass_reg, holder_klass_reg, itable_index, + // outputs: method, scan temp. reg + xmethod, temp_reg, + L_no_such_interface); + + const ptrdiff_t lookupSize = __ pc() - start_pc; + + // Reduce "estimate" such that "padding" does not drop below 8. + const ptrdiff_t estimate = 256; + const ptrdiff_t codesize = typecheckSize + lookupSize; + slop_delta = (int)(estimate - codesize); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); + +#ifdef ASSERT + if (DebugVtables) { + Label L2; + __ beqz(xmethod, L2); + __ ld(t0, Address(xmethod, Method::from_compiled_offset())); + __ bnez(t0, L2); + __ stop("compiler entrypoint is null"); + __ bind(L2); + } +#endif // ASSERT + + // xmethod: Method* + // j_rarg0: receiver + address ame_addr = __ pc(); + __ ld(t0, Address(xmethod, Method::from_compiled_offset())); + __ jr(t0); + + __ bind(L_no_such_interface); + // Handle IncompatibleClassChangeError in itable stubs. + // More detailed error message. + // We force resolving of the call site by jumping to the "handle + // wrong method" stub, and so let the interpreter runtime do all the + // dirty work. + assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order"); + __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + + masm->flush(); + bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); + + return s; +} + +int VtableStub::pd_code_alignment() { + // RISCV cache line size is not an architected constant. We just align on word size. 
+ const unsigned int icache_line_size = wordSize; + return icache_line_size; +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2019 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -1445,7 +1445,10 @@ void LIR_Assembler::comp_fl2i(LIR_Code c } // result = condition ? opr1 : opr2 -void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { + assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on s390"); + Assembler::branch_condition acond = Assembler::bcondEqual, ncond = Assembler::bcondNotEqual; switch (condition) { case lir_cond_equal: acond = Assembler::bcondEqual; ncond = Assembler::bcondNotEqual; break; Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -2017,7 +2017,10 @@ void LIR_Assembler::emit_compare_and_swa } } -void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { + assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on x86"); + Assembler::Condition acond, ncond; switch (condition) { case lir_cond_equal: acond = Assembler::equal; ncond = Assembler::notEqual; break; Index: jdk17u-jdk-17.0.5-8/src/hotspot/os/linux/os_linux.cpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/os/linux/os_linux.cpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/os/linux/os_linux.cpp @@ -2565,6 +2565,8 @@ void os::get_summary_cpu_info(char* cpui strncpy(cpuinfo, "IA64", length); #elif defined(PPC) strncpy(cpuinfo, "PPC64", length); +#elif defined(RISCV) + strncpy(cpuinfo, "RISCV64", length); #elif defined(S390) strncpy(cpuinfo, "S390", length); #elif defined(SPARC) Index: jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp @@ -0,0 +1,26 @@ +/* + * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// nothing required here Index: jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp @@ -0,0 +1,134 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP +#define OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP + +#include "runtime/vm_version.hpp" + +// Implementation of class atomic + +// Note that memory_order_conservative requires a full barrier after atomic stores. +// See https://patchwork.kernel.org/patch/3575821/ + +template<size_t byte_size> +struct Atomic::PlatformAdd { + template<typename D, typename I> + D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { + D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE); + FULL_MEM_BARRIER; + return res; + } + + template<typename D, typename I> + D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const { + return add_and_fetch(dest, add_value, order) - add_value; + } +}; + +template<size_t byte_size> +template<typename T> +inline T Atomic::PlatformXchg<byte_size>::operator()(T volatile* dest, + T exchange_value, + atomic_memory_order order) const { + STATIC_ASSERT(byte_size == sizeof(T)); + T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE); + FULL_MEM_BARRIER; + return res; +} + +// __attribute__((unused)) on dest is to get rid of spurious GCC warnings.
+template<size_t byte_size> +template<typename T> +inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T volatile* dest __attribute__((unused)), + T compare_value, + T exchange_value, + atomic_memory_order order) const { + STATIC_ASSERT(byte_size == sizeof(T)); + T value = compare_value; + if (order != memory_order_relaxed) { + FULL_MEM_BARRIER; + } + + __atomic_compare_exchange(dest, &value, &exchange_value, /* weak */ false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + + if (order != memory_order_relaxed) { + FULL_MEM_BARRIER; + } + return value; +} + +template<> +template<typename T> +inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__((unused)), + T compare_value, + T exchange_value, + atomic_memory_order order) const { + STATIC_ASSERT(4 == sizeof(T)); + if (order != memory_order_relaxed) { + FULL_MEM_BARRIER; + } + T rv; + int tmp; + __asm volatile( + "1:\n\t" + " addiw %[tmp], %[cv], 0\n\t" // sign-extend compare_value + " lr.w.aq %[rv], (%[dest])\n\t" + " bne %[rv], %[tmp], 2f\n\t" + " sc.w.rl %[tmp], %[ev], (%[dest])\n\t" + " bnez %[tmp], 1b\n\t" + "2:\n\t" + : [rv] "=&r" (rv), [tmp] "=&r" (tmp) + : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value) + : "memory"); + if (order != memory_order_relaxed) { + FULL_MEM_BARRIER; + } + return rv; +} + +template<size_t byte_size> +struct Atomic::PlatformOrderedLoad<byte_size, X_ACQUIRE> +{ + template <typename T> + T operator()(const volatile T* p) const { T data; __atomic_load(const_cast<T*>(p), &data, __ATOMIC_ACQUIRE); return data; } +}; + +template<size_t byte_size> +struct Atomic::PlatformOrderedStore<byte_size, RELEASE_X> +{ + template <typename T> + void operator()(volatile T* p, T v) const { __atomic_store(const_cast<T*>(p), &v, __ATOMIC_RELEASE); } +}; + +template<size_t byte_size> +struct Atomic::PlatformOrderedStore<byte_size, RELEASE_X_FENCE> +{ + template <typename T> + void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); } +}; + +#endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp =================================================================== ---
/dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP +#define OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP + +#include <byteswap.h> + +// Efficient swapping of data bytes from Java byte +// ordering to native byte ordering and vice versa.
+inline u2 Bytes::swap_u2(u2 x) { + return bswap_16(x); +} + +inline u4 Bytes::swap_u4(u4 x) { + return bswap_32(x); +} + +inline u8 Bytes::swap_u8(u8 x) { + return bswap_64(x); +} + +#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP +#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP + +// Empty for build system + +#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP +#define OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP + +#include <sys/syscall.h> + +// +// Support for building on older Linux systems +// + +#ifndef SYS_memfd_create +#define SYS_memfd_create 279 +#endif +#ifndef SYS_fallocate +#define SYS_fallocate 47 +#endif + +#endif // OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions.
+ * + */ + +#ifndef OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP +#define OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, DontYieldALot, false); +define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default +define_pd_global(intx, VMThreadStackSize, 2048); + +define_pd_global(intx, CompilerThreadStackSize, 2048); + +define_pd_global(uintx, JVMInvokeMethodSlack, 8192); + +// Used on 64 bit platforms for UseCompressedOops base address +define_pd_global(uintx, HeapBaseMinAddress, 2 * G); + +#endif // OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP +#define OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP + +// Included in orderAccess.hpp header file. + +#include "runtime/vm_version.hpp" + +// Implementation of class OrderAccess. + +inline void OrderAccess::loadload() { acquire(); } +inline void OrderAccess::storestore() { release(); } +inline void OrderAccess::loadstore() { acquire(); } +inline void OrderAccess::storeload() { fence(); } + +#define FULL_MEM_BARRIER __sync_synchronize() +#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); +#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); + +inline void OrderAccess::acquire() { + READ_MEM_BARRIER; +} + +inline void OrderAccess::release() { + WRITE_MEM_BARRIER; +} + +inline void OrderAccess::fence() { + FULL_MEM_BARRIER; +} + +inline void OrderAccess::cross_modify_fence_impl() { + asm volatile("fence.i" : : : "memory"); + if (UseConservativeFence) { + asm volatile("fence ir, ir" : : : "memory"); + } +} + +#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp @@ -0,0 +1,466 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// no precompiled headers +#include "asm/macroAssembler.hpp" +#include "classfile/vmSymbols.hpp" +#include "code/codeCache.hpp" +#include "code/icBuffer.hpp" +#include "code/nativeInst.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "jvm.h" +#include "memory/allocation.inline.hpp" +#include "os_share_linux.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "runtime/arguments.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/java.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/osThread.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/timer.hpp" +#include "signals_posix.hpp" +#include "utilities/debug.hpp" +#include "utilities/events.hpp" +#include "utilities/vmError.hpp" + +// put OS-includes here +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +#define REG_LR 1 +#define REG_FP 8 
+ +NOINLINE address os::current_stack_pointer() { + return (address)__builtin_frame_address(0); +} + +char* os::non_memory_address_word() { + // Must never look like an address returned by reserve_memory. + return (char*) -1; +} + +address os::Posix::ucontext_get_pc(const ucontext_t * uc) { + return (address)uc->uc_mcontext.__gregs[REG_PC]; +} + +void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) { + uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc; +} + +intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; +} + +intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; +} + +address os::fetch_frame_from_context(const void* ucVoid, + intptr_t** ret_sp, intptr_t** ret_fp) { + address epc; + const ucontext_t* uc = (const ucontext_t*)ucVoid; + + if (uc != NULL) { + epc = os::Posix::ucontext_get_pc(uc); + if (ret_sp != NULL) { + *ret_sp = os::Linux::ucontext_get_sp(uc); + } + if (ret_fp != NULL) { + *ret_fp = os::Linux::ucontext_get_fp(uc); + } + } else { + epc = NULL; + if (ret_sp != NULL) { + *ret_sp = (intptr_t *)NULL; + } + if (ret_fp != NULL) { + *ret_fp = (intptr_t *)NULL; + } + } + + return epc; +} + +frame os::fetch_compiled_frame_from_context(const void* ucVoid) { + const ucontext_t* uc = (const ucontext_t*)ucVoid; + // In compiled code, the stack banging is performed before RA + // has been saved in the frame. RA is live, and SP and FP + // belong to the caller. 
+ intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); + intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); + address frame_pc = (address)(uc->uc_mcontext.__gregs[REG_LR] + - NativeInstruction::instruction_size); + return frame(frame_sp, frame_fp, frame_pc); +} + +frame os::fetch_frame_from_context(const void* ucVoid) { + intptr_t* frame_sp = NULL; + intptr_t* frame_fp = NULL; + address epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); + return frame(frame_sp, frame_fp, epc); +} + +// By default, gcc always saves frame pointer rfp on this stack. This +// may get turned off by -fomit-frame-pointer. +frame os::get_sender_for_C_frame(frame* fr) { + return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); +} + +NOINLINE frame os::current_frame() { + intptr_t **sender_sp = (intptr_t **)__builtin_frame_address(0); + if (sender_sp != NULL) { + frame myframe((intptr_t*)os::current_stack_pointer(), + sender_sp[frame::link_offset], + CAST_FROM_FN_PTR(address, os::current_frame)); + if (os::is_first_C_frame(&myframe)) { + // stack is not walkable + return frame(); + } else { + return os::get_sender_for_C_frame(&myframe); + } + } else { + ShouldNotReachHere(); + return frame(); + } +} + +// Utility functions +bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, + ucontext_t* uc, JavaThread* thread) { + + // decide if this trap can be handled by a stub + address stub = NULL; + + address pc = NULL; + + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { + pc = (address) os::Posix::ucontext_get_pc(uc); + + address addr = (address) info->si_addr; + + // Make sure the high order byte is sign extended, as it may be masked away by the hardware. 
+ if ((uintptr_t(addr) & (uintptr_t(1) << 55)) != 0) { + addr = address(uintptr_t(addr) | (uintptr_t(0xFF) << 56)); + } + + // Handle ALL stack overflow variations here + if (sig == SIGSEGV) { + // check if fault address is within thread stack + if (thread->is_in_full_stack(addr)) { + if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) { + return true; // continue + } + } + } + + if (thread->thread_state() == _thread_in_Java) { + // Java thread running in Java code => find exception handler if any + // a fault inside compiled code, the interpreter, or a stub + + // Handle signal from NativeJump::patch_verified_entry(). + if ((sig == SIGILL || sig == SIGTRAP) + && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { + if (TraceTraps) { + tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); + } + stub = SharedRuntime::get_handle_wrong_method_stub(); + } else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { + stub = SharedRuntime::get_poll_stub(pc); + } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { + // BugId 4454115: A read from a MappedByteBuffer can fault + // here if the underlying file has been truncated. + // Do not crash the VM in such a case. + CodeBlob* cb = CodeCache::find_blob_unsafe(pc); + CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; + bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); + if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { + address next_pc = pc + NativeCall::instruction_size; + if (is_unsafe_arraycopy) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } + } else if (sig == SIGILL && nativeInstruction_at(pc)->is_stop()) { + // Pull a pointer to the error message out of the instruction + // stream. 
+ const uint64_t *detail_msg_ptr + = (uint64_t*)(pc + NativeInstruction::instruction_size); + const char *detail_msg = (const char *)*detail_msg_ptr; + const char *msg = "stop"; + if (TraceTraps) { + tty->print_cr("trap: %s: (SIGILL)", msg); + } + + // End life with a fatal error, message and detail message and the context. + // Note: no need to do any post-processing here (e.g. signal chaining) + va_list va_dummy; + VMError::report_and_die(thread, uc, NULL, 0, msg, detail_msg, va_dummy); + va_end(va_dummy); + + ShouldNotReachHere(); + } else if (sig == SIGFPE && + (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { + stub = + SharedRuntime:: + continuation_for_implicit_exception(thread, + pc, + SharedRuntime:: + IMPLICIT_DIVIDE_BY_ZERO); + } else if (sig == SIGSEGV && + MacroAssembler::uses_implicit_null_check((void*)addr)) { + // Determination of interpreter/vtable stub/compiled code null exception + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); + } + } else if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && + sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ + thread->doing_unsafe_access()) { + address next_pc = pc + NativeCall::instruction_size; + if (UnsafeCopyMemory::contains_pc(pc)) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } + + // jni_fast_GetField can trap at certain pc's if a GC kicks in + // and the heap gets shrunk before the field access. 
+ if ((sig == SIGSEGV) || (sig == SIGBUS)) { + address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); + if (addr_slow != (address)-1) { + stub = addr_slow; + } + } + } + + if (stub != NULL) { + // save all thread context in case we need to restore it + if (thread != NULL) { + thread->set_saved_exception_pc(pc); + } + + os::Posix::ucontext_set_pc(uc, stub); + return true; + } + + return false; // Mute compiler +} + +void os::Linux::init_thread_fpu_state(void) { +} + +int os::Linux::get_fpu_control_word(void) { + return 0; +} + +void os::Linux::set_fpu_control_word(int fpu_control) { +} + +//////////////////////////////////////////////////////////////////////////////// +// thread stack + +// Minimum usable stack sizes required to get to user code. Space for +// HotSpot guard pages is added later. +size_t os::Posix::_compiler_thread_min_stack_allowed = 72 * K; +size_t os::Posix::_java_thread_min_stack_allowed = 72 * K; +size_t os::Posix::_vm_internal_thread_min_stack_allowed = 72 * K; + +// return default stack size for thr_type +size_t os::Posix::default_stack_size(os::ThreadType thr_type) { + // default stack size (compiler thread needs larger stack) + size_t s = (thr_type == os::compiler_thread ? 
4 * M : 1 * M); + return s; +} + +///////////////////////////////////////////////////////////////////////////// +// helper functions for fatal error handler + +static const char* reg_abi_names[] = { + "pc", + "x1(ra)", "x2(sp)", "x3(gp)", "x4(tp)", + "x5(t0)", "x6(t1)", "x7(t2)", + "x8(s0)", "x9(s1)", + "x10(a0)", "x11(a1)", "x12(a2)", "x13(a3)", "x14(a4)", "x15(a5)", "x16(a6)", "x17(a7)", + "x18(s2)", "x19(s3)", "x20(s4)", "x21(s5)", "x22(s6)", "x23(s7)", "x24(s8)", "x25(s9)", "x26(s10)", "x27(s11)", + "x28(t3)", "x29(t4)","x30(t5)", "x31(t6)" +}; + +void os::print_context(outputStream *st, const void *context) { + if (context == NULL) { + return; + } + + const ucontext_t *uc = (const ucontext_t*)context; + st->print_cr("Registers:"); + for (int r = 0; r < 32; r++) { + st->print("%-*.*s=", 8, 8, reg_abi_names[r]); + print_location(st, uc->uc_mcontext.__gregs[r]); + } + st->cr(); + + intptr_t *frame_sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); + st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(frame_sp)); + print_hex_dump(st, (address)frame_sp, (address)(frame_sp + 64), sizeof(intptr_t)); + st->cr(); + + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. 
+ address pc = os::Posix::ucontext_get_pc(uc); + print_instructions(st, pc, sizeof(char)); + st->cr(); +} + +void os::print_register_info(outputStream *st, const void *context) { + if (context == NULL) { + return; + } + + const ucontext_t *uc = (const ucontext_t*)context; + + st->print_cr("Register to memory mapping:"); + st->cr(); + + // this is horrendously verbose but the layout of the registers in the + // context does not match how we defined our abstract Register set, so + // we can't just iterate through the gregs area + + // this is only for the "general purpose" registers + + for (int r = 0; r < 32; r++) + st->print_cr("%-*.*s=" INTPTR_FORMAT, 8, 8, reg_abi_names[r], (uintptr_t)uc->uc_mcontext.__gregs[r]); + st->cr(); +} + +void os::setup_fpu() { +} + +#ifndef PRODUCT +void os::verify_stack_alignment() { + assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); +} +#endif + +int os::extra_bang_size_in_bytes() { + return 0; +} + +extern "C" { + int SpinPause() { + return 0; + } + + void _Copy_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { + if (from > to) { + const jshort *end = from + count; + while (from < end) { + *(to++) = *(from++); + } + } else if (from < to) { + const jshort *end = from; + from += count - 1; + to += count - 1; + while (from >= end) { + *(to--) = *(from--); + } + } + } + void _Copy_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { + if (from > to) { + const jint *end = from + count; + while (from < end) { + *(to++) = *(from++); + } + } else if (from < to) { + const jint *end = from; + from += count - 1; + to += count - 1; + while (from >= end) { + *(to--) = *(from--); + } + } + } + void _Copy_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { + if (from > to) { + const jlong *end = from + count; + while (from < end) { + os::atomic_copy64(from++, to++); + } + } else if (from < to) { + const jlong *end = from; + from += 
count - 1; + to += count - 1; + while (from >= end) { + os::atomic_copy64(from--, to--); + } + } + } + + void _Copy_arrayof_conjoint_bytes(const HeapWord* from, + HeapWord* to, + size_t count) { + memmove(to, from, count); + } + void _Copy_arrayof_conjoint_jshorts(const HeapWord* from, + HeapWord* to, + size_t count) { + memmove(to, from, count * 2); + } + void _Copy_arrayof_conjoint_jints(const HeapWord* from, + HeapWord* to, + size_t count) { + memmove(to, from, count * 4); + } + void _Copy_arrayof_conjoint_jlongs(const HeapWord* from, + HeapWord* to, + size_t count) { + memmove(to, from, count * 8); + } +}; Index: jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP +#define OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP + + static void setup_fpu(); + + // Used to register dynamic code cache area with the OS + // Note: Currently only used in 64 bit Windows implementations + static bool register_code_area(char *low, char *high) { return true; } + + // Atomically copy 64 bits of data + static void atomic_copy64(const volatile void *src, volatile void *dst) { + *(jlong *) dst = *(const jlong *) src; + } + + // SYSCALL_RISCV_FLUSH_ICACHE is used to flush instruction cache. The "fence.i" instruction + // only works on the current hart, so the kernel provides the icache flush syscall to flush icache + // on each hart. You can pass a flag to determine a global or local icache flush. + static void icache_flush(long int start, long int end) + { + const int SYSCALL_RISCV_FLUSH_ICACHE = 259; + register long int __a7 asm ("a7") = SYSCALL_RISCV_FLUSH_ICACHE; + register long int __a0 asm ("a0") = start; + register long int __a1 asm ("a1") = end; + // the flush can be applied to either all threads or only the current. + // 0 means a global icache flush, and the icache flush will be applied + // to other harts concurrently executing. + register long int __a2 asm ("a2") = 0; + __asm__ volatile ("ecall\n\t" + : "+r" (__a0) + : "r" (__a0), "r" (__a1), "r" (__a2), "r" (__a7) + : "memory"); + } + +#endif // OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP +#define OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP + +#include "runtime/prefetch.hpp" + + +inline void Prefetch::read (void *loc, intx interval) { +} + +inline void Prefetch::write(void *loc, intx interval) { +} + +#endif // OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/thread.inline.hpp" + +frame JavaThread::pd_last_frame() { + assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); + return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread is +// currently interrupted by SIGPROF +bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, + void* ucontext, bool isInJava) { + + assert(Thread::current() == this, "caller must be current thread"); + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + +bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + +bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { + // If we have a last_Java_frame, then we should use it even if + // isInJava == true. It should be more reliable than ucontext info. 
+ if (has_last_Java_frame() && frame_anchor()->walkable()) { + *fr_addr = pd_last_frame(); + return true; + } + + // At this point, we don't have a last_Java_frame, so + // we try to glean some information out of the ucontext + // if we were running Java code when SIGPROF came in. + if (isInJava) { + ucontext_t* uc = (ucontext_t*) ucontext; + + intptr_t* ret_fp = NULL; + intptr_t* ret_sp = NULL; + address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp); + if (addr == NULL || ret_sp == NULL ) { + // ucontext wasn't useful + return false; + } + + frame ret_frame(ret_sp, ret_fp, addr); + if (!ret_frame.safe_for_sender(this)) { +#ifdef COMPILER2 + frame ret_frame2(ret_sp, NULL, addr); + if (!ret_frame2.safe_for_sender(this)) { + // nothing else to try if the frame isn't good + return false; + } + ret_frame = ret_frame2; +#else + // nothing else to try if the frame isn't good + return false; +#endif /* COMPILER2 */ + } + *fr_addr = ret_frame; + return true; + } + + // nothing else to try + return false; +} + +void JavaThread::cache_global_variables() { } Index: jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP +#define OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP + + private: + void pd_initialize() { + _anchor.clear(); + } + + frame pd_last_frame(); + + public: + static ByteSize last_Java_fp_offset() { + return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); + } + + bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, + bool isInJava); + + bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); +private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); + +#endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP +#define OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP + +// These are the OS and CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. 
+ +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* Threads (NOTE: incomplete) */ \ + /******************************/ \ + nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ + nonstatic_field(OSThread, _pthread_id, pthread_t) + + +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + \ + /**********************/ \ + /* Posix Thread IDs */ \ + /**********************/ \ + \ + declare_integer_type(OSThread::thread_id_t) \ + declare_unsigned_integer_type(pthread_t) + +#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#endif // OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/register.hpp" +#include "runtime/os.hpp" +#include "runtime/os.inline.hpp" +#include "runtime/vm_version.hpp" + +#include <asm/hwcap.h> +#include <sys/auxv.h> + +#ifndef HWCAP_ISA_I +#define HWCAP_ISA_I (1 << ('I' - 'A')) +#endif + +#ifndef HWCAP_ISA_M +#define HWCAP_ISA_M (1 << ('M' - 'A')) +#endif + +#ifndef HWCAP_ISA_A +#define HWCAP_ISA_A (1 << ('A' - 'A')) +#endif + +#ifndef HWCAP_ISA_F +#define HWCAP_ISA_F (1 << ('F' - 'A')) +#endif + +#ifndef HWCAP_ISA_D +#define HWCAP_ISA_D (1 << ('D' - 'A')) +#endif + +#ifndef HWCAP_ISA_C +#define HWCAP_ISA_C (1 << ('C' - 'A')) +#endif + +#ifndef HWCAP_ISA_V +#define HWCAP_ISA_V (1 << ('V' - 'A')) +#endif + +#ifndef HWCAP_ISA_B +#define HWCAP_ISA_B (1 << ('B' - 'A')) +#endif + +#define read_csr(csr) \ +({ \ + register unsigned long __v; \ + __asm__ __volatile__ ("csrr %0, %1" \ + : "=r" (__v) \ + : "i" (csr) \ + : "memory"); \ + __v; \ +}) + +uint32_t VM_Version::get_current_vector_length() { + assert(_features & CPU_V, "should not call this"); + return (uint32_t)read_csr(CSR_VLENB); +} + +void VM_Version::get_os_cpu_info() { + + uint64_t auxv = getauxval(AT_HWCAP); + + static_assert(CPU_I == HWCAP_ISA_I, "Flag CPU_I must follow Linux HWCAP"); + static_assert(CPU_M == HWCAP_ISA_M, 
"Flag CPU_M must follow Linux HWCAP"); + static_assert(CPU_A == HWCAP_ISA_A, "Flag CPU_A must follow Linux HWCAP"); + static_assert(CPU_F == HWCAP_ISA_F, "Flag CPU_F must follow Linux HWCAP"); + static_assert(CPU_D == HWCAP_ISA_D, "Flag CPU_D must follow Linux HWCAP"); + static_assert(CPU_C == HWCAP_ISA_C, "Flag CPU_C must follow Linux HWCAP"); + static_assert(CPU_V == HWCAP_ISA_V, "Flag CPU_V must follow Linux HWCAP"); + static_assert(CPU_B == HWCAP_ISA_B, "Flag CPU_B must follow Linux HWCAP"); + _features = auxv & ( + HWCAP_ISA_I | + HWCAP_ISA_M | + HWCAP_ISA_A | + HWCAP_ISA_F | + HWCAP_ISA_D | + HWCAP_ISA_C | + HWCAP_ISA_V | + HWCAP_ISA_B); + + if (FILE *f = fopen("/proc/cpuinfo", "r")) { + char buf[512], *p; + while (fgets(buf, sizeof (buf), f) != NULL) { + if ((p = strchr(buf, ':')) != NULL) { + if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { + char* uarch = os::strdup(p + 2); + uarch[strcspn(uarch, "\n")] = '\0'; + _uarch = uarch; + break; + } + } + } + fclose(f); + } +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/share/c1/c1_LIR.cpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/share/c1/c1_LIR.cpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/share/c1/c1_LIR.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -187,7 +187,6 @@ bool LIR_OprDesc::is_oop() const { void LIR_Op2::verify() const { #ifdef ASSERT switch (code()) { - case lir_cmove: case lir_xchg: break; @@ -238,8 +237,7 @@ void LIR_Op2::verify() const { LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BlockBegin* block) - : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) - , _cond(cond) + : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) , _label(block->label()) , _block(block) , _ublock(NULL) @@ -247,8 +245,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition } LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, CodeStub* stub) : - LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) - , _cond(cond) + LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) , _label(stub->entry()) , _block(NULL) , _ublock(NULL) @@ -256,8 +253,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition } LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BlockBegin* block, BlockBegin* ublock) - : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) - , _cond(cond) + : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) , _label(block->label()) , _block(block) , _ublock(ublock) @@ -279,13 +275,13 @@ void LIR_OpBranch::change_ublock(BlockBe } void LIR_OpBranch::negate_cond() { - switch (_cond) { - case lir_cond_equal: _cond = lir_cond_notEqual; break; - case lir_cond_notEqual: _cond = lir_cond_equal; break; - case lir_cond_less: _cond = lir_cond_greaterEqual; break; - case lir_cond_lessEqual: _cond = lir_cond_greater; break; - case lir_cond_greaterEqual: _cond = lir_cond_less; break; - case lir_cond_greater: _cond = lir_cond_lessEqual; break; + switch (cond()) { + case lir_cond_equal: set_cond(lir_cond_notEqual); break; + case lir_cond_notEqual: set_cond(lir_cond_equal); break; + case 
lir_cond_less: set_cond(lir_cond_greaterEqual); break; + case lir_cond_lessEqual: set_cond(lir_cond_greater); break; + case lir_cond_greaterEqual: set_cond(lir_cond_less); break; + case lir_cond_greater: set_cond(lir_cond_lessEqual); break; default: ShouldNotReachHere(); } } @@ -513,6 +509,13 @@ void LIR_OpVisitState::visit(LIR_Op* op) assert(op->as_OpBranch() != NULL, "must be"); LIR_OpBranch* opBranch = (LIR_OpBranch*)op; + assert(opBranch->_tmp1->is_illegal() && opBranch->_tmp2->is_illegal() && + opBranch->_tmp3->is_illegal() && opBranch->_tmp4->is_illegal() && + opBranch->_tmp5->is_illegal(), "not used"); + + if (opBranch->_opr1->is_valid()) do_input(opBranch->_opr1); + if (opBranch->_opr2->is_valid()) do_input(opBranch->_opr2); + if (opBranch->_info != NULL) do_info(opBranch->_info); assert(opBranch->_result->is_illegal(), "not used"); if (opBranch->_stub != NULL) opBranch->stub()->visit(this); @@ -601,17 +604,19 @@ void LIR_OpVisitState::visit(LIR_Op* op) // to the result operand, otherwise the backend fails case lir_cmove: { - assert(op->as_Op2() != NULL, "must be"); - LIR_Op2* op2 = (LIR_Op2*)op; + assert(op->as_Op4() != NULL, "must be"); + LIR_Op4* op4 = (LIR_Op4*)op; - assert(op2->_info == NULL && op2->_tmp1->is_illegal() && op2->_tmp2->is_illegal() && - op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used"); - assert(op2->_opr1->is_valid() && op2->_opr2->is_valid() && op2->_result->is_valid(), "used"); - - do_input(op2->_opr1); - do_input(op2->_opr2); - do_temp(op2->_opr2); - do_output(op2->_result); + assert(op4->_info == NULL && op4->_tmp1->is_illegal() && op4->_tmp2->is_illegal() && + op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "not used"); + assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && op4->_result->is_valid(), "used"); + + do_input(op4->_opr1); + do_input(op4->_opr2); + if (op4->_opr3->is_valid()) do_input(op4->_opr3); + if (op4->_opr4->is_valid()) 
do_input(op4->_opr4); + do_temp(op4->_opr2); + do_output(op4->_result); break; } @@ -1042,6 +1047,10 @@ void LIR_Op3::emit_code(LIR_Assembler* m masm->emit_op3(this); } +void LIR_Op4::emit_code(LIR_Assembler* masm) { + masm->emit_op4(this); +} + void LIR_OpLock::emit_code(LIR_Assembler* masm) { masm->emit_lock(this); if (stub()) { @@ -1078,6 +1087,10 @@ LIR_List::LIR_List(Compilation* compilat , _file(NULL) , _line(0) #endif +#ifdef RISCV + , _cmp_opr1(LIR_OprFact::illegalOpr) + , _cmp_opr2(LIR_OprFact::illegalOpr) +#endif { } @@ -1095,6 +1108,38 @@ void LIR_List::set_file_and_line(const c } #endif +#ifdef RISCV +void LIR_List::set_cmp_oprs(LIR_Op* op) { + switch (op->code()) { + case lir_cmp: + _cmp_opr1 = op->as_Op2()->in_opr1(); + _cmp_opr2 = op->as_Op2()->in_opr2(); + break; + case lir_branch: // fall through + case lir_cond_float_branch: + assert(op->as_OpBranch()->cond() == lir_cond_always || + (_cmp_opr1 != LIR_OprFact::illegalOpr && _cmp_opr2 != LIR_OprFact::illegalOpr), + "conditional branches must have legal operands"); + if (op->as_OpBranch()->cond() != lir_cond_always) { + op->as_Op2()->set_in_opr1(_cmp_opr1); + op->as_Op2()->set_in_opr2(_cmp_opr2); + } + break; + case lir_cmove: + op->as_Op4()->set_in_opr3(_cmp_opr1); + op->as_Op4()->set_in_opr4(_cmp_opr2); + break; +#if INCLUDE_ZGC + case lir_zloadbarrier_test: + _cmp_opr1 = FrameMap::as_opr(t1); + _cmp_opr2 = LIR_OprFact::intConst(0); + break; +#endif + default: + break; + } +} +#endif void LIR_List::append(LIR_InsertionBuffer* buffer) { assert(this == buffer->lir_list(), "wrong lir list"); @@ -1667,7 +1712,6 @@ const char * LIR_Op::name() const { case lir_cmp_l2i: s = "cmp_l2i"; break; case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; case lir_cmp_fd2i: s = "comp_fd2i"; break; - case lir_cmove: s = "cmove"; break; case lir_add: s = "add"; break; case lir_sub: s = "sub"; break; case lir_mul: s = "mul"; break; @@ -1690,6 +1734,8 @@ const char * LIR_Op::name() const { case lir_irem: s = "irem"; break; case 
lir_fmad: s = "fmad"; break; case lir_fmaf: s = "fmaf"; break; + // LIR_Op4 + case lir_cmove: s = "cmove"; break; // LIR_OpJavaCall case lir_static_call: s = "static"; break; case lir_optvirtual_call: s = "optvirtual"; break; @@ -1825,6 +1871,8 @@ void LIR_Op1::print_patch_code(outputStr // LIR_OpBranch void LIR_OpBranch::print_instr(outputStream* out) const { print_condition(out, cond()); out->print(" "); + in_opr1()->print(out); out->print(" "); + in_opr2()->print(out); out->print(" "); if (block() != NULL) { out->print("[B%d] ", block()->block_id()); } else if (stub() != NULL) { @@ -1911,7 +1959,7 @@ void LIR_OpRoundFP::print_instr(outputSt // LIR_Op2 void LIR_Op2::print_instr(outputStream* out) const { - if (code() == lir_cmove || code() == lir_cmp) { + if (code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch) { print_condition(out, condition()); out->print(" "); } in_opr1()->print(out); out->print(" "); @@ -1962,6 +2010,15 @@ void LIR_Op3::print_instr(outputStream* result_opr()->print(out); } +// LIR_Op4 +void LIR_Op4::print_instr(outputStream* out) const { + print_condition(out, condition()); out->print(" "); + in_opr1()->print(out); out->print(" "); + in_opr2()->print(out); out->print(" "); + in_opr3()->print(out); out->print(" "); + in_opr4()->print(out); out->print(" "); + result_opr()->print(out); +} void LIR_OpLock::print_instr(outputStream* out) const { hdr_opr()->print(out); out->print(" "); Index: jdk17u-jdk-17.0.5-8/src/hotspot/share/c1/c1_LIR.hpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/share/c1/c1_LIR.hpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/share/c1/c1_LIR.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -869,6 +869,7 @@ class LIR_Op2; class LIR_OpDelay; class LIR_Op3; class LIR_OpAllocArray; +class LIR_Op4; class LIR_OpCall; class LIR_OpJavaCall; class LIR_OpRTCall; @@ -913,8 +914,6 @@ enum LIR_Code { , lir_null_check , lir_return , lir_leal - , lir_branch - , lir_cond_float_branch , lir_move , lir_convert , lir_alloc_object @@ -924,11 +923,12 @@ enum LIR_Code { , lir_unwind , end_op1 , begin_op2 + , lir_branch + , lir_cond_float_branch , lir_cmp , lir_cmp_l2i , lir_ucmp_fd2i , lir_cmp_fd2i - , lir_cmove , lir_add , lir_sub , lir_mul @@ -956,6 +956,9 @@ enum LIR_Code { , lir_fmad , lir_fmaf , end_op3 + , begin_op4 + , lir_cmove + , end_op4 , begin_opJavaCall , lir_static_call , lir_optvirtual_call @@ -992,6 +995,11 @@ enum LIR_Code { , begin_opAssert , lir_assert , end_opAssert +#ifdef INCLUDE_ZGC + , begin_opZLoadBarrierTest + , lir_zloadbarrier_test + , end_opZLoadBarrierTest +#endif }; @@ -1128,6 +1136,7 @@ class LIR_Op: public CompilationResource virtual LIR_Op1* as_Op1() { return NULL; } virtual LIR_Op2* as_Op2() { return NULL; } virtual LIR_Op3* as_Op3() { return NULL; } + virtual LIR_Op4* as_Op4() { return NULL; } virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } @@ -1400,45 +1409,6 @@ class LIR_OpRTCall: public LIR_OpCall { }; -class LIR_OpBranch: public LIR_Op { - friend class LIR_OpVisitState; - - private: - LIR_Condition _cond; - Label* _label; - BlockBegin* _block; // if this is a branch to a block, this is the block - BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block - CodeStub* _stub; // if this is a branch to a stub, this is the stub - - public: - LIR_OpBranch(LIR_Condition cond, Label* lbl) - : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) - , _cond(cond) - , _label(lbl) - , _block(NULL) - , 
_ublock(NULL) - , _stub(NULL) { } - - LIR_OpBranch(LIR_Condition cond, BlockBegin* block); - LIR_OpBranch(LIR_Condition cond, CodeStub* stub); - - // for unordered comparisons - LIR_OpBranch(LIR_Condition cond, BlockBegin* block, BlockBegin* ublock); - - LIR_Condition cond() const { return _cond; } - Label* label() const { return _label; } - BlockBegin* block() const { return _block; } - BlockBegin* ublock() const { return _ublock; } - CodeStub* stub() const { return _stub; } - - void change_block(BlockBegin* b); - void change_ublock(BlockBegin* b); - void negate_cond(); - - virtual void emit_code(LIR_Assembler* masm); - virtual LIR_OpBranch* as_OpBranch() { return this; } - virtual void print_instr(outputStream* out) const PRODUCT_RETURN; -}; class LIR_OpReturn: public LIR_Op1 { friend class LIR_OpVisitState; @@ -1612,19 +1582,19 @@ class LIR_Op2: public LIR_Op { void verify() const; public: - LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL) + LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL, BasicType type = T_ILLEGAL) : LIR_Op(code, LIR_OprFact::illegalOpr, info) , _fpu_stack_size(0) , _opr1(opr1) , _opr2(opr2) - , _type(T_ILLEGAL) + , _type(type) , _tmp1(LIR_OprFact::illegalOpr) , _tmp2(LIR_OprFact::illegalOpr) , _tmp3(LIR_OprFact::illegalOpr) , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) , _condition(condition) { - assert(code == lir_cmp || code == lir_assert, "code check"); + assert(code == lir_cmp || code == lir_branch || code == lir_cond_float_branch || code == lir_assert, "code check"); } LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) @@ -1656,7 +1626,7 @@ class LIR_Op2: public LIR_Op { , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) , _condition(lir_cond_unknown) { - assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); + 
assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); } LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, @@ -1672,7 +1642,7 @@ class LIR_Op2: public LIR_Op { , _tmp4(tmp4) , _tmp5(tmp5) , _condition(lir_cond_unknown) { - assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); + assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); } LIR_Opr in_opr1() const { return _opr1; } @@ -1684,10 +1654,10 @@ class LIR_Op2: public LIR_Op { LIR_Opr tmp4_opr() const { return _tmp4; } LIR_Opr tmp5_opr() const { return _tmp5; } LIR_Condition condition() const { - assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; + assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; } void set_condition(LIR_Condition condition) { - assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; + assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for branch"); _condition = condition; } void set_fpu_stack_size(int size) { _fpu_stack_size = size; } @@ -1701,6 +1671,51 @@ class LIR_Op2: public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; +class LIR_OpBranch: public LIR_Op2 { + friend class LIR_OpVisitState; + + private: + Label* _label; + BlockBegin* _block; // if this is a branch to a block, this is the block + BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block + CodeStub* _stub; // if this is a branch to a stub, this is the stub + + public: + LIR_OpBranch(LIR_Condition cond, Label* lbl) + : 
LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) + , _label(lbl) + , _block(NULL) + , _ublock(NULL) + , _stub(NULL) { } + + LIR_OpBranch(LIR_Condition cond, BlockBegin* block); + LIR_OpBranch(LIR_Condition cond, CodeStub* stub); + + // for unordered comparisons + LIR_OpBranch(LIR_Condition cond, BlockBegin* block, BlockBegin* ublock); + + LIR_Condition cond() const { + return condition(); + } + + void set_cond(LIR_Condition cond) { + set_condition(cond); + } + + Label* label() const { return _label; } + BlockBegin* block() const { return _block; } + BlockBegin* ublock() const { return _ublock; } + CodeStub* stub() const { return _stub; } + + void change_block(BlockBegin* b); + void change_ublock(BlockBegin* b); + void negate_cond(); + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_OpBranch* as_OpBranch() { return this; } + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +}; + class LIR_OpAllocArray : public LIR_Op { friend class LIR_OpVisitState; @@ -1764,6 +1779,63 @@ class LIR_Op3: public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; +class LIR_Op4: public LIR_Op { + friend class LIR_OpVisitState; + protected: + LIR_Opr _opr1; + LIR_Opr _opr2; + LIR_Opr _opr3; + LIR_Opr _opr4; + BasicType _type; + LIR_Opr _tmp1; + LIR_Opr _tmp2; + LIR_Opr _tmp3; + LIR_Opr _tmp4; + LIR_Opr _tmp5; + LIR_Condition _condition; + + public: + LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4, + LIR_Opr result, BasicType type) + : LIR_Op(code, result, NULL) + , _opr1(opr1) + , _opr2(opr2) + , _opr3(opr3) + , _opr4(opr4) + , _type(type) + , _tmp1(LIR_OprFact::illegalOpr) + , _tmp2(LIR_OprFact::illegalOpr) + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) + , _condition(condition) { + assert(code == lir_cmove, "code check"); + assert(type != T_ILLEGAL, "cmove should have 
type"); + } + + LIR_Opr in_opr1() const { return _opr1; } + LIR_Opr in_opr2() const { return _opr2; } + LIR_Opr in_opr3() const { return _opr3; } + LIR_Opr in_opr4() const { return _opr4; } + BasicType type() const { return _type; } + LIR_Opr tmp1_opr() const { return _tmp1; } + LIR_Opr tmp2_opr() const { return _tmp2; } + LIR_Opr tmp3_opr() const { return _tmp3; } + LIR_Opr tmp4_opr() const { return _tmp4; } + LIR_Opr tmp5_opr() const { return _tmp5; } + + LIR_Condition condition() const { return _condition; } + void set_condition(LIR_Condition condition) { _condition = condition; } + + void set_in_opr1(LIR_Opr opr) { _opr1 = opr; } + void set_in_opr2(LIR_Opr opr) { _opr2 = opr; } + void set_in_opr3(LIR_Opr opr) { _opr3 = opr; } + void set_in_opr4(LIR_Opr opr) { _opr4 = opr; } + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_Op4* as_Op4() { return this; } + + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +}; //-------------------------------- class LabelObj: public CompilationResourceObj { @@ -1986,6 +2058,10 @@ class LIR_List: public CompilationResour const char * _file; int _line; #endif +#ifdef RISCV + LIR_Opr _cmp_opr1; + LIR_Opr _cmp_opr2; +#endif public: void append(LIR_Op* op) { @@ -1998,6 +2074,12 @@ class LIR_List: public CompilationResour } #endif // PRODUCT +#ifdef RISCV + set_cmp_oprs(op); + // lir_cmp set cmp oprs only on riscv + if (op->code() == lir_cmp) return; +#endif + _operations.append(op); #ifdef ASSERT @@ -2014,6 +2096,10 @@ class LIR_List: public CompilationResour void set_file_and_line(const char * file, int line); #endif +#ifdef RISCV + void set_cmp_oprs(LIR_Op* op); +#endif + //---------- accessors --------------- LIR_OpList* instructions_list() { return &_operations; } int length() const { return _operations.length(); } @@ -2133,8 +2219,9 @@ class LIR_List: public CompilationResour void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); void cmp_reg_mem(LIR_Condition 
condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); - void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { - append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); + void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type, + LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr) { + append(new LIR_Op4(lir_cmove, condition, src1, src2, cmp_opr1, cmp_opr2, dst, type)); } void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, Index: jdk17u-jdk-17.0.5-8/src/hotspot/share/c1/c1_LIRAssembler.cpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -691,10 +691,6 @@ void LIR_Assembler::emit_op2(LIR_Op2* op comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op); break; - case lir_cmove: - cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type()); - break; - case lir_shl: case lir_shr: case lir_ushr: @@ -756,6 +752,17 @@ void LIR_Assembler::emit_op2(LIR_Op2* op } } +void LIR_Assembler::emit_op4(LIR_Op4* op) { + switch(op->code()) { + case lir_cmove: + cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type(), op->in_opr3(), op->in_opr4()); + break; + + default: + Unimplemented(); + break; + } +} void LIR_Assembler::build_frame() { _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); Index: jdk17u-jdk-17.0.5-8/src/hotspot/share/c1/c1_LIRAssembler.hpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/share/c1/c1_LIRAssembler.hpp @@ -186,6 +186,7 @@ class LIR_Assembler: public CompilationR void emit_op1(LIR_Op1* op); void emit_op2(LIR_Op2* op); void emit_op3(LIR_Op3* op); + 
void emit_op4(LIR_Op4* op); void emit_opBranch(LIR_OpBranch* op); void emit_opLabel(LIR_OpLabel* op); void emit_arraycopy(LIR_OpArrayCopy* op); @@ -218,8 +219,8 @@ class LIR_Assembler: public CompilationR void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); - void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); - + void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr); void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); void ic_call( LIR_OpJavaCall* op); void vtable_call( LIR_OpJavaCall* op); Index: jdk17u-jdk-17.0.5-8/src/hotspot/share/c1/c1_LinearScan.cpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/share/c1/c1_LinearScan.cpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/share/c1/c1_LinearScan.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -1240,11 +1240,11 @@ void LinearScan::add_register_hints(LIR_ break; } case lir_cmove: { - assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2"); - LIR_Op2* cmove = (LIR_Op2*)op; + assert(op->as_Op4() != NULL, "lir_cmove must be LIR_Op4"); + LIR_Op4* cmove = (LIR_Op4*)op; LIR_Opr move_from = cmove->in_opr1(); - LIR_Opr move_to = cmove->result_opr(); + LIR_Opr move_to = cmove->result_opr(); if (move_to->is_register() && move_from->is_register()) { Interval* from = interval_at(reg_num(move_from)); @@ -3138,6 +3138,9 @@ void LinearScan::do_linear_scan() { } } +#ifndef RISCV + // Disable these optimizations on riscv temporarily, because it does not + // work when the comparison operands are bound to branches or cmoves. { TIME_LINEAR_SCAN(timer_optimize_lir); EdgeMoveOptimizer::optimize(ir()->code()); @@ -3145,6 +3148,7 @@ void LinearScan::do_linear_scan() { // check that cfg is still correct after optimizations ir()->verify(); } +#endif NOT_PRODUCT(print_lir(1, "Before Code Generation", false)); NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_final)); @@ -6368,14 +6372,14 @@ void ControlFlowOptimizer::delete_unnece // There might be a cmove inserted for profiling which depends on the same // compare. If we change the condition of the respective compare, we have // to take care of this cmove as well. 
- LIR_Op2* prev_cmove = NULL; + LIR_Op4* prev_cmove = NULL; for(int j = instructions->length() - 3; j >= 0 && prev_cmp == NULL; j--) { prev_op = instructions->at(j); // check for the cmove if (prev_op->code() == lir_cmove) { - assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2"); - prev_cmove = (LIR_Op2*)prev_op; + assert(prev_op->as_Op4() != NULL, "cmove must be of type LIR_Op4"); + prev_cmove = (LIR_Op4*)prev_op; assert(prev_branch->cond() == prev_cmove->condition(), "should be the same"); } if (prev_op->code() == lir_cmp) { Index: jdk17u-jdk-17.0.5-8/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. + * Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -35,7 +35,7 @@ #include "utilities/defaultStream.hpp" void ShenandoahArguments::initialize() { -#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined PPC64) +#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined PPC64 || defined RISCV64) vm_exit_during_initialization("Shenandoah GC is not supported on this platform."); #endif Index: jdk17u-jdk-17.0.5-8/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -94,7 +94,7 @@ private: public: LIR_OpZLoadBarrierTest(LIR_Opr opr) : - LIR_Op(), + LIR_Op(lir_zloadbarrier_test, LIR_OprFact::illegalOpr, NULL), _opr(opr) {} virtual void visit(LIR_OpVisitState* state) { Index: jdk17u-jdk-17.0.5-8/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp @@ -102,7 +102,7 @@ inline T JfrBigEndian::read_unaligned(co inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) return true; -#elif defined(ARM) || defined(AARCH64) +#elif defined(ARM) || defined(AARCH64) || defined(RISCV) return false; #else #warning "Unconfigured platform" Index: jdk17u-jdk-17.0.5-8/src/hotspot/share/opto/regmask.hpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/share/opto/regmask.hpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/share/opto/regmask.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -99,7 +99,7 @@ class RegMask { // requirement is internal to the allocator, and independent of any // particular platform. 
enum { SlotsPerLong = 2, - SlotsPerVecA = 8, + SlotsPerVecA = RISCV_ONLY(4) NOT_RISCV(8), SlotsPerVecS = 1, SlotsPerVecD = 2, SlotsPerVecX = 4, Index: jdk17u-jdk-17.0.5-8/src/hotspot/share/runtime/abstract_vm_version.cpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/share/runtime/abstract_vm_version.cpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/share/runtime/abstract_vm_version.cpp @@ -183,7 +183,8 @@ const char* Abstract_VM_Version::jre_rel AMD64_ONLY("amd64") \ IA32_ONLY("x86") \ IA64_ONLY("ia64") \ - S390_ONLY("s390") + S390_ONLY("s390") \ + RISCV64_ONLY("riscv64") #endif // !ZERO #endif // !CPU Index: jdk17u-jdk-17.0.5-8/src/hotspot/share/runtime/synchronizer.cpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/share/runtime/synchronizer.cpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/share/runtime/synchronizer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it Index: jdk17u-jdk-17.0.5-8/src/hotspot/share/runtime/thread.inline.hpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/share/runtime/thread.inline.hpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/share/runtime/thread.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -132,7 +132,7 @@ inline void JavaThread::set_pending_asyn } inline JavaThreadState JavaThread::thread_state() const { -#if defined(PPC64) || defined (AARCH64) +#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) // Use membars when accessing volatile _thread_state. See // Threads::create_vm() for size checks. return (JavaThreadState) Atomic::load_acquire((volatile jint*)&_thread_state); @@ -144,7 +144,7 @@ inline JavaThreadState JavaThread::threa inline void JavaThread::set_thread_state(JavaThreadState s) { assert(current_or_null() == NULL || current_or_null() == this, "state change should only be called by the current thread"); -#if defined(PPC64) || defined (AARCH64) +#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) // Use membars when accessing volatile _thread_state. See // Threads::create_vm() for size checks. Atomic::release_store((volatile jint*)&_thread_state, (jint)s); Index: jdk17u-jdk-17.0.5-8/src/hotspot/share/utilities/macros.hpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/share/utilities/macros.hpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/share/utilities/macros.hpp @@ -553,6 +553,32 @@ #define MACOS_AARCH64_ONLY(x) MACOS_ONLY(AARCH64_ONLY(x)) +#if defined(RISCV32) || defined(RISCV64) +#define RISCV +#define RISCV_ONLY(code) code +#define NOT_RISCV(code) +#else +#undef RISCV +#define RISCV_ONLY(code) +#define NOT_RISCV(code) code +#endif + +#ifdef RISCV32 +#define RISCV32_ONLY(code) code +#define NOT_RISCV32(code) +#else +#define RISCV32_ONLY(code) +#define NOT_RISCV32(code) code +#endif + +#ifdef RISCV64 +#define RISCV64_ONLY(code) code +#define NOT_RISCV64(code) +#else +#define RISCV64_ONLY(code) +#define NOT_RISCV64(code) code +#endif + #ifdef VM_LITTLE_ENDIAN #define LITTLE_ENDIAN_ONLY(code) code #define BIG_ENDIAN_ONLY(code) Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp 
=================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2019, 2021, NTT DATA. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -60,6 +60,10 @@ #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" #endif +#ifdef riscv64 +#include "sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext.h" +#endif + class AutoJavaString { JNIEnv* m_env; jstring m_str; @@ -408,7 +412,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jv return (err == PS_OK)? array : 0; } -#if defined(i586) || defined(amd64) || defined(ppc64) || defined(ppc64le) || defined(aarch64) +#if defined(i586) || defined(amd64) || defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) extern "C" JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 (JNIEnv *env, jobject this_obj, jint lwp_id) { @@ -440,6 +444,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jv #ifdef aarch64 #define NPRGREG sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext_NPRGREG #endif +#ifdef riscv64 +#define NPRGREG sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_NPRGREG +#endif #if defined(ppc64) || defined(ppc64le) #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG #endif @@ -516,6 +523,44 @@ JNIEXPORT jlongArray JNICALL Java_sun_jv } #endif /* aarch64 */ +#if defined(riscv64) +#define REG_INDEX(reg) sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_##reg + + regs[REG_INDEX(PC)] = gregs.pc; + regs[REG_INDEX(LR)] = gregs.ra; + regs[REG_INDEX(SP)] = gregs.sp; + regs[REG_INDEX(R3)] = gregs.gp; + regs[REG_INDEX(R4)] = 
gregs.tp; + regs[REG_INDEX(R5)] = gregs.t0; + regs[REG_INDEX(R6)] = gregs.t1; + regs[REG_INDEX(R7)] = gregs.t2; + regs[REG_INDEX(R8)] = gregs.s0; + regs[REG_INDEX(R9)] = gregs.s1; + regs[REG_INDEX(R10)] = gregs.a0; + regs[REG_INDEX(R11)] = gregs.a1; + regs[REG_INDEX(R12)] = gregs.a2; + regs[REG_INDEX(R13)] = gregs.a3; + regs[REG_INDEX(R14)] = gregs.a4; + regs[REG_INDEX(R15)] = gregs.a5; + regs[REG_INDEX(R16)] = gregs.a6; + regs[REG_INDEX(R17)] = gregs.a7; + regs[REG_INDEX(R18)] = gregs.s2; + regs[REG_INDEX(R19)] = gregs.s3; + regs[REG_INDEX(R20)] = gregs.s4; + regs[REG_INDEX(R21)] = gregs.s5; + regs[REG_INDEX(R22)] = gregs.s6; + regs[REG_INDEX(R23)] = gregs.s7; + regs[REG_INDEX(R24)] = gregs.s8; + regs[REG_INDEX(R25)] = gregs.s9; + regs[REG_INDEX(R26)] = gregs.s10; + regs[REG_INDEX(R27)] = gregs.s11; + regs[REG_INDEX(R28)] = gregs.t3; + regs[REG_INDEX(R29)] = gregs.t4; + regs[REG_INDEX(R30)] = gregs.t5; + regs[REG_INDEX(R31)] = gregs.t6; + +#endif /* riscv64 */ + #if defined(ppc64) || defined(ppc64le) #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -43,6 +43,8 @@ #elif defined(arm) #include <asm/ptrace.h> #define user_regs_struct pt_regs +#elif defined(riscv64) +#include <asm/ptrace.h> #endif // This C bool type must be int for compatibility with Linux calls and Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java @@ -36,6 +36,7 @@ import sun.jvm.hotspot.debugger.MachineD import sun.jvm.hotspot.debugger.MachineDescriptionAMD64; import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; +import sun.jvm.hotspot.debugger.MachineDescriptionRISCV64; import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; import sun.jvm.hotspot.debugger.NoSuchSymbolException; import sun.jvm.hotspot.debugger.bsd.BsdDebuggerLocal; @@ -569,6 +570,8 @@ public class HotSpotAgent { machDesc = new MachineDescriptionPPC64(); } else if (cpu.equals("aarch64")) { machDesc = new MachineDescriptionAArch64(); + } else if (cpu.equals("riscv64")) { + machDesc = new MachineDescriptionRISCV64(); } else { try { machDesc = (MachineDescription) Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger; + +public class MachineDescriptionRISCV64 extends MachineDescriptionTwosComplement implements MachineDescription { + public long getAddressSize() { + return 8; + } + + public boolean isLP64() { + return true; + } + + public boolean isBigEndian() { + return false; + } +} Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015, Red Hat Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -33,11 +33,13 @@ import sun.jvm.hotspot.debugger.cdbg.*; import sun.jvm.hotspot.debugger.x86.*; import sun.jvm.hotspot.debugger.amd64.*; import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.riscv64.*; import sun.jvm.hotspot.debugger.ppc64.*; import sun.jvm.hotspot.debugger.linux.x86.*; import sun.jvm.hotspot.debugger.linux.amd64.*; import sun.jvm.hotspot.debugger.linux.ppc64.*; import sun.jvm.hotspot.debugger.linux.aarch64.*; +import sun.jvm.hotspot.debugger.linux.riscv64.*; import sun.jvm.hotspot.utilities.*; class LinuxCDebugger implements CDebugger { @@ -105,7 +107,14 @@ class LinuxCDebugger implements CDebugge Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); if (pc == null) return null; return new LinuxAARCH64CFrame(dbg, fp, pc); - } else { + } else if (cpu.equals("riscv64")) { + RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); + Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); + if (fp == null) return null; + Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); + if (pc == null) return null; + return new LinuxRISCV64CFrame(dbg, fp, pc); + } else { // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu ThreadContext context = (ThreadContext) thread.getContext(); return context.getTopFrame(dbg); Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.debugger.linux.*; +import sun.jvm.hotspot.debugger.cdbg.*; +import sun.jvm.hotspot.debugger.cdbg.basic.*; + +public final class LinuxRISCV64CFrame extends BasicCFrame { + private static final int C_FRAME_LINK_OFFSET = -2; + private static final int C_FRAME_RETURN_ADDR_OFFSET = -1; + + public LinuxRISCV64CFrame(LinuxDebugger dbg, Address fp, Address pc) { + super(dbg.getCDebugger()); + this.fp = fp; + this.pc = pc; + this.dbg = dbg; + } + + // override base class impl to avoid ELF parsing + public ClosestSymbol closestSymbolToPC() { + // try native lookup in debugger. 
+ return dbg.lookup(dbg.getAddressValue(pc())); + } + + public Address pc() { + return pc; + } + + public Address localVariableBase() { + return fp; + } + + public CFrame sender(ThreadProxy thread) { + RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); + Address rsp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); + + if ((fp == null) || fp.lessThan(rsp)) { + return null; + } + + // Check alignment of fp + if (dbg.getAddressValue(fp) % (2 * ADDRESS_SIZE) != 0) { + return null; + } + + Address nextFP = fp.getAddressAt(C_FRAME_LINK_OFFSET * ADDRESS_SIZE); + if (nextFP == null || nextFP.lessThanOrEqual(fp)) { + return null; + } + Address nextPC = fp.getAddressAt(C_FRAME_RETURN_ADDR_OFFSET * ADDRESS_SIZE); + if (nextPC == null) { + return null; + } + return new LinuxRISCV64CFrame(dbg, nextFP, nextPC); + } + + // package/class internals only + private static final int ADDRESS_SIZE = 8; + private Address pc; + private Address sp; + private Address fp; + private LinuxDebugger dbg; +} Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.debugger.linux.*; + +public class LinuxRISCV64ThreadContext extends RISCV64ThreadContext { + private LinuxDebugger debugger; + + public LinuxRISCV64ThreadContext(LinuxDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.debugger.proc.*; +import sun.jvm.hotspot.utilities.*; + +public class ProcRISCV64Thread implements ThreadProxy { + private ProcDebugger debugger; + private int id; + + public ProcRISCV64Thread(ProcDebugger debugger, Address addr) { + this.debugger = debugger; + + // FIXME: the size here should be configurable. However, making it + // so would produce a dependency on the "types" package from the + // debugger package, which is not desired. 
+ this.id = (int) addr.getCIntegerAt(0, 4, true); + } + + public ProcRISCV64Thread(ProcDebugger debugger, long id) { + this.debugger = debugger; + this.id = (int) id; + } + + public ThreadContext getContext() throws IllegalThreadStateException { + ProcRISCV64ThreadContext context = new ProcRISCV64ThreadContext(debugger); + long[] regs = debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size mismatch"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } + + public boolean canSetContext() throws DebuggerException { + return false; + } + + public void setContext(ThreadContext context) + throws IllegalThreadStateException, DebuggerException { + throw new DebuggerException("Unimplemented"); + } + + public String toString() { + return "t@" + id; + } + + public boolean equals(Object obj) { + if ((obj == null) || !(obj instanceof ProcRISCV64Thread)) { + return false; + } + + return (((ProcRISCV64Thread) obj).id == id); + } + + public int hashCode() { + return id; + } +} Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcRISCV64ThreadContext extends RISCV64ThreadContext { + private ProcDebugger debugger; + + public ProcRISCV64ThreadContext(ProcDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcRISCV64ThreadFactory implements ProcThreadFactory { + private ProcDebugger debugger; + + public ProcRISCV64ThreadFactory(ProcDebugger debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new ProcRISCV64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new ProcRISCV64Thread(debugger, id); + } +} Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. 
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.debugger.remote.*; +import sun.jvm.hotspot.utilities.*; + +public class RemoteRISCV64Thread extends RemoteThread { + public RemoteRISCV64Thread(RemoteDebuggerClient debugger, Address addr) { + super(debugger, addr); + } + + public RemoteRISCV64Thread(RemoteDebuggerClient debugger, long id) { + super(debugger, id); + } + + public ThreadContext getContext() throws IllegalThreadStateException { + RemoteRISCV64ThreadContext context = new RemoteRISCV64ThreadContext(debugger); + long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : + debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size of register set must match"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } +} Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.remote.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteRISCV64ThreadContext extends RISCV64ThreadContext { + private RemoteDebuggerClient debugger; + + public RemoteRISCV64ThreadContext(RemoteDebuggerClient debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteRISCV64ThreadFactory implements RemoteThreadFactory { + private RemoteDebuggerClient debugger; + + public RemoteRISCV64ThreadFactory(RemoteDebuggerClient debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new RemoteRISCV64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new RemoteRISCV64Thread(debugger, id); + } +} Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.riscv64; + +import java.lang.annotation.Native; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.cdbg.*; + +/** Specifies the thread context on riscv64 platforms; only a sub-portion + * of the context is guaranteed to be present on all operating + * systems. */ + +public abstract class RISCV64ThreadContext implements ThreadContext { + // Taken from /usr/include/asm/sigcontext.h on Linux/RISCV64. + + // /* + // * Signal context structure - contains all info to do with the state + // * before the signal handler was invoked. 
+ // */ + // struct sigcontext { + // struct user_regs_struct sc_regs; + // union __riscv_fp_state sc_fpregs; + // }; + // + // struct user_regs_struct { + // unsigned long pc; + // unsigned long ra; + // unsigned long sp; + // unsigned long gp; + // unsigned long tp; + // unsigned long t0; + // unsigned long t1; + // unsigned long t2; + // unsigned long s0; + // unsigned long s1; + // unsigned long a0; + // unsigned long a1; + // unsigned long a2; + // unsigned long a3; + // unsigned long a4; + // unsigned long a5; + // unsigned long a6; + // unsigned long a7; + // unsigned long s2; + // unsigned long s3; + // unsigned long s4; + // unsigned long s5; + // unsigned long s6; + // unsigned long s7; + // unsigned long s8; + // unsigned long s9; + // unsigned long s10; + // unsigned long s11; + // unsigned long t3; + // unsigned long t4; + // unsigned long t5; + // unsigned long t6; + // }; + + // NOTE: the indices for the various registers must be maintained as + // listed across various operating systems. However, only a small + // subset of the registers' values are guaranteed to be present (and + // must be present for the SA's stack walking to work) + + // One instance of the Native annotation is enough to trigger header generation + // for this file. 
+ @Native + public static final int R0 = 0; + public static final int R1 = 1; + public static final int R2 = 2; + public static final int R3 = 3; + public static final int R4 = 4; + public static final int R5 = 5; + public static final int R6 = 6; + public static final int R7 = 7; + public static final int R8 = 8; + public static final int R9 = 9; + public static final int R10 = 10; + public static final int R11 = 11; + public static final int R12 = 12; + public static final int R13 = 13; + public static final int R14 = 14; + public static final int R15 = 15; + public static final int R16 = 16; + public static final int R17 = 17; + public static final int R18 = 18; + public static final int R19 = 19; + public static final int R20 = 20; + public static final int R21 = 21; + public static final int R22 = 22; + public static final int R23 = 23; + public static final int R24 = 24; + public static final int R25 = 25; + public static final int R26 = 26; + public static final int R27 = 27; + public static final int R28 = 28; + public static final int R29 = 29; + public static final int R30 = 30; + public static final int R31 = 31; + + public static final int NPRGREG = 32; + + public static final int PC = R0; + public static final int LR = R1; + public static final int SP = R2; + public static final int FP = R8; + + private long[] data; + + public RISCV64ThreadContext() { + data = new long[NPRGREG]; + } + + public int getNumRegisters() { + return NPRGREG; + } + + public String getRegisterName(int index) { + switch (index) { + case LR: return "lr"; + case SP: return "sp"; + case PC: return "pc"; + default: + return "r" + index; + } + } + + public void setRegister(int index, long value) { + data[index] = value; + } + + public long getRegister(int index) { + return data[index]; + } + + public CFrame getTopFrame(Debugger dbg) { + return null; + } + + /** This can't be implemented in this class since we would have to + * tie the implementation to, for example, the debugging 
system */ + public abstract void setRegisterAsAddress(int index, Address value); + + /** This can't be implemented in this class since we would have to + * tie the implementation to, for example, the debugging system */ + public abstract Address getRegisterAsAddress(int index); +} Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,6 +34,7 @@ import sun.jvm.hotspot.runtime.win32_aar import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; +import sun.jvm.hotspot.runtime.linux_riscv64.LinuxRISCV64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.bsd_amd64.BsdAMD64JavaThreadPDAccess; @@ -113,6 +114,8 @@ public class Threads { access = new LinuxPPC64JavaThreadPDAccess(); } else if (cpu.equals("aarch64")) { access = new LinuxAARCH64JavaThreadPDAccess(); + } else if (cpu.equals("riscv64")) { + access = new LinuxRISCV64JavaThreadPDAccess(); } else { try { access = (JavaThreadPDAccess) Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java 
=================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.linux_riscv64; + +import java.io.*; +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.riscv64.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; +import sun.jvm.hotspot.utilities.Observable; +import sun.jvm.hotspot.utilities.Observer; + +public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField lastJavaFPField; + private static AddressField osThreadField; + + // Field from OSThread + private static CIntegerField osThreadThreadIDField; + + // This is currently unneeded but is being kept in case we change + // the currentFrameGuess algorithm + private static final long GUESS_SCAN_RANGE = 128 * 1024; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaThread"); + osThreadField = type.getAddressField("_osthread"); + + Type anchorType = db.lookupType("JavaFrameAnchor"); + lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); + + Type osThreadType = db.lookupType("OSThread"); + osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); + } + + public Address getLastJavaFP(Address addr) { + return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); + } + + public Address getLastJavaPC(Address addr) { + return null; + } + + public Address getBaseOfStackPointer(Address addr) { + return null; + } + + public Frame getLastFramePD(JavaThread thread, Address addr) { + Address fp = thread.getLastJavaFP(); + if (fp == null) { + return null; // no information + } + return new RISCV64Frame(thread.getLastJavaSP(), fp); + } + + public RegisterMap 
newRegisterMap(JavaThread thread, boolean updateMap) { + return new RISCV64RegisterMap(thread, updateMap); + } + + public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { + ThreadProxy t = getThreadProxy(addr); + RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); + RISCV64CurrentFrameGuess guesser = new RISCV64CurrentFrameGuess(context, thread); + if (!guesser.run(GUESS_SCAN_RANGE)) { + return null; + } + if (guesser.getPC() == null) { + return new RISCV64Frame(guesser.getSP(), guesser.getFP()); + } else { + return new RISCV64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); + } + } + + public void printThreadIDOn(Address addr, PrintStream tty) { + tty.print(getThreadProxy(addr)); + } + + public void printInfoOn(Address threadAddr, PrintStream tty) { + tty.print("Thread id: "); + printThreadIDOn(threadAddr, tty); + } + + public Address getLastSP(Address addr) { + ThreadProxy t = getThreadProxy(addr); + RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); + return context.getRegisterAsAddress(RISCV64ThreadContext.SP); + } + + public ThreadProxy getThreadProxy(Address addr) { + // Addr is the address of the JavaThread. 
+ // Fetch the OSThread (for now and for simplicity, not making a + // separate "OSThread" class in this package) + Address osThreadAddr = osThreadField.getValue(addr); + // Get the address of the _thread_id from the OSThread + Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); + + JVMDebugger debugger = VM.getVM().getDebugger(); + return debugger.getThreadForIdentifierAddress(threadIdAddr); + } +} Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.interpreter.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.riscv64.*; + +/**

Should be usable on all riscv64 platforms we support + (Linux/riscv64) to implement JavaThread's "currentFrameGuess()" + functionality. Input is a RISCV64ThreadContext; output is SP, FP, + and PC for a RISCV64Frame. Instantiation of the RISCV64Frame is + left to the caller, since we may need to subclass RISCV64Frame to + support signal handler frames on Unix platforms.

+ +

Algorithm is to walk up the stack within a given range (say, + 512K at most) looking for a plausible PC and SP for a Java frame, + also considering those coming in from the context. If we find a PC + that belongs to the VM (i.e., in generated code like the + interpreter or CodeCache) then we try to find an associated FP. + We repeat this until we either find a complete frame or run out of + stack to look at.

*/ + +public class RISCV64CurrentFrameGuess { + private RISCV64ThreadContext context; + private JavaThread thread; + private Address spFound; + private Address fpFound; + private Address pcFound; + + private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.riscv64.RISCV64Frame.DEBUG") + != null; + + public RISCV64CurrentFrameGuess(RISCV64ThreadContext context, + JavaThread thread) { + this.context = context; + this.thread = thread; + } + + /** Returns false if not able to find a frame within a reasonable range. */ + public boolean run(long regionInBytesToSearch) { + Address sp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); + Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); + Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); + if (sp == null) { + // Bail out if no last java frame either + if (thread.getLastJavaSP() != null) { + setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); + return true; + } + return false; + } + Address end = sp.addOffsetTo(regionInBytesToSearch); + VM vm = VM.getVM(); + + setValues(null, null, null); // Assume we're not going to find anything + + if (vm.isJavaPCDbg(pc)) { + if (vm.isClientCompiler()) { + // If the topmost frame is a Java frame, we are (pretty much) + // guaranteed to have a viable FP. We should be more robust + // than this (we have the potential for losing entire threads' + // stack traces) but need to see how much work we really have + // to do here. Searching the stack for an (SP, FP) pair is + // hard since it's easy to misinterpret inter-frame stack + // pointers as base-of-frame pointers; we also don't know the + // sizes of C1 frames (not registered in the nmethod) so can't + // derive them from SP. 
+ + setValues(sp, fp, pc); + return true; + } else { + if (vm.getInterpreter().contains(pc)) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + + sp + ", fp = " + fp + ", pc = " + pc); + } + setValues(sp, fp, pc); + return true; + } + + // For the server compiler, FP is not guaranteed to be valid + // for compiled code. In addition, an earlier attempt at a + // non-searching algorithm (see below) failed because the + // stack pointer from the thread context was pointing + // (considerably) beyond the ostensible end of the stack, into + // garbage; walking from the topmost frame back caused a crash. + // + // This algorithm takes the current PC as a given and tries to + // find the correct corresponding SP by walking up the stack + // and repeatedly performing stackwalks (very inefficient). + // + // FIXME: there is something wrong with stackwalking across + // adapter frames...this is likely to be the root cause of the + // failure with the simpler algorithm below. + + for (long offset = 0; + offset < regionInBytesToSearch; + offset += vm.getAddressSize()) { + try { + Address curSP = sp.addOffsetTo(offset); + Frame frame = new RISCV64Frame(curSP, null, pc); + RegisterMap map = thread.newRegisterMap(false); + while (frame != null) { + if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { + // We were able to traverse all the way to the + // bottommost Java frame. + // This sp looks good. Keep it. + if (DEBUG) { + System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); + } + setValues(curSP, null, pc); + return true; + } + frame = frame.sender(map); + } + } catch (Exception e) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); + } + // Bad SP. Try another. + } + } + + // Were not able to find a plausible SP to go with this PC. + // Bail out. 
+ return false; + } + } else { + // If the current program counter was not known to us as a Java + // PC, we currently assume that we are in the run-time system + // and attempt to look to thread-local storage for saved SP and + // FP. Note that if these are null (because we were, in fact, + // in Java code, i.e., vtable stubs or similar, and the SA + // didn't have enough insight into the target VM to understand + // that) then we are going to lose the entire stack trace for + // the thread, which is sub-optimal. FIXME. + + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + + thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); + } + if (thread.getLastJavaSP() == null) { + return false; // No known Java frames on stack + } + + // The runtime has a nasty habit of not saving fp in the frame + // anchor, leaving us to grovel about in the stack to find a + // plausible address. Fortunately, this only happens in + // compiled code; there we always have a valid PC, and we always + // push RA (the return address) and FP onto the stack as a pair, with FP at the lower + // address. + pc = thread.getLastJavaPC(); + fp = thread.getLastJavaFP(); + sp = thread.getLastJavaSP(); + + if (fp == null) { + CodeCache cc = vm.getCodeCache(); + if (cc.contains(pc)) { + CodeBlob cb = cc.findBlob(pc); + if (DEBUG) { + System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); + } + // See if we can derive a frame pointer from SP and PC + long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); + if (link_offset >= 0) { + fp = sp.addOffsetTo(link_offset); + } + } + } + + // We found a PC in the frame anchor. Check that it's plausible, and + // if it is, use it.
+ if (vm.isJavaPCDbg(pc)) { + setValues(sp, fp, pc); + } else { + setValues(sp, fp, null); + } + + return true; + } + } + + public Address getSP() { return spFound; } + public Address getFP() { return fpFound; } + /** May be null if getting values from thread-local storage; take + care to call the correct RISCV64Frame constructor to recover this if + necessary */ + public Address getPC() { return pcFound; } + + private void setValues(Address sp, Address fp, Address pc) { + spFound = sp; + fpFound = fp; + pcFound = pc; + } +} Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java @@ -0,0 +1,556 @@ +/* + * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Red Hat Inc. + * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.riscv64; + +import java.util.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.compiler.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.oops.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; +import sun.jvm.hotspot.utilities.Observable; +import sun.jvm.hotspot.utilities.Observer; + +/** Specialization of and implementation of abstract methods of the + Frame class for the riscv64 family of CPUs. */ + +public class RISCV64Frame extends Frame { + private static final boolean DEBUG; + static { + DEBUG = System.getProperty("sun.jvm.hotspot.runtime.riscv64.RISCV64Frame.DEBUG") != null || System.getProperty("sun.jvm.hotspot.runtime.RISCV64.RISCV64Frame.DEBUG") != null; // lower-case name matches the package and RISCV64CurrentFrameGuess; upper-case name kept for compatibility + } + + // Java frames + private static final int LINK_OFFSET = -2; + private static final int RETURN_ADDR_OFFSET = -1; + private static final int SENDER_SP_OFFSET = 0; + + // Interpreter frames + private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -3; + private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; + private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; + private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only + private static int INTERPRETER_FRAME_PADDING_OFFSET; + private static int INTERPRETER_FRAME_MIRROR_OFFSET; + private static int INTERPRETER_FRAME_CACHE_OFFSET; + private static int INTERPRETER_FRAME_LOCALS_OFFSET; + private static int INTERPRETER_FRAME_BCX_OFFSET; + private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; + + // Entry frames + private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -10; + + // Native frames + private static
final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; + + private static VMReg fp = new VMReg(8); + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; + INTERPRETER_FRAME_PADDING_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; + INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_PADDING_OFFSET - 1; + INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; + INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; + INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; + INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; + INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + } + + + // an additional field beyond sp and pc: + Address raw_fp; // frame pointer + private Address raw_unextendedSP; + + private RISCV64Frame() { + } + + private void adjustForDeopt() { + if ( pc != null) { + // Look for a deopt pc and if it is deopted convert to original pc + CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); + if (cb != null && cb.isJavaMethod()) { + NMethod nm = (NMethod) cb; + if (pc.equals(nm.deoptHandlerBegin())) { + if (Assert.ASSERTS_ENABLED) { + Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); + } + // adjust pc if frame is deoptimized. 
+ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); + deoptimized = true; + } + } + } + } + + public RISCV64Frame(Address raw_sp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("RISCV64Frame(sp, fp, pc): " + this); + dumpStack(); + } + } + + public RISCV64Frame(Address raw_sp, Address raw_fp) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + + // We cannot assume SP[-1] always contains a valid return PC (e.g. if + // the callee is a C/C++ compiled frame). If the PC is not known to + // Java then this.pc is null. + Address savedPC = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); + if (VM.getVM().isJavaPCDbg(savedPC)) { + this.pc = savedPC; + } + + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("RISCV64Frame(sp, fp): " + this); + dumpStack(); + } + } + + public RISCV64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_unextendedSp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("RISCV64Frame(sp, unextendedSP, fp, pc): " + this); + dumpStack(); + } + + } + + public Object clone() { + RISCV64Frame frame = new RISCV64Frame(); + frame.raw_sp = raw_sp; + frame.raw_unextendedSP = raw_unextendedSP; + frame.raw_fp = raw_fp; + frame.pc = pc; + frame.deoptimized = deoptimized; + return frame; + } + + public boolean equals(Object arg) { + if (arg == null) { + return false; + } + + if (!(arg instanceof RISCV64Frame)) { + return false; + } + + RISCV64Frame other = (RISCV64Frame) arg; + + return 
(AddressOps.equal(getSP(), other.getSP()) && + AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && + AddressOps.equal(getFP(), other.getFP()) && + AddressOps.equal(getPC(), other.getPC())); + } + + public int hashCode() { + if (raw_sp == null) { + return 0; + } + + return raw_sp.hashCode(); + } + + public String toString() { + return "sp: " + (getSP() == null? "null" : getSP().toString()) + + ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + + ", fp: " + (getFP() == null? "null" : getFP().toString()) + + ", pc: " + (pc == null? "null" : pc.toString()); + } + + // accessors for the instance variables + public Address getFP() { return raw_fp; } + public Address getSP() { return raw_sp; } + public Address getID() { return raw_sp; } + + // FIXME: not implemented yet + public boolean isSignalHandlerFrameDbg() { return false; } + public int getSignalNumberDbg() { return 0; } + public String getSignalNameDbg() { return null; } + + public boolean isInterpretedFrameValid() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "Not an interpreted frame"); + } + + // These are reasonable sanity checks + if (getFP() == null || getFP().andWithMask(0x3) != null) { + return false; + } + + if (getSP() == null || getSP().andWithMask(0x3) != null) { + return false; + } + + if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { + return false; + } + + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (getFP().lessThanOrEqual(getSP())) { + // this attempts to deal with unsigned comparison above + return false; + } + + if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { + // stack frames shouldn't be large. 
+ return false; + } + + return true; + } + + public Frame sender(RegisterMap regMap, CodeBlob cb) { + RISCV64RegisterMap map = (RISCV64RegisterMap) regMap; + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // By default we don't have to follow argument oops. The senderFor* methods will + // update the flag accordingly + map.setIncludeArgumentOops(false); + + if (isEntryFrame()) return senderForEntryFrame(map); + if (isInterpretedFrame()) return senderForInterpreterFrame(map); + + if(cb == null) { + cb = VM.getVM().getCodeCache().findBlob(getPC()); + } else { + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); + } + } + + if (cb != null) { + return senderForCompiledFrame(map, cb); + } + + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. + return new RISCV64Frame(getSenderSP(), getLink(), getSenderPC()); + } + + private Frame senderForEntryFrame(RISCV64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForEntryFrame"); + } + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + RISCV64JavaCallWrapper jcw = (RISCV64JavaCallWrapper) getEntryFrameCallWrapper(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); + Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); + } + RISCV64Frame fr; + if (jcw.getLastJavaPC() != null) { + fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); + } else { + fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); + } + map.clear(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); + } + return fr; + } + +
//------------------------------------------------------------------------------ + // frame::adjust_unextended_sp + private void adjustUnextendedSP() { + // If we are returning to a compiled MethodHandle call site, the + // saved_fp will in fact be a saved value of the unextended SP. The + // simplest way to tell whether we are returning to such a call site + // is as follows: + + CodeBlob cb = cb(); + NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); + if (senderNm != null) { + // If the sender PC is a deoptimization point, get the original + // PC. For MethodHandle call site the unextended_sp is stored in + // saved_fp. + if (senderNm.isDeoptMhEntry(getPC())) { + raw_unextendedSP = getFP(); + } + else if (senderNm.isDeoptEntry(getPC())) { + } + else if (senderNm.isMethodHandleReturn(getPC())) { + raw_unextendedSP = getFP(); + } + } + } + + private Frame senderForInterpreterFrame(RISCV64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForInterpreterFrame"); + } + Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + Address sp = addressOfStackSlot(SENDER_SP_OFFSET); + // We do not need to update the callee-save register mapping because above + // us is either another interpreter frame or a converter-frame, but never + // directly a compiled frame. + // 11/24/04 SFG. With the removal of adapter frames this is no longer true. + // However c2 no longer uses callee save register for java calls so there + // are no callee register to find. 
+ + if (map.getUpdateMap()) + updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); + + return new RISCV64Frame(sp, unextendedSP, getLink(), getSenderPC()); + } + + private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { + map.setLocation(fp, savedFPAddr); + } + + private Frame senderForCompiledFrame(RISCV64RegisterMap map, CodeBlob cb) { + if (DEBUG) { + System.out.println("senderForCompiledFrame"); + } + + // + // NOTE: some of this code is (unfortunately) duplicated in RISCV64CurrentFrameGuess + // + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // frame owned by optimizing compiler + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); + } + Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + + // The return address is always the word just below the sender's SP + Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + + // This is the saved value of FP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame. + Address savedFPAddr = senderSP.addOffsetTo(-2 * VM.getVM().getAddressSize()); + + if (map.getUpdateMap()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map.setIncludeArgumentOops(cb.callerMustGCArguments()); + + if (cb.getOopMaps() != null) { + ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); + } + + // Since the prolog does the save and restore of FP there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it.
+ updateMapWithSavedLink(map, savedFPAddr); + } + + return new RISCV64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); + } + + protected boolean hasSenderPD() { + return true; + } + + public long frameSize() { + return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); + } + + public Address getLink() { + try { + if (DEBUG) { + System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) + + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); + } + return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); + } catch (Exception e) { + if (DEBUG) + System.out.println("Returning null"); + return null; + } + } + + public Address getUnextendedSP() { return raw_unextendedSP; } + + // Return address: + public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } + public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } + + // return address of param, zero origin index. + public Address getNativeParamAddr(int idx) { + return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); + } + + public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } + + public Address addressOfInterpreterFrameLocals() { + return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); + } + + private Address addressOfInterpreterFrameBCX() { + return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); + } + + public int getInterpreterFrameBCI() { + // FIXME: this is not atomic with respect to GC and is unsuitable + // for use in a non-debugging, or reflective, system. Need to + // figure out how to express this. 
+ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); + Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); + Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); + return bcpToBci(bcp, method); + } + + public Address addressOfInterpreterFrameMDX() { + return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); + } + + // expression stack + // (the max_stack arguments are used by the GC; see class FrameClosure) + + public Address addressOfInterpreterFrameExpressionStack() { + Address monitorEnd = interpreterFrameMonitorEnd().address(); + return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); + } + + public int getInterpreterFrameExpressionStackDirection() { return -1; } + + // top of expression stack + public Address addressOfInterpreterFrameTOS() { + return getSP(); + } + + /** Expression stack from top down */ + public Address addressOfInterpreterFrameTOSAt(int slot) { + return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); + } + + public Address getInterpreterFrameSenderSP() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "interpreted frame expected"); + } + return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + } + + // Monitors + public BasicObjectLock interpreterFrameMonitorBegin() { + return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); + } + + public BasicObjectLock interpreterFrameMonitorEnd() { + Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); + if (Assert.ASSERTS_ENABLED) { + // make sure the pointer points inside the frame + Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); + Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); + } + return new BasicObjectLock(result); + } + + public int interpreterFrameMonitorSize() { + return BasicObjectLock.size(); + } + + // Method 
+ public Address addressOfInterpreterFrameMethod() { + return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); + } + + // Constant pool cache + public Address addressOfInterpreterFrameCPCache() { + return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); + } + + // Entry frames + public JavaCallWrapper getEntryFrameCallWrapper() { + return new RISCV64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); + } + + protected Address addressOfSavedOopResult() { + // offset is 2 words for the client compiler (C1) and 3 otherwise; this mirrors the code below - TODO confirm against the VM's frame layout + return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) * + VM.getVM().getAddressSize()); + } + + protected Address addressOfSavedReceiver() { + return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + } + + private void dumpStack() { + for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + AddressOps.lt(addr, getSP()); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + System.out.println("-----------------------"); + for (Address addr = getSP(); + AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } +} Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.riscv64; + +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.utilities.*; +import sun.jvm.hotspot.utilities.Observable; +import sun.jvm.hotspot.utilities.Observer; + +public class RISCV64JavaCallWrapper extends JavaCallWrapper { + private static AddressField lastJavaFPField; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaFrameAnchor"); + + lastJavaFPField = type.getAddressField("_last_Java_fp"); + } + + public RISCV64JavaCallWrapper(Address addr) { + super(addr); + } + + public Address getLastJavaFP() { + return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); + } +} Index: 
jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.runtime.*; + +public class RISCV64RegisterMap extends RegisterMap { + + /** This is the only public constructor */ + public RISCV64RegisterMap(JavaThread thread, boolean updateMap) { + super(thread, updateMap); + } + + protected RISCV64RegisterMap(RegisterMap map) { + super(map); + } + + public Object clone() { + RISCV64RegisterMap retval = new RISCV64RegisterMap(this); + return retval; + } + + // no PD state to clear or copy: + protected void clearPD() {} + protected void initializePD() {} + protected void initializeFromPD(RegisterMap map) {} + protected Address getLocationPD(VMReg reg) { return null; } +} Index: jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +++ jdk17u-jdk-17.0.5-8/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -50,7 +50,7 @@ public class PlatformInfo { public static boolean knownCPU(String cpu) { final String[] KNOWN = - new String[] {"i386", "x86", "x86_64", "amd64", "ppc64", "ppc64le", "aarch64"}; + new String[] {"i386", "x86", "x86_64", "amd64", "ppc64", "ppc64le", "aarch64", "riscv64"}; for(String s : KNOWN) { if(s.equals(cpu)) Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/c2/TestBit.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/compiler/c2/TestBit.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/c2/TestBit.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,7 +33,7 @@ import jdk.test.lib.process.ProcessTools * @library /test/lib / * * @requires vm.flagless - * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" + * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" | os.arch == "riscv64" * @requires vm.debug == true & vm.compiler2.enabled * * @run driver compiler.c2.TestBit @@ -55,7 +55,8 @@ public class TestBit { String expectedTestBitInstruction = "ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" : "aarch64".equals(System.getProperty("os.arch")) ? "tb" : - "amd64".equals(System.getProperty("os.arch")) ? "test" : null; + "amd64".equals(System.getProperty("os.arch")) ? "test" : + "riscv64".equals(System.getProperty("os.arch")) ? 
"andi" : null; if (expectedTestBitInstruction != null) { output.shouldContain(expectedTestBitInstruction); Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,6 +39,7 @@ package compiler.intrinsics.sha.cli; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; +import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; @@ -49,6 +50,8 @@ public class TestUseSHA1IntrinsicsOption DigestOptionsBase.USE_SHA1_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( DigestOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedRISCV64CPU( + DigestOptionsBase.USE_SHA1_INTRINSICS_OPTION), new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( DigestOptionsBase.USE_SHA1_INTRINSICS_OPTION), new GenericTestCaseForOtherCPU( Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java =================================================================== --- 
jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,6 +39,7 @@ package compiler.intrinsics.sha.cli; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; +import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; @@ -49,6 +50,8 @@ public class TestUseSHA256IntrinsicsOpti DigestOptionsBase.USE_SHA256_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( DigestOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedRISCV64CPU( + DigestOptionsBase.USE_SHA256_INTRINSICS_OPTION), new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( DigestOptionsBase.USE_SHA256_INTRINSICS_OPTION), new GenericTestCaseForOtherCPU( Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2021, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,6 +39,7 @@ package compiler.intrinsics.sha.cli; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; +import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; @@ -49,6 +50,8 @@ public class TestUseSHA512IntrinsicsOpti DigestOptionsBase.USE_SHA512_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( DigestOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedRISCV64CPU( + DigestOptionsBase.USE_SHA512_INTRINSICS_OPTION), new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( DigestOptionsBase.USE_SHA512_INTRINSICS_OPTION), new GenericTestCaseForOtherCPU( Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -39,6 +39,7 @@ package compiler.intrinsics.sha.cli; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; +import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU; @@ -49,6 +50,8 @@ public class TestUseSHAOptionOnUnsupport DigestOptionsBase.USE_SHA_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( DigestOptionsBase.USE_SHA_OPTION), + new GenericTestCaseForUnsupportedRISCV64CPU( + DigestOptionsBase.USE_SHA_OPTION), new UseSHASpecificTestCaseForUnsupportedCPU( DigestOptionsBase.USE_SHA_OPTION), new GenericTestCaseForOtherCPU( Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,7 +32,7 @@ import jdk.test.lib.cli.predicate.OrPred /** * Generic test case for SHA-related options targeted to any CPU except - * AArch64, PPC, S390x, and X86. + * AArch64, RISCV64, PPC, S390x, and X86. 
*/ public class GenericTestCaseForOtherCPU extends DigestOptionsBase.TestCase { @@ -44,13 +44,14 @@ public class GenericTestCaseForOtherCPU } public GenericTestCaseForOtherCPU(String optionName, boolean checkUseSHA) { - // Execute the test case on any CPU except AArch64, PPC, S390x, and X86. + // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, and X86. super(optionName, new NotPredicate( new OrPredicate(Platform::isAArch64, + new OrPredicate(Platform::isRISCV64, new OrPredicate(Platform::isS390x, new OrPredicate(Platform::isPPC, new OrPredicate(Platform::isX64, - Platform::isX86)))))); + Platform::isX86))))))); this.checkUseSHA = checkUseSHA; } @@ -59,7 +60,7 @@ public class GenericTestCaseForOtherCPU protected void verifyWarnings() throws Throwable { String shouldPassMessage = String.format("JVM should start with " + "option '%s' without any warnings", optionName); - // Verify that on non-x86 and non-AArch64 CPU usage of SHA-related + // Verify that on non-x86, non-RISCV64 and non-AArch64 CPU usage of SHA-related // options will not cause any warnings. CommandLineOptionTest.verifySameJVMStartup(null, new String[] { ".*" + optionName + ".*" }, shouldPassMessage, Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package compiler.intrinsics.sha.cli.testcases; + +import compiler.intrinsics.sha.cli.DigestOptionsBase; +import jdk.test.lib.process.ExitCode; +import jdk.test.lib.Platform; +import jdk.test.lib.cli.CommandLineOptionTest; +import jdk.test.lib.cli.predicate.AndPredicate; +import jdk.test.lib.cli.predicate.NotPredicate; + +/** + * Generic test case for SHA-related options targeted to RISCV64 CPUs + * which don't support the instruction required by the tested option. + */ +public class GenericTestCaseForUnsupportedRISCV64CPU extends + DigestOptionsBase.TestCase { + + final private boolean checkUseSHA; + + public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { + this(optionName, true); + } + + public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { + super(optionName, new AndPredicate(Platform::isRISCV64, + new NotPredicate(DigestOptionsBase.getPredicateForOption( + optionName)))); + + this.checkUseSHA = checkUseSHA; + } + + @Override + protected void verifyWarnings() throws Throwable { + String shouldPassMessage = String.format("JVM startup should pass with " + + "option '-XX:-%s' without any warnings", optionName); + // Verify that the option can be disabled without any warnings. 
+ CommandLineOptionTest.verifySameJVMStartup(null, new String[] { + DigestOptionsBase.getWarningForUnsupportedCPU(optionName) + }, shouldPassMessage, shouldPassMessage, ExitCode.OK, + DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + + if (checkUseSHA) { + shouldPassMessage = String.format("If JVM is started with '-XX:-" + + "%s' '-XX:+%s', output should contain warning.", + DigestOptionsBase.USE_SHA_OPTION, optionName); + + // Verify that when the tested option is enabled, then + // a warning will occur in VM output if UseSHA is disabled. + if (!optionName.equals(DigestOptionsBase.USE_SHA_OPTION)) { + CommandLineOptionTest.verifySameJVMStartup( + new String[] { DigestOptionsBase.getWarningForUnsupportedCPU(optionName) }, + null, + shouldPassMessage, + shouldPassMessage, + ExitCode.OK, + DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag(DigestOptionsBase.USE_SHA_OPTION, false), + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + } + } + } + + @Override + protected void verifyOptionValues() throws Throwable { + // Verify that option is disabled by default. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + String.format("Option '%s' should be disabled by default", + optionName), + DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); + + if (checkUseSHA) { + // Verify that option is disabled even if it was explicitly enabled + // using CLI options. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + String.format("Option '%s' should be off on unsupported " + + "RISCV64CPU even if set to true directly", optionName), + DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + + // Verify that option is disabled when +UseSHA was passed to JVM. 
+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + String.format("Option '%s' should be off on unsupported " + + "RISCV64CPU even if %s flag set to JVM", + optionName, CommandLineOptionTest.prepareBooleanFlag( + DigestOptionsBase.USE_SHA_OPTION, true)), + DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag( + DigestOptionsBase.USE_SHA_OPTION, true)); + } + } +} Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions * -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8138583 * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : double abs & neg test - * @requires os.arch=="aarch64" + * @requires os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8138583 * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : float abs & neg test - * @requires os.arch=="aarch64" + * @requires os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8135028 * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : float test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : int test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -68,14 +68,16 @@ public class IntrinsicPredicates { public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha1" }, null), new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), // x86 variants new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), - new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))); + new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha256" }, null), new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha256" }, null), new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), new OrPredicate(new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, null), @@ -84,10 +86,11 @@ public class IntrinsicPredicates { new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), - new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))); + new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), + new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha512" }, null), 
new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha512" }, null), new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), new OrPredicate(new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, null), @@ -96,7 +99,7 @@ public class IntrinsicPredicates { new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), - new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))); + new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); public static final BooleanSupplier SHA3_INSTRUCTION_AVAILABLE // sha3 is only implemented on aarch64 for now Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -133,7 +133,7 @@ public class CheckForProperDetailStackTr // It's ok for ARM not to have symbols, because it does not support NMT detail // when targeting thumb2. It's also ok for Windows not to have symbols, because // they are only available if the symbols file is included with the build. 
- if (Platform.isWindows() || Platform.isARM()) { + if (Platform.isWindows() || Platform.isARM() || Platform.isRISCV64()) { return; // we are done } output.reportDiagnosticSummary(); Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -240,7 +240,7 @@ public class ReservedStackTest { return Platform.isAix() || (Platform.isLinux() && (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || - Platform.isX86() || Platform.isAArch64())) || + Platform.isX86() || Platform.isAArch64() || Platform.isRISCV64())) || Platform.isOSX(); } Index: jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/serviceability/AsyncGetCallTrace/MyPackage/ASGCTBaseTest.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/hotspot/jtreg/serviceability/AsyncGetCallTrace/MyPackage/ASGCTBaseTest.java +++ jdk17u-jdk-17.0.5-8/test/hotspot/jtreg/serviceability/AsyncGetCallTrace/MyPackage/ASGCTBaseTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2019, Google and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -29,7 +29,7 @@ package MyPackage; * @summary Verifies that AsyncGetCallTrace is call-able and provides sane information. 
* @compile ASGCTBaseTest.java * @requires os.family == "linux" - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="arm" | os.arch=="aarch64" | os.arch=="ppc64" | os.arch=="s390" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="arm" | os.arch=="aarch64" | os.arch=="ppc64" | os.arch=="s390" | os.arch=="riscv64" * @requires vm.jvmti * @run main/othervm/native -agentlib:AsyncGetCallTraceTest MyPackage.ASGCTBaseTest */ Index: jdk17u-jdk-17.0.5-8/test/jdk/jdk/jfr/event/os/TestCPUInformation.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +++ jdk17u-jdk-17.0.5-8/test/jdk/jdk/jfr/event/os/TestCPUInformation.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -52,8 +52,8 @@ public class TestCPUInformation { Events.assertField(event, "hwThreads").atLeast(1); Events.assertField(event, "cores").atLeast(1); Events.assertField(event, "sockets").atLeast(1); - Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "ARM", "PPC", "PowerPC", "AArch64", "s390"); - Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "ARM", "PPC", "PowerPC", "AArch64", "s390"); + Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); + Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); } } } Index: jdk17u-jdk-17.0.5-8/test/lib-test/jdk/test/lib/TestMutuallyExclusivePlatformPredicates.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/lib-test/jdk/test/lib/TestMutuallyExclusivePlatformPredicates.java +++ jdk17u-jdk-17.0.5-8/test/lib-test/jdk/test/lib/TestMutuallyExclusivePlatformPredicates.java @@ -45,7 +45,7 @@ import java.util.Set; */ public class TestMutuallyExclusivePlatformPredicates { private static enum MethodGroup { - ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isX64", "isX86"), + ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isX64", "isX86"), BITNESS("is32bit", "is64bit"), OS("isAix", "isLinux", "isOSX", "isWindows"), VM_TYPE("isClient", "isServer", "isMinimal", "isZero", "isEmbedded"), Index: jdk17u-jdk-17.0.5-8/test/lib/jdk/test/lib/Platform.java =================================================================== --- jdk17u-jdk-17.0.5-8.orig/test/lib/jdk/test/lib/Platform.java +++ jdk17u-jdk-17.0.5-8/test/lib/jdk/test/lib/Platform.java @@ -195,6 +195,10 @@ public class Platform { return isArch("arm.*"); } + public static boolean isRISCV64() { + return isArch("riscv64"); + } + 
public static boolean isPPC() { return isArch("ppc.*"); }