From aec46b9d83a8c1f3286fee993ea92a36c4e542b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20N=C3=BCcke?= Date: Thu, 17 Sep 2020 11:20:35 +0200 Subject: [PATCH] Trace translator is now threaded. Traces are abstract classes instead of interfaces for INVOKEVIRTUAL vs INVOKEINTERFACE. Traces store the cpu they've been built for in a field now. --- .../java/li/cil/circuity/vm/riscv/R5CPU.java | 127 +++++++++++---- .../li/cil/circuity/vm/riscv/dbt/Trace.java | 8 +- .../cil/circuity/vm/riscv/dbt/Translator.java | 151 ++++++++++-------- .../circuity/vm/riscv/dbt/TranslatorJob.java | 26 +++ 4 files changed, 205 insertions(+), 107 deletions(-) create mode 100644 src/main/java/li/cil/circuity/vm/riscv/dbt/TranslatorJob.java diff --git a/src/main/java/li/cil/circuity/vm/riscv/R5CPU.java b/src/main/java/li/cil/circuity/vm/riscv/R5CPU.java index f65f5dcc..08cf9a7d 100644 --- a/src/main/java/li/cil/circuity/vm/riscv/R5CPU.java +++ b/src/main/java/li/cil/circuity/vm/riscv/R5CPU.java @@ -2,6 +2,8 @@ package li.cil.circuity.vm.riscv; import it.unimi.dsi.fastutil.ints.Int2ObjectMap; import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; +import it.unimi.dsi.fastutil.ints.IntOpenHashSet; +import it.unimi.dsi.fastutil.ints.IntSet; import li.cil.circuity.api.vm.MemoryMap; import li.cil.circuity.api.vm.MemoryRange; import li.cil.circuity.api.vm.device.InterruptController; @@ -15,6 +17,7 @@ import li.cil.circuity.vm.BitUtils; import li.cil.circuity.vm.device.memory.exception.*; import li.cil.circuity.vm.riscv.dbt.Trace; import li.cil.circuity.vm.riscv.dbt.Translator; +import li.cil.circuity.vm.riscv.dbt.TranslatorJob; import li.cil.circuity.vm.riscv.exception.R5BreakpointException; import li.cil.circuity.vm.riscv.exception.R5ECallException; import li.cil.circuity.vm.riscv.exception.R5Exception; @@ -24,6 +27,9 @@ import org.apache.logging.log4j.Logger; import javax.annotation.Nullable; import java.util.Arrays; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicInteger; /** @@ -47,7 +53,7 @@ import java.util.concurrent.atomic.AtomicInteger; *
  • "C" Standard Extension for Compressed Instructions, Version 2.0
  • * */ -public class R5CPU implements Steppable, RealTimeCounter, InterruptController { +public final class R5CPU implements Steppable, RealTimeCounter, InterruptController { private static final Logger LOGGER = LogManager.getLogger(); public static final int PC_INIT = 0x1000; // Initial position of program counter. @@ -81,8 +87,8 @@ public class R5CPU implements Steppable, RealTimeCounter, InterruptController { // Knobs for tweaking dynamic binary translation. private static final int HOT_TRACE_COUNT = 16; // Must be a power of to for (x - 1) masking. - private static final int TRACE_COUNT_THRESHOLD = 30; - private static final int EXPECTED_MAX_TRACE_COUNT = 1024; + private static final int TRACE_COUNT_THRESHOLD = 20; + private static final int EXPECTED_MAX_TRACE_COUNT = 4 * 1024; /////////////////////////////////////////////////////////////////// // RV32I @@ -143,8 +149,16 @@ public class R5CPU implements Steppable, RealTimeCounter, InterruptController { /////////////////////////////////////////////////////////////////// // Dynamic binary translation - private final HotTrace[] hotTraces = new HotTrace[HOT_TRACE_COUNT]; + private final WatchedTrace[] watchedTraces = new WatchedTrace[HOT_TRACE_COUNT]; private final Int2ObjectMap traces = new Int2ObjectOpenHashMap<>(EXPECTED_MAX_TRACE_COUNT); + private final IntSet tracesRequested = new IntOpenHashSet(EXPECTED_MAX_TRACE_COUNT); + private volatile TranslatorDataExchange translatorDataExchange = new TranslatorDataExchange(); + private static final ExecutorService translators = Executors.newFixedThreadPool( + Math.min(4, Runtime.getRuntime().availableProcessors()), r -> { + final Thread thread = new Thread(r, "RISC-V Translator Thread"); + thread.setDaemon(true); + return thread; + }); /////////////////////////////////////////////////////////////////// // Real time counter -- at least in RISC-V Linux 5.1 the mtime CSR is needed in add_device_randomness @@ -166,8 +180,8 @@ public class R5CPU implements Steppable, RealTimeCounter, InterruptController { storeTLB[i] = new TLBEntry(); } - for (int i = 0; i < hotTraces.length; i++) { - hotTraces[i] = new HotTrace(); + for (int i = 0; i < watchedTraces.length; i++) { + watchedTraces[i] = new WatchedTrace(); } reset(true); @@ -253,6 +267,8 @@ public class R5CPU implements Steppable, RealTimeCounter, InterruptController { return; } + processTranslatedTraces(); + final long cycleLimit = mcycle + cycles; while (!waitingForInterrupt && mcycle < cycleLimit) { final int pending = mip.get() & mie; @@ -261,9 +277,7 @@ public class R5CPU implements Steppable, RealTimeCounter, InterruptController { } try { - if (!trace()) { - interpret(); - } + interpret(); } catch (final LoadPageFaultException e) { raiseException(R5.EXCEPTION_LOAD_PAGE_FAULT, e.getAddress()); } catch (final StorePageFaultException e) { @@ -294,35 +308,57 @@ public class R5CPU implements Steppable, RealTimeCounter, InterruptController { } } - private boolean trace() throws R5Exception, MemoryAccessException { -// traces.computeIfAbsent(pc, this::translateTrace).execute(this); -// return true; + private void runTranslator() { + for (; ; ) { + final TranslatorDataExchange dataExchange = this.translatorDataExchange; + final TranslatorJob request = dataExchange.translatorRequests.poll(); + if (request != null) { + // May return null in case the translator decided the generated trace was too short. + request.trace = Translator.translateTrace(request); + if (request.trace != null) { + dataExchange.translationResponses.add(request); + } + continue; + } - if (traces.containsKey(pc)) { - invokeTrace(traces.get(pc)); - return true; - } else { + break; + } + } + + private void processTranslatedTraces() { + TranslatorJob response; + while ((response = translatorDataExchange.translationResponses.poll()) != null) { + traces.put(response.pc, response.trace); + tracesRequested.remove(response.pc); + } + } + + private void requestTraceTranslation(final MemoryMappedDevice device, final int instOffset, final int instEnd, final int toPC, final int inst) { +// if (tracesRequested.add(pc)) { +// translatorDataExchange.translatorRequests.add(new TranslatorJob(this, pc, device, instOffset, instEnd, toPC, inst)); +// +// translators.submit(this::runTranslator); +// } + + if (!tracesRequested.contains(pc)) { final int hotTraceIndex = (pc >> 1) & (HOT_TRACE_COUNT - 1); - final HotTrace hotTrace = hotTraces[hotTraceIndex]; - if (hotTrace.pc != pc) { - hotTrace.pc = pc; - hotTrace.count = 1; - } else if (++hotTrace.count >= TRACE_COUNT_THRESHOLD) { - hotTrace.pc = -1; + final WatchedTrace watchedTrace = watchedTraces[hotTraceIndex]; + if (watchedTrace.pc != pc) { + watchedTrace.pc = pc; + watchedTrace.count = 1; + } else if (++watchedTrace.count >= TRACE_COUNT_THRESHOLD) { + watchedTrace.pc = -1; - final Trace trace = Translator.translateTrace(this::fetchPage, pc); - traces.put(pc, trace); - invokeTrace(trace); + tracesRequested.add(pc); + translatorDataExchange.translatorRequests.add(new TranslatorJob(this, pc, device, instOffset, instEnd, toPC, inst)); - return true; + translators.submit(this::runTranslator); } } - - return false; } private void invokeTrace(final Trace trace) throws R5Exception, MemoryAccessException { - trace.execute(this); + trace.execute(); } private void interpret() throws R5Exception, MemoryAccessException { @@ -339,8 +375,11 @@ public class R5CPU implements Steppable, RealTimeCounter, InterruptController { // instruction would fully fit a page. The last 16bit in a page may be the start of // a 32bit instruction spanning two pages, a special case we handle outside the loop. - // Note: this code is duplicated in Translator.translateTrace(). Moving this to a - // separate state class slows things down by 5-10%, sadly. + if (traces.containsKey(pc)) { + invokeTrace(traces.get(pc)); + return; + } + final TLBEntry cache = fetchPage(pc); int instOffset = pc + cache.toOffset; final int instEnd = instOffset - (pc & R5.PAGE_ADDRESS_MASK) // Page start. @@ -358,6 +397,8 @@ public class R5CPU implements Steppable, RealTimeCounter, InterruptController { } } + requestTraceTranslation(cache.device, instOffset, instEnd, toPC, inst); + try { // Catch any exceptions to patch PC field. for (; ; ) { // End of page check at the bottom since we enter with a valid inst. mcycle++; @@ -1438,6 +1479,8 @@ public class R5CPU implements Steppable, RealTimeCounter, InterruptController { } else { flushTLB(x[rs1]); } + + flushTraces(); } private boolean csrrw(final int inst, final int rd, final int rs1, final int funct3, final int csr) throws R5Exception { @@ -2104,6 +2147,7 @@ public class R5CPU implements Steppable, RealTimeCounter, InterruptController { satp = validatedValue; flushTLB(); + flushTraces(); return true; // Invalidate fetch cache. } @@ -2235,6 +2279,8 @@ public class R5CPU implements Steppable, RealTimeCounter, InterruptController { ((mstatus & R5.STATUS_MPRV_MASK) != 0 && (change & R5.STATUS_MPP_MASK) != 0); if (mmuConfigChanged) { flushTLB(); + + // TODO Need multiple trace lists for each combination of MPRV&MPP, SUM, MXR and priv, otherwise we have to flush traces here. } fs = (byte) ((value & R5.STATUS_FS_MASK) >>> R5.STATUS_FS_SHIFT); @@ -2251,6 +2297,8 @@ public class R5CPU implements Steppable, RealTimeCounter, InterruptController { flushTLB(); priv = level; + + // TODO Need multiple trace lists for each combination of MPRV&MPP, SUM, MXR and priv, otherwise we have to flush traces here. } private void raiseException(final int cause, final int value) { @@ -2594,9 +2642,17 @@ public class R5CPU implements Steppable, RealTimeCounter, InterruptController { private void flushTraces() { for (int i = 0; i < HOT_TRACE_COUNT; i++) { - hotTraces[i].pc = -1; + watchedTraces[i].pc = -1; } + traces.clear(); + tracesRequested.clear(); + + // We simply swap out for a new set of queues to avoid synchronization. + // This way any running workers will put their "in flight" result into + // an old queue from which we will not read anymore, and fetch their + // next job from the new queue. + translatorDataExchange = new TranslatorDataExchange(); } private enum AccessType { @@ -2618,11 +2674,16 @@ public class R5CPU implements Steppable, RealTimeCounter, InterruptController { public MemoryMappedDevice device; } - private static final class HotTrace { + private static final class WatchedTrace { public int pc = -1; public int count; } + private static final class TranslatorDataExchange { + public final ConcurrentLinkedQueue translatorRequests = new ConcurrentLinkedQueue<>(); + public final ConcurrentLinkedQueue translationResponses = new ConcurrentLinkedQueue<>(); + } + public R5CPUStateSnapshot getState() { final R5CPUStateSnapshot state = new R5CPUStateSnapshot(); diff --git a/src/main/java/li/cil/circuity/vm/riscv/dbt/Trace.java b/src/main/java/li/cil/circuity/vm/riscv/dbt/Trace.java index 0c0124ce..51a30c7c 100644 --- a/src/main/java/li/cil/circuity/vm/riscv/dbt/Trace.java +++ b/src/main/java/li/cil/circuity/vm/riscv/dbt/Trace.java @@ -1,10 +1,8 @@ package li.cil.circuity.vm.riscv.dbt; import li.cil.circuity.api.vm.device.memory.MemoryAccessException; -import li.cil.circuity.vm.riscv.R5CPU; import li.cil.circuity.vm.riscv.exception.R5Exception; -@FunctionalInterface -public interface Trace { - void execute(final R5CPU cpu) throws R5Exception, MemoryAccessException; -} +public abstract class Trace { + public abstract void execute() throws R5Exception, MemoryAccessException; +} \ No newline at end of file diff --git a/src/main/java/li/cil/circuity/vm/riscv/dbt/Translator.java b/src/main/java/li/cil/circuity/vm/riscv/dbt/Translator.java index baa1fd0e..92c1fa6e 100644 --- a/src/main/java/li/cil/circuity/vm/riscv/dbt/Translator.java +++ b/src/main/java/li/cil/circuity/vm/riscv/dbt/Translator.java @@ -4,7 +4,6 @@ import li.cil.circuity.api.vm.device.memory.MemoryAccessException; import li.cil.circuity.api.vm.device.memory.Sizes; import li.cil.circuity.vm.BitUtils; import li.cil.circuity.vm.UnsafeGetter; -import li.cil.circuity.vm.riscv.R5; import li.cil.circuity.vm.riscv.R5CPU; import li.cil.circuity.vm.riscv.exception.R5BreakpointException; import li.cil.circuity.vm.riscv.exception.R5ECallException; @@ -21,10 +20,11 @@ import java.util.Map; public final class Translator { private static final Unsafe UNSAFE = UnsafeGetter.get(); - private static final String TRACE_EXECUTE_NAME = "execute"; - private static final String TRACE_EXECUTE_DESC = "(Lli/cil/circuity/vm/riscv/R5CPU;)V"; + // Threshold of instructions to emit to generate class. There's a point where they + // can be too small to justify the overhead of calling them (since they have to be + // called via INVOKEVIRTUAL). + private static final int MIN_INSTRUCTIONS = 2;//9; - private static final Type OBJECT_TYPE = Type.getType(Object.class); private static final Type CPU_TYPE = Type.getType(R5CPU.class); private static final Type TRACE_TYPE = Type.getType(Trace.class); private static final Type ECALL_EXCEPTION_TYPE = Type.getType(R5ECallException.class); @@ -34,8 +34,10 @@ public final class Translator { private static final org.objectweb.asm.commons.Method INIT_VOID = org.objectweb.asm.commons.Method.getMethod("void ()"); private static final org.objectweb.asm.commons.Method INIT_INT = org.objectweb.asm.commons.Method.getMethod("void (int)"); + private static final String CPU_FIELD_NAME = "cpu"; + // First argument to the method, the reference to the CPU we're working on. - private static final int CPU_ARG_INDEX = 1; // R5CPU ref, length = 1 + private static final int CPU_LOCAL_INDEX = 1; // R5CPU ref, length = 1 // On-demand updated local holding current actual PC. // Used to bake instOffset for pc fixup on runtime exceptions. @@ -47,32 +49,40 @@ public final class Translator { // Cached opcode implementations by name for faster lookup in generation. private static final Map OPCODE_METHODS = new HashMap<>(); + private final TranslatorJob request; + private final String className; private final MethodVisitor mv; - private final InstructionAccess fetch; private int instOffset; - private int toPC; + int emittedInstructions; - private Translator(final MethodVisitor mv, final InstructionAccess fetch) { + private Translator(final TranslatorJob request, final String className, final MethodVisitor mv) { + this.request = request; + this.className = className; this.mv = mv; - this.fetch = fetch; } - public static Trace translateTrace(final InstructionAccess access, final int pc) { - final ClassWriter cw = new ClassWriter(ClassWriter.COMPUTE_FRAMES); + @Nullable + public static Trace translateTrace(final TranslatorJob request) { + final String className = TRACE_TYPE.getInternalName() + "$" + Integer.toHexString(request.pc); + final ClassWriter cw = new ClassWriter(ClassWriter.COMPUTE_FRAMES); cw.visit(Opcodes.V1_8, Opcodes.ACC_PUBLIC + Opcodes.ACC_FINAL, - TRACE_TYPE.getInternalName() + "$" + Integer.toHexString(pc), + className, null, - OBJECT_TYPE.getInternalName(), - new String[]{TRACE_TYPE.getInternalName()}); + TRACE_TYPE.getInternalName(), + null); - generateDefaultConstructor(cw); - generateExecuteMethod(cw, access, pc); + cw.visitField(Opcodes.ACC_PRIVATE + Opcodes.ACC_FINAL, CPU_FIELD_NAME, CPU_TYPE.getDescriptor(), null, null); + + generateConstructor(cw, className); + if (!generateExecuteMethod(cw, className, request)) { + return null; + } cw.visitEnd(); - return instantiateTrace(defineClass(cw.toByteArray())); + return instantiateTrace(defineClass(cw.toByteArray()), request.cpu); } private static Class defineClass(final byte[] data) { @@ -81,52 +91,62 @@ public final class Translator { return traceClass; } - private static Trace instantiateTrace(final Class traceClass) { + private static Trace instantiateTrace(final Class traceClass, final R5CPU cpu) { try { - return (Trace) UNSAFE.allocateInstance(traceClass); - } catch (final InstantiationException e) { - throw new AssertionError(); + return traceClass.getDeclaredConstructor(R5CPU.class).newInstance(cpu); + } catch (final Throwable e) { + throw new AssertionError("Failed instantiating trace.", e); } } - private static void generateDefaultConstructor(final ClassVisitor cv) { - final MethodVisitor mv = cv.visitMethod(Opcodes.ACC_PUBLIC, "", "()V", null, null); + private static void generateConstructor(final ClassVisitor cv, final String className) { + final MethodVisitor mv = cv.visitMethod(Opcodes.ACC_PUBLIC, "", Type.getMethodDescriptor(Type.VOID_TYPE, CPU_TYPE), null, null); mv.visitCode(); mv.visitVarInsn(Opcodes.ALOAD, 0); - mv.visitMethodInsn(Opcodes.INVOKESPECIAL, OBJECT_TYPE.getInternalName(), INIT_VOID.getName(), INIT_VOID.getDescriptor(), false); + mv.visitMethodInsn(Opcodes.INVOKESPECIAL, TRACE_TYPE.getInternalName(), INIT_VOID.getName(), INIT_VOID.getDescriptor(), false); + mv.visitVarInsn(Opcodes.ALOAD, 0); + mv.visitVarInsn(Opcodes.ALOAD, 1); + mv.visitFieldInsn(Opcodes.PUTFIELD, className, CPU_FIELD_NAME, CPU_TYPE.getDescriptor()); mv.visitInsn(Opcodes.RETURN); mv.visitMaxs(-1, -1); mv.visitEnd(); } - private static void generateExecuteMethod(final ClassVisitor cv, final InstructionAccess access, final int pc) { - final MethodVisitor mv = cv.visitMethod(Opcodes.ACC_PUBLIC + Opcodes.ACC_FINAL, TRACE_EXECUTE_NAME, TRACE_EXECUTE_DESC, null, new String[]{ + private static boolean generateExecuteMethod(final ClassVisitor cv, final String className, final TranslatorJob request) { + final MethodVisitor mv = cv.visitMethod(Opcodes.ACC_PUBLIC + Opcodes.ACC_FINAL, "execute", "()V", null, new String[]{ Type.getInternalName(R5Exception.class), Type.getInternalName(MemoryAccessException.class) }); mv.visitCode(); - new Translator(mv, access).translateTrace(pc); + final Translator translator = new Translator(request, className, mv); + translator.translateTrace(); + if (translator.emittedInstructions < MIN_INSTRUCTIONS) { + return false; + } mv.visitMaxs(-1, -1); mv.visitEnd(); + return true; } - private void translateTrace(final int startPC) { + private void translateTrace() { final Label startLabel = new Label(); final Label returnLabel = new Label(); final Label catchLabel = new Label(); final Label endLabel = new Label(); - mv.visitLocalVariable("currentPC", Type.INT_TYPE.getInternalName(), null, startLabel, endLabel, PC_LOCAL_INDEX); - mv.visitLocalVariable("mcycle", Type.LONG_TYPE.getInternalName(), null, startLabel, returnLabel, MCYCLE_LOCAL_INDEX); + mv.visitLocalVariable("cpu", CPU_TYPE.getDescriptor(), null, startLabel, endLabel, CPU_LOCAL_INDEX); + mv.visitLocalVariable("currentPC", Type.INT_TYPE.getDescriptor(), null, startLabel, endLabel, PC_LOCAL_INDEX); + mv.visitLocalVariable("mcycle", Type.LONG_TYPE.getDescriptor(), null, startLabel, returnLabel, MCYCLE_LOCAL_INDEX); mv.visitTryCatchBlock(startLabel, returnLabel, catchLabel, null); + generateCPULocal(); generatePCLocal(); generateMcycleLocal(); @@ -134,30 +154,17 @@ public final class Translator { try { // Catch illegal instruction exceptions to generate final throw instruction. - // Note: this code is duplicated in R5CPU.interpret(). Moving this to a - // separate state class slows things down by 5-10%, sadly. - final R5CPU.TLBEntry cache = fetch.fetchPage(startPC); - instOffset = startPC + cache.toOffset; - toPC = -cache.toOffset; - final int instEnd = instOffset - (startPC & R5.PAGE_ADDRESS_MASK) // Page start. - + ((1 << R5.PAGE_ADDRESS_SHIFT) - 2); // Page size minus 16bit. - - int inst; - if (instOffset < instEnd) { // Likely case, instruction fully inside page. - inst = cache.device.load(instOffset, Sizes.SIZE_32_LOG2); - } else { // Unlikely case, instruction may leave page if it is 32bit. - inst = cache.device.load(instOffset, Sizes.SIZE_16_LOG2) & 0xFFFF; - if ((inst & 0b11) == 0b11) { // 32bit instruction. - final R5CPU.TLBEntry highCache = fetch.fetchPage(startPC + 2); - inst |= highCache.device.load(startPC + 2 + highCache.toOffset, Sizes.SIZE_16_LOG2) << 16; - } - } + instOffset = request.instOffset; + final int instEnd = request.instEnd; + int inst = request.firstInst; // TODO trim nops completely, i.e. anything where rd = 0 that just computes and writes to it // TODO pre-resolve more switches in op methods // TODO test if incrementing instOffset/pc in generated method is better than generating a ton of constants for (; ; ) { // End of page check at the bottom since we enter with a valid inst. + emittedInstructions++; + incCycle(); if ((inst & 0b11) == 0b11) { @@ -181,7 +188,7 @@ public final class Translator { } case 0b0010111: { // AUIPC - invokeOp("auipc", inst, rd, instOffset + toPC); + invokeOp("auipc", inst, rd, instOffset + request.toPC); instOffset += 4; break; @@ -195,17 +202,17 @@ public final class Translator { } case 0b1101111: { // JAL - invokeOp("jal", inst, rd, instOffset + toPC); + invokeOp("jal", inst, rd, instOffset + request.toPC); return; // May have jumped out of page. Also avoid infinite loops. } case 0b110_0111: { // JALR - invokeOp("jalr", inst, rd, rs1, instOffset + toPC); + invokeOp("jalr", inst, rd, rs1, instOffset + request.toPC); return; // May have jumped out of page. Also avoid infinite loops. } case 0b1100011: { // BRANCH - invokeOp(boolean.class, "branch", inst, rs1, instOffset + toPC); + invokeOp(boolean.class, "branch", inst, rs1, instOffset + request.toPC); mv.visitJumpInsn(Opcodes.IFNE, returnLabel); instOffset += 4; @@ -268,8 +275,8 @@ public final class Translator { storePCInLocal(); mv.visitTypeInsn(Opcodes.NEW, ECALL_EXCEPTION_TYPE.getInternalName()); mv.visitInsn(Opcodes.DUP); - mv.visitVarInsn(Opcodes.ALOAD, CPU_ARG_INDEX); - mv.visitFieldInsn(Opcodes.GETFIELD, CPU_TYPE.getInternalName(), "priv", Type.INT_TYPE.getInternalName()); + mv.visitVarInsn(Opcodes.ALOAD, CPU_LOCAL_INDEX); + mv.visitFieldInsn(Opcodes.GETFIELD, CPU_TYPE.getInternalName(), "priv", Type.INT_TYPE.getDescriptor()); mv.visitMethodInsn(Opcodes.INVOKESPECIAL, ECALL_EXCEPTION_TYPE.getInternalName(), INIT_INT.getName(), INIT_INT.getDescriptor(), false); mv.visitInsn(Opcodes.ATHROW); return; @@ -433,7 +440,7 @@ public final class Translator { break; } case 0b001: { // C.JAL - invokeOp("c_jal", inst, instOffset + toPC); + invokeOp("c_jal", inst, instOffset + request.toPC); return; // May have jumped out of page. Also avoid infinite loops. } case 0b010: { // C.LI @@ -503,12 +510,12 @@ public final class Translator { break; } case 0b101: { // C.J - invokeOp("c_j", inst, instOffset + toPC); + invokeOp("c_j", inst, instOffset + request.toPC); return; // May have jumped out of page. Also avoid infinite loops. } case 0b110: // C.BEQZ case 0b111: { // C.BNEZ - invokeOp(boolean.class, "c_branch", inst, funct3, instOffset + toPC); + invokeOp(boolean.class, "c_branch", inst, funct3, instOffset + request.toPC); mv.visitJumpInsn(Opcodes.IFNE, returnLabel); instOffset += 2; @@ -569,7 +576,7 @@ public final class Translator { mv.visitInsn(Opcodes.ATHROW); return; } else { // C.JALR - invokeOp("c_jalr", rd, instOffset + toPC); + invokeOp("c_jalr", rd, instOffset + request.toPC); return; // May have jumped out of page. Also avoid infinite loops. } } else { // C.ADD @@ -604,7 +611,7 @@ public final class Translator { } if (instOffset < instEnd) { // Likely case: we're still fully in the page. - inst = cache.device.load(instOffset, Sizes.SIZE_32_LOG2); + inst = request.device.load(instOffset, Sizes.SIZE_32_LOG2); } else { // Unlikely case: we reached the end of the page. Leave to do interrupts and cycle check. storePCInCPU(); return; @@ -631,34 +638,40 @@ public final class Translator { mv.visitLabel(catchLabel); - mv.visitVarInsn(Opcodes.ALOAD, CPU_ARG_INDEX); + mv.visitVarInsn(Opcodes.ALOAD, CPU_LOCAL_INDEX); mv.visitVarInsn(Opcodes.ILOAD, PC_LOCAL_INDEX); - mv.visitFieldInsn(Opcodes.PUTFIELD, CPU_TYPE.getInternalName(), "pc", Type.INT_TYPE.getInternalName()); + mv.visitFieldInsn(Opcodes.PUTFIELD, CPU_TYPE.getInternalName(), "pc", Type.INT_TYPE.getDescriptor()); mv.visitInsn(Opcodes.ATHROW); mv.visitLabel(endLabel); } } + private void generateCPULocal() { + mv.visitVarInsn(Opcodes.ALOAD, 0); + mv.visitFieldInsn(Opcodes.GETFIELD, className, CPU_FIELD_NAME, CPU_TYPE.getDescriptor()); + mv.visitVarInsn(Opcodes.ASTORE, CPU_LOCAL_INDEX); + } + private void generatePCLocal() { mv.visitLdcInsn(instOffset); mv.visitVarInsn(Opcodes.ISTORE, PC_LOCAL_INDEX); } private void generateMcycleLocal() { - mv.visitVarInsn(Opcodes.ALOAD, CPU_ARG_INDEX); - mv.visitFieldInsn(Opcodes.GETFIELD, CPU_TYPE.getInternalName(), "mcycle", Type.LONG_TYPE.getInternalName()); + mv.visitVarInsn(Opcodes.ALOAD, CPU_LOCAL_INDEX); + mv.visitFieldInsn(Opcodes.GETFIELD, CPU_TYPE.getInternalName(), "mcycle", Type.LONG_TYPE.getDescriptor()); mv.visitVarInsn(Opcodes.LSTORE, MCYCLE_LOCAL_INDEX); } private void incCycle() { - mv.visitVarInsn(Opcodes.ALOAD, CPU_ARG_INDEX); + mv.visitVarInsn(Opcodes.ALOAD, CPU_LOCAL_INDEX); mv.visitVarInsn(Opcodes.LLOAD, MCYCLE_LOCAL_INDEX); mv.visitLdcInsn(1L); mv.visitInsn(Opcodes.LADD); mv.visitInsn(Opcodes.DUP2); mv.visitVarInsn(Opcodes.LSTORE, MCYCLE_LOCAL_INDEX); - mv.visitFieldInsn(Opcodes.PUTFIELD, CPU_TYPE.getInternalName(), "mcycle", Type.LONG_TYPE.getInternalName()); + mv.visitFieldInsn(Opcodes.PUTFIELD, CPU_TYPE.getInternalName(), "mcycle", Type.LONG_TYPE.getDescriptor()); } private void invokeOp(final String methodName, final int... args) { @@ -683,7 +696,7 @@ public final class Translator { storePCInLocal(); } - mv.visitVarInsn(Opcodes.ALOAD, CPU_ARG_INDEX); + mv.visitVarInsn(Opcodes.ALOAD, CPU_LOCAL_INDEX); for (int i = 0; i < args.length; i++) { if (!ClassUtils.isAssignable(method.argTypes[i], int.class)) { @@ -700,7 +713,7 @@ public final class Translator { } private void storePCInLocal() { - mv.visitLdcInsn(instOffset + toPC); + mv.visitLdcInsn(instOffset + request.toPC); mv.visitVarInsn(Opcodes.ISTORE, PC_LOCAL_INDEX); } @@ -709,8 +722,8 @@ public final class Translator { } private void storePCInCPU(final int offset) { - mv.visitVarInsn(Opcodes.ALOAD, CPU_ARG_INDEX); - mv.visitLdcInsn(instOffset + toPC + offset); + mv.visitVarInsn(Opcodes.ALOAD, CPU_LOCAL_INDEX); + mv.visitLdcInsn(instOffset + request.toPC + offset); mv.visitFieldInsn(Opcodes.PUTFIELD, CPU_TYPE.getInternalName(), "pc", Type.INT_TYPE.getInternalName()); } diff --git a/src/main/java/li/cil/circuity/vm/riscv/dbt/TranslatorJob.java b/src/main/java/li/cil/circuity/vm/riscv/dbt/TranslatorJob.java new file mode 100644 index 00000000..c136542b --- /dev/null +++ b/src/main/java/li/cil/circuity/vm/riscv/dbt/TranslatorJob.java @@ -0,0 +1,26 @@ +package li.cil.circuity.vm.riscv.dbt; + +import li.cil.circuity.api.vm.device.memory.MemoryMappedDevice; +import li.cil.circuity.vm.riscv.R5CPU; + +public final class TranslatorJob { + public final R5CPU cpu; + public final int pc; + public final MemoryMappedDevice device; + public final int instOffset; + public final int instEnd; + public final int toPC; + public final int firstInst; + + public Trace trace; + + public TranslatorJob(final R5CPU cpu, final int pc, final MemoryMappedDevice device, final int instOffset, final int instEnd, final int toPC, final int firstInst) { + this.cpu = cpu; + this.pc = pc; + this.device = device; + this.instOffset = instOffset; + this.instEnd = instEnd; + this.toPC = toPC; + this.firstInst = firstInst; + } +}