Source code for pairipcore.insn

from ._types import addr_t
from .context import VM


# -----------------------------------------------------------------------------
# instruction formats
# -----------------------------------------------------------------------------
[docs] class InsnFormat: __info_begin_: int """Specifies the offset where the instruction info can be found.""" __stack_: int """Specifies the number of variables taken from the compiled stack.""" __extra_: int """Specifies the number of additional variables loaded.""" __store_: int """Specifies the variable containing the destination address.""" # All five variables here (typing only) A: int B: int C: int D: int E: int
[docs] def __init__( self, info_off: int, stack_vars: int = 0, extra_vars: int = 0, storage_var: str | None = None, ) -> None: self.__info_begin_ = info_off self.__stack_ = stack_vars self.__extra_ = extra_vars self.__fmt_ = f"{stack_vars}{extra_vars}{(storage_var or 'X').lower()}" name_idx = ord("A") if stack_vars > 0: for i in range(stack_vars): setattr(self, chr(name_idx), i) name_idx += 1 if extra_vars > 0: for i in range(extra_vars): setattr(self, chr(name_idx), info_off + 0x1A + i) name_idx += 1 self.__store_ = -1 if storage_var is not None and storage_var.upper() not in ("X", "Z"): if extra_vars <= 0 and stack_vars <= 0: raise ValueError( f"Storage variable {storage_var!r} is not defined, " "because no variables are loaded!" ) self.__store_ = getattr(self, storage_var.upper())
[docs] @staticmethod def parse(format_id: str) -> "InsnFormat": """Parses a format ID string to create an InsnFormat object. Args: format_id (str): A string representing the instruction format. Returns: InsnFormat: The corresponding InsnFormat object. Raises: ValueError: If the format ID is empty. """ if not format_id: raise ValueError("Empty instruction format not allowed!") stack_vars = int(format_id[0]) extra_vars = int(format_id[1]) storage_var = None if len(format_id) >= 3: storage_var = format_id[2] return InsnFormat(stack_vars * 4, stack_vars, extra_vars, storage_var)
def __len__(self) -> int: """Calculates the total length of the instruction format. Returns: int: The length of the instruction format. """ return 0x16 + ((self.__stack_ + self.__extra_) * 4) @property def next_addr_off(self) -> int: """Calculates the offset for the next instruction address.""" return self.__info_begin_ + 0x12 @property def fallback_addr_off(self) -> int: """Calculates the offset for the fallback instruction address.""" return self.__info_begin_ + 0x16 @property def hash_off(self) -> int: """Calculates the offset for the hash value.""" return self.__info_begin_ + 0x04 @property def hash_xor_value_off(self) -> int: """Calculates the offset for the XOR value used in the hash.""" return self.__info_begin_ @property def hash_data_off(self) -> int: """Calculates the offset for the hash data.""" return self.__info_begin_ + 0x0C @property def hash_data_length_off(self) -> int: """Calculates the offset for the length of the hash data. Returns: int: The offset for the length of the hash data. """ return self.__info_begin_ + 0x10 @property def store_var_off(self) -> int: """Returns the offset of the storage variable. Returns: int: The offset of the storage variable. """ return self.__store_
[docs] def vars(self) -> dict[str, int]: """Returns the offsets of each stack variable. Returns: dict[str, int]: A dictionary mapping variable names (A, B, C, etc.) to their offsets. """ return { chr(65 + i): getattr(self, chr(65 + i)) for i in range(self.__stack_ + self.__extra_) }
def __str__(self) -> str: return self.__fmt_ @property def stack_vars(self) -> int: return self.__stack_ @property def extra_vars(self) -> int: return self.__extra_
# -----------------------------------------------------------------------------
# instructions
# -----------------------------------------------------------------------------
class InsnInfo:
    """
    Hash-verification and control-flow metadata read for one instruction.

    All values are fetched once, at construction time, from the VM context
    using the offsets supplied by the instruction format.
    """

    #: Next address on equal hash
    __next_: addr_t
    #: Fallback address on hash verification failure
    __fallback_: addr_t
    #: Expected hash value for verification
    __hash_: int
    #: Memory address where the data to be hashed is located
    __hash_data_: addr_t
    #: Length of the data to be hashed
    __hash_data_len_: int
    #: Memory address of the XOR value used in the hash calculation
    __hash_xor_value_: addr_t

    def __init__(self, vm: VM, insn_format: InsnFormat) -> None:
        """Reads the info fields of the current instruction.

        Args:
            vm (VM): The virtual machine whose context is read.
            insn_format (InsnFormat): Format providing the field offsets.
        """
        ctx = vm.context
        fmt = insn_format

        # addr() receives the format offset as-is, whereas the raw integer
        # reads (u64/u16) are given ``pc + offset``.
        self.__hash_xor_value_ = ctx.addr(fmt.hash_xor_value_off)
        self.__hash_ = ctx.u64(ctx.pc + fmt.hash_off)
        self.__hash_data_ = ctx.addr(fmt.hash_data_off)
        self.__hash_data_len_ = ctx.u16(ctx.pc + fmt.hash_data_length_off)
        self.__fallback_ = ctx.addr(fmt.fallback_addr_off)
        self.__next_ = ctx.addr(fmt.next_addr_off)

    @property
    def hash(self) -> int:
        """Expected hash value used for verification."""
        return self.__hash_

    @property
    def hash_data_addr(self) -> addr_t:
        """Memory address of the data that gets hashed."""
        return self.__hash_data_

    @property
    def hash_data_length(self) -> int:
        """Length of the data that gets hashed."""
        return self.__hash_data_len_

    @property
    def xor_value_addr(self) -> addr_t:
        """Memory address of the XOR value used in the hash calculation."""
        return self.__hash_xor_value_

    @property
    def fallback(self) -> addr_t:
        """Address of the fallback instruction taken when verification fails."""
        return self.__fallback_

    @property
    def next(self) -> addr_t:
        """Address of the instruction executed when verification succeeds."""
        return self.__next_
class Insn:
    """
    A decoded VM instruction: its format, its address, its info block and
    the resolved addresses of its operands.
    """

    #: Defines the instruction format for this instruction.
    __format_: InsnFormat
    #: Address of this instruction (without opcode).
    __address_: addr_t
    #: Metadata and control flow information associated with this instruction.
    __info_: InsnInfo

    # Addresses associated with the operands of the instruction.
    A: addr_t
    B: addr_t
    C: addr_t
    D: addr_t
    E: addr_t

    #: Address where the result of the instruction is stored.
    R: addr_t

    def __init__(self, vm: VM, insn_format: InsnFormat) -> None:
        """Decodes the instruction located at the VM's current ``pc``.

        Args:
            vm (VM): The virtual machine whose context is read.
            insn_format (InsnFormat): Format describing the instruction.
        """
        self.__format_ = insn_format
        self.__address_ = vm.context.pc
        self.__info_ = InsnInfo(vm, insn_format)

        # One attribute per operand (A, B, C, ...), resolved from its offset.
        operand_offsets = insn_format.vars()
        for operand in operand_offsets:
            setattr(self, operand, vm.context.addr(operand_offsets[operand]))

        # R is only defined when the format declares a storage variable
        # (store_var_off is -1 otherwise).
        result_off = insn_format.store_var_off
        if result_off != -1:
            self.R = vm.context.addr(result_off)

    @property
    def address(self) -> addr_t:
        """Address of this instruction (excluding the opcode)."""
        return self.__address_

    @property
    def opcode_address(self) -> addr_t:
        """Address of the opcode, i.e. two below :attr:`address`."""
        return self.address - 2

    @property
    def insn_format(self) -> InsnFormat:
        """Format of this instruction."""
        return self.__format_

    @property
    def info(self) -> InsnInfo:
        """Metadata and control flow information of this instruction."""
        return self.__info_

    def __repr__(self) -> str:
        data_length = self.info.hash_data_length
        return f"<Insn {self.__format_!s} [{data_length}]>"
# ----------------------------------------------------------------------------- # opcode mapping # ----------------------------------------------------------------------------- # Instruction Format IDs # This file documents all identified instruction formats based on initial analysis. # The naming convention assigns a unique ID to each format according to their behavior. # Format IDs typically consist of three characters: two digits followed by a letter. # - The first digit represents the number of variables taken from the compiled stack. # - The second digit represents the number of additional variables that must be loaded. # - The final letter indicates which variable stores the destination address. This # address is where the result of the operation will be stored, if applicable. # # For example, the format "20a" indicates: # - Two variables are read from the compiled stack. # - No extra variables are loaded. # - The operation's result is stored in variable 'a'. # # The total length of an instruction can be calculated by adding the size of the # instruction info structure to the size required for the variables: # size := <info-size> + ( <var-count> * 4 ) # # Note: An additional special letter 'z' is used to indicate that no additional # data is stored in memory. # Dictionary that maps opcodes to their corresponding instruction formats. FormatIDs: dict[int, InsnFormat] = {} # Opcode IDs # Similar to format IDs, opcode IDs are defined by a certain naming scheme. # The opcode ID consists of the instruction mnemonic name, followed by optional type # specifications (e.g., UInt, Byte), and finally the instruction format ID. # Dictionary that maps opcodes to their corresponding mnemonic names. OpcodeIDs: dict[int, str] = {}
def O(name: str, opcode: int, format_id: str) -> int:  #: noqa
    """
    Register ``opcode`` under a mnemonic name and an instruction format.

    The parsed format is stored in :code:`FormatIDs` and the mnemonic in
    :code:`OpcodeIDs`, both keyed by the opcode.

    Args:
        name (str): The mnemonic name of the opcode. The placeholder
            :code:`"_"` is replaced by an underscore followed by the
            format ID.
        opcode (int): The numeric opcode to be registered.
        format_id (str): The format ID string that describes the structure
            of the instruction.

    Returns:
        int: The opcode that was registered.
    """
    if name == "_":
        mnemonic = f"_{format_id}"
    else:
        mnemonic = name

    # Parse first so that an invalid format ID leaves both tables untouched.
    insn_format = InsnFormat.parse(format_id)
    FormatIDs[opcode] = insn_format
    OpcodeIDs[opcode] = mnemonic
    return opcode
# TODO: scheduled for removal
#
# Each call below registers one opcode through O(): the format ID is parsed
# into FormatIDs and the mnemonic is stored in OpcodeIDs. A name of "_" is a
# placeholder that O() replaces with "_<format-id>".
# fmt: off
# Name                     | Opcode | human-oriented syntax
O("_", 0x00, "13x")
O("_", 0x05, "21x")
O("_", 0x06, "30x")
O("Assign_UInt_02b", 0x07, "02b")  # b = a
O("_", 0x08, "11x")
O("_", 0x09, "03x")
O("_", 0x0C, "32x")
O("_", 0x0d, "03x")
O("_", 0x0e, "30x")
O("_", 0x0f, "40x")
O("Compare_ULong_12b", 0x10, "12b")  # b = c - a (-1, 0 or 1)
O("_", 0x1C, "12x")
O("_", 0x11, "20x")
O("Assign_UInt_02a", 0x12, "02a")  # a = b
O("Assign_Byte_11a", 0x13, "11a")  # a = b
O("Assign_ULong_20a", 0x15, "20a")  # a = b
O("_", 0x18, "11x")
O("_", 0x19, "40x")
# NOTE(review): "Compate" looks like a typo for "Compare"; the name is a
# runtime string stored in OpcodeIDs, so it is left unchanged here.
O("Compate_Double_03a", 0x1B, "03a")  # a = c - b (-1, 0 or 1)
O("Div_Int_03b", 0x1F, "03b")  # b = a / c
O("_", 0x1a, "20x")
O("_", 0x1e, "11x")
O("_", 0x21, "22x")
O("Add_Int_30c", 0x25, "30c")  # c = a + b
O("_", 0x26, "11x")
O("CastToInt_22d", 0x28, "22d")  # d = convert(src=c, dst=a, val=b)
O("_", 0x29, "20x")
O("CastToFloat_04a", 0x2A, "04a")  # a = convert(src=c, dst=d, val=b)
O("Assign_Long_20a", 0x2E, "20a")  # a = b
O("Div_Double_12b", 0x2c, "12b")  # b = a / c
O("_", 0x2d, "12x")
O("_", 0x2f, "12x")
O("_", 0x30, "40x")
O("_", 0x33, "10x")
O("_", 0x34, "21x")
O("_", 0x35, "21x")
O("_", 0x37, "23x")
O("_", 0x38, "40x")
O("_", 0x3a, "11x")
O("_", 0x3b, "21x")
O("_", 0x3d, "41x")
O("Alloc_Vector_01a", 0x41, "01a")
O("_", 0x42, "03x")
O("_", 0x43, "12x")
O("_", 0x44, "11x")
O("_", 0x46, "01x")
O("_", 0x47, "30x")
O("_", 0x48, "30x")
O("_", 0x49, "30x")
O("_", 0x4a, "02x")
O("_", 0x4b, "21x")
O("_", 0x4c, "13x")
O("_", 0x4d, "21x")
O("_", 0x4f, "30x")
O("_", 0x50, "30x")
O("_", 0x54, "03x")
O("_", 0x57, "20x")
O("Alloc_Vector_01a", 0x58, "01a")
O("_", 0x59, "41x")
O("_", 0x5a, "03x")
O("_", 0x5b, "20x")
O("_", 0x5c, "12x")
O("_", 0x5e, "40x")
O("_", 0x5f, "30x")
O("_", 0x60, "21x")
O("_", 0x64, "30x")
O("_", 0x65, "30x")
O("_", 0x66, "21x")
O("_", 0x68, "11z")
O("_", 0x6b, "30x")
O("_", 0x6c, 
"10x")
O("_", 0x6d, "11x")
O("_", 0x6e, "02x")
O("_", 0x70, "22x")
O("_", 0x71, "03x")
O("_", 0x72, "03x")
O("_", 0x73, "21x")
O("_", 0x74, "30x")
O("_", 0x75, "00x")
O("_", 0x76, "20x")
O("_", 0x77, "12x")
O("_", 0x7a, "20x")
O("_", 0x7b, "30x")
O("_", 0x7e, "30x")
O("_", 0x82, "20x")
O("_", 0x83, "11x")
O("_", 0x84, "22x")
O("_", 0x85, "20x")
O("_", 0x87, "03x")
O("_", 0x89, "12x")
O("_", 0x8b, "12x")
O("_", 0x8c, "12x")
O("_", 0x8d, "11x")
O("_", 0x8e, "21x")
O("_", 0x93, "03x")
O("_", 0x95, "12x")
O("_", 0x96, "11x")
O("_", 0x98, "20x")
O("_", 0x9a, "32x")