Source code for easier68k.core.util.parsing

# Parsing utils
from ..enum.ea_mode import EAMode
from ..models.assembly_parameter import AssemblyParameter
from ..enum.op_size import OpSize

def from_str_util(command: str, parameters: str) -> (OpSize, list, list):
    """
    Helper shared by ``from_str`` implementations.

    Breaks the command apart on ``.`` to find its size suffix (word size is used when
    the command has no suffix) and breaks the parameter string apart on commas,
    trimming the whitespace around every piece.

    >>> from_str_util('MOVE.B', '#123, D0')
    (<OpSize.BYTE: 1>, ['#123', 'D0'], ['MOVE', 'B'])

    >>> from_str_util('FAKEOP', '$AAAA, #123, $AAAA')
    (<OpSize.WORD: 2>, ['$AAAA', '#123', '$AAAA'], ['FAKEOP'])

    >>> from_str_util('NOPARAM.L', '')
    (<OpSize.LONG: 4>, [''], ['NOPARAM', 'L'])

    :param command: the command str (for example ``'MOVE.B'``)
    :param parameters: the comma separated parameters str
    :return: a tuple of the size returned by ``OpSize.parse``, the list of trimmed
        parameter strings, and the list of command pieces produced by splitting on ``.``
    """
    pieces = command.split('.')

    # A command without a ".X" suffix falls back to word size
    size_code = 'W' if len(pieces) == 1 else pieces[1]

    # An empty parameter string still yields a single empty entry
    trimmed = [chunk.strip() for chunk in parameters.split(',')]

    return OpSize.parse(size_code), trimmed, pieces
def _assert_operand(condition: bool) -> None:
    """Raise a bare AssertionError when *condition* is false (unlike ``assert``, this survives ``python -O``)."""
    if not condition:
        raise AssertionError


def _parse_register_digit(digit: str) -> int:
    """Convert a single register digit ('0' to '7') into its number, raising AssertionError otherwise."""
    _assert_operand(len(digit) == 1 and digit in '01234567')
    return int(digit)


def parse_assembly_parameter(addr: str) -> AssemblyParameter:
    """
    Parses an effective addressing mode (such as D0, (A1), #$01) and makes a new AssemblyParameter

    Recognised forms are ``Dn``, ``An``, ``(An)``, ``(An)+``, ``-(An)``, ``(literal).W``,
    ``(literal).L`` and ``#literal``, where ``n`` is a single digit from 0 to 7.

    >>> parse_assembly_parameter('D')
    Traceback (most recent call last):
    ...
    AssertionError

    >>> str(parse_assembly_parameter('D3'))
    'EA Mode: EAMode.DRD, Data: 3'

    >>> str(parse_assembly_parameter('A6'))
    'EA Mode: EAMode.ARD, Data: 6'

    >>> str(parse_assembly_parameter('(A4)'))
    'EA Mode: EAMode.ARI, Data: 4'

    >>> str(parse_assembly_parameter('(A2)+'))
    'EA Mode: EAMode.ARIPI, Data: 2'

    >>> str(parse_assembly_parameter('(A2)-'))  # Invalid, can't do "post-decrement"
    Traceback (most recent call last):
    ...
    AssertionError

    >>> str(parse_assembly_parameter('(A2)+x'))  # Invalid, junk after the post-increment
    Traceback (most recent call last):
    ...
    AssertionError

    >>> str(parse_assembly_parameter('($45).W'))
    'EA Mode: EAMode.AWA, Data: 69'

    >>> str(parse_assembly_parameter('(%01010111).L'))
    'EA Mode: EAMode.ALA, Data: 87'

    >>> str(parse_assembly_parameter('($45)XW'))  # Invalid, the size must follow a "."
    Traceback (most recent call last):
    ...
    AssertionError

    >>> str(parse_assembly_parameter('#$FF'))
    'EA Mode: EAMode.IMM, Data: 255'

    >>> str(parse_assembly_parameter('#$ABCD'))
    'EA Mode: EAMode.IMM, Data: 43981'

    >>> str(parse_assembly_parameter('#-1'))
    'EA Mode: EAMode.IMM, Data: -1'

    >>> str(parse_assembly_parameter('#-100'))
    'EA Mode: EAMode.IMM, Data: -100'

    >>> str(parse_assembly_parameter('-(A2)'))
    'EA Mode: EAMode.ARIPD, Data: 2'

    :param addr: the operand text to parse
    :return: the parsed AssemblyParameter, or None when the first character does not
        start any of the recognised forms
    :raises AssertionError: when the operand starts like a recognised form but is malformed
        (too short, bad register digit, missing parenthesis, unexpected trailing text)
    """
    _assert_operand(len(addr) >= 2)
    prefix = addr[0]

    if prefix in ('D', 'A'):
        # Register direct: exactly one letter followed by one register digit
        _assert_operand(len(addr) == 2)
        mode = EAMode.DRD if prefix == 'D' else EAMode.ARD
        return AssemblyParameter(mode, _parse_register_digit(addr[1]))

    if prefix == '(':
        # ARI, ARIPI, ALA, or AWA
        # Split into the text inside the parentheses and whatever follows them
        close = addr.find(')', 1)
        _assert_operand(close != -1)
        nested = addr[1:close]
        suffix = addr[close + 1:]

        if nested.startswith('A'):
            # ARI or ARIPI: the parentheses must hold exactly "An"
            _assert_operand(len(nested) == 2)
            register = _parse_register_digit(nested[1])
            if suffix == '':
                return AssemblyParameter(EAMode.ARI, register)
            # The only thing allowed after the parentheses is a single "+"
            _assert_operand(suffix == '+')
            return AssemblyParameter(EAMode.ARIPI, register)

        # ALA or AWA: the parentheses must be followed by exactly ".W" or ".L"
        _assert_operand(suffix in ('.W', '.L'))
        mode = EAMode.AWA if suffix == '.W' else EAMode.ALA
        return AssemblyParameter(mode, parse_literal(nested))

    if prefix == '#':
        # IMM
        return AssemblyParameter(EAMode.IMM, parse_literal(addr[1:]))

    if prefix == '-':
        # ARIPD: must be exactly "-(An)"
        _assert_operand(len(addr) == 5 and addr[1:3] == '(A' and addr[4] == ')')
        return AssemblyParameter(EAMode.ARIPD, _parse_register_digit(addr[3]))

    # Not a form this parser knows about
    return None
def parse_literal(literal: str) -> int:
    """
    Parses a literal (aka "1234" or "$A0F" or "%1001")

    A leading ``$`` marks a hexadecimal literal, a leading ``%`` marks a binary literal
    (of any number of digits), and anything else is read as a decimal integer.

    >>> parse_literal('$BA1')
    2977

    >>> parse_literal('%01010111')
    87

    >>> parse_literal('%101')
    5

    >>> parse_literal('57')
    57

    >>> parse_literal('400')
    400

    >>> parse_literal('-100')
    -100

    >>> parse_literal('-1')
    -1

    :param literal: A string containing the literal to parse
    :return: The parsed literal as an int
    :raises ValueError: if the text is not a valid number in the selected base
        (this includes an empty literal)
    """
    # startswith (rather than indexing) keeps an empty literal on the ValueError path below
    if literal.startswith('$'):
        # Parsing a hex literal
        return int(literal[1:], 16)

    if literal.startswith('%'):
        # Parsing a binary literal
        return int(literal[1:], 2)

    # Integer literal
    return int(literal)
def strip_comments(line: str) -> str:
    """
    Cuts a line off at its first comment character, leaving the 'compiler' version of the line

    Both ``;`` and ``*`` start a comment. Everything before the first of them is returned
    unchanged, including any trailing whitespace. The characters are not treated specially
    inside quotes; the first occurrence always ends the line.

    >>> strip_comments('label TRAP #15 * This does a thing')
    'label TRAP #15 '

    >>> strip_comments(' ORG start ;label')
    ' ORG start '

    >>> strip_comments('; ADD D0, D1 * asdf')
    ''

    :param line: The line to strip comments from
    :return: The stripped line
    """
    # Cutting at the first ';' and then at the first '*' of what remains is the same
    # as cutting at whichever of the two characters appears first
    before_semicolon = line.split(';', 1)[0]
    return before_semicolon.split('*', 1)[0]
def has_label(line: str) -> bool:
    """
    Returns whether or not this line has a label in it

    A line has a label when, after comments are removed, it is not blank and its first
    character is not whitespace (a leading space or tab means the line starts with an
    instruction rather than a label).

    :param line: The line to test
    :return: Whether the test line has a label in it

    >>> has_label('data DC.B $A3')
    True

    >>> has_label(' BEQ test')
    False

    >>> has_label('     TRAP #15')
    False

    >>> has_label('\\tTRAP #15')
    False

    >>> has_label(';start EQU $400')
    False
    """
    code = strip_comments(line)

    if not code.strip():
        # The line is literally empty after removing comments
        return False

    # code is non-empty here, so indexing the first character is safe
    return not code[0].isspace()
def get_label(line: str) -> str:
    """
    Extracts the label from a line, or gives back None when ``has_label`` says there is none

    The label is everything from the start of the comment-stripped line up to (but not
    including) the first space.

    >>> get_label('test DC.B $0A')
    'test'

    >>> get_label(';test DC.B $0A')  # should return nothing

    >>> get_label(' BEQ test')  # should return nothing

    :param line: The line to get the label from
    :return: The label in the line, or None if the line has no label
    """
    if has_label(line):
        # Keep only the text in front of the first space
        return strip_comments(line).split(' ', 1)[0]

    return None
def strip_label(line: str) -> str:
    """
    Drops the leading label from a line and returns what follows it (comments are removed too)

    Everything up to and including the first space is discarded, along with any further
    spaces directly after it. The first token is dropped without checking that it really
    is a label. Trailing whitespace of the remainder is kept.

    >>> strip_label('ORG start')
    'start'

    >>> strip_label('RTS ;comm')
    ''

    >>> strip_label(';all commented')
    ''

    >>> strip_label('MOVE D0, D1 * Moves D0 into D1')
    'D0, D1 '

    :param line: The line to strip the label from
    :return: The stripped line
    """
    code = strip_comments(line)

    if code.strip() == '':
        # Nothing but whitespace is left once the comments are gone
        return ''

    # When there is no space at all, the remainder is empty
    _, _, remainder = code.partition(' ')

    # Only spaces are skipped here; any other whitespace stays in the result
    return remainder.lstrip(' ')
def get_opcode(line: str) -> str:
    """
    Pulls the opcode out of a line, whether or not the line starts with a label

    The result is upper-cased. A line holding nothing but comments or whitespace gives
    an empty string.

    >>> get_opcode('start EQU $400')
    'EQU'

    >>> get_opcode(' MOVE.B D0, D1')
    'MOVE.B'

    >>> get_opcode('; start EQU $400')
    ''

    :param line: The line to get the opcode from
    :return: The opcode found in said line
    """
    code = strip_comments(line)

    if not code.strip():
        return ''

    # Throw away the label first when the line has one
    remainder = strip_label(code) if has_label(code) else code

    # The opcode is whatever comes before the first space of the trimmed remainder
    return remainder.strip().partition(' ')[0].upper()
def strip_opcode(line: str) -> str:
    """
    Removes the opcode (plus any label and comments) from a line, leaving the parameters

    >>> strip_opcode('start EQU $400')
    '$400'

    >>> strip_opcode(' MOVE.B D0, D1')
    'D0, D1'

    >>> strip_opcode(' RTS')
    ''

    >>> strip_opcode('start EQU $400 ; comments!')
    '$400'

    :param line: The line to strip the opcode from
    :return: The line with the opcode stripped (as well as comments and labels)
    """
    code = strip_comments(line)

    if not code.strip():
        return ''

    # Reduce the line to just "opcode parameters"
    instruction = (strip_label(code) if has_label(code) else code).strip()

    # Drop the opcode and the first space; with no space there are no parameters
    _, _, operands = instruction.partition(' ')

    # Skip any additional spaces that separate the opcode from its parameters
    return operands.lstrip(' ')