Source code for easier68k.assembler.assembler

from ..core.util.parsing import strip_comments, has_label, get_label, strip_label, get_opcode, strip_opcode, \
    parse_literal
import types
import re
import binascii
from ..core import opcodes
from ..core.models.list_file import ListFile

from ..core.util.find_module import find_opcode_cls
# This *is* actually a necessary import due to using "reflection" style code further down
# noinspection PyUnresolvedReferences
from ..core.opcodes import *

MAX_MEMORY_LOCATION = 16777216  # 2^24


[docs]def for_line_stripped_comments(full_text: str):
    for line_index, line in enumerate(full_text.splitlines()):
        stripped = strip_comments(line)
        if not stripped.strip():
            continue

        yield line_index + 1, stripped  # line_index + 1 because here the line indices are zero-based


[docs]def for_line_opcode_parse(full_text: str):
    """
    Yields the label (if it exists), opcode, and opcode contents for every line in a file
    :param full_text: The file text to parse
    :return: Yields the label (or None), opcode, and opcode contents (returns nothing)
    """
    for line_index, stripped in for_line_stripped_comments(full_text):
        yield get_label(stripped) if has_label(stripped) else None, get_opcode(stripped), strip_opcode(stripped)


[docs]def find_labels(text: str) -> (dict, dict, list):
    """
    Finds all labels from a file
    :param text: The text to search through for labels
    :return: In order, labels (dict of label to line index + label contents), equates (dict of label to contents), and
             issues (list of message + severity)
    """
    labels = {}
    equates = {}
    issues = []

    for line_index, stripped in for_line_stripped_comments(text):
        if has_label(stripped):
            label = get_label(stripped)

            # Remove extra spaces at this point because they're no use
            # and could only have negative implications
            label_contents = strip_label(stripped).strip()

            if label in labels.keys():
                issues.append(('Label {} already declared'.format(label), 'ERROR'))
            else:
                labels[label] = (line_index, label_contents)
                if get_opcode(stripped) == 'EQU':
                    equates[label] = strip_opcode(stripped)

    return labels, equates, issues


[docs]def replace_equates(contents: str, equates: dict) -> str:
    for equate in equates.items():
        contents = contents.replace(equate[0], equate[1])

    return contents


[docs]def replace_label_addresses(contents: str, label_addresses: dict) -> str:
    for label in label_addresses.items():
        contents = contents.replace(label[0], '(${0:08x}).L'.format(label[1]))

    return contents


[docs]def replace_labels_with_temps(contents: str, labels: dict) -> str:
    """
    Replaces all labels that we don't know the location for with temporary addresses ($00000000)
    :param contents: The string to replace labels
    :param labels: The labels
    :return: The string with labels replaced
    """
    for label in labels.items():
        contents = contents.replace(label[0], '($00000000).L')

    return contents


[docs]def parse(text: str) -> (ListFile, list):
    """
    Parses an assembly file and returns a list file, along with errors/warnings from the parsing process.
    :param text: The assembly file text to parse
    :return: The parsed list file
    """
    # --- PART 1: process for labels and equates ---
    labels, equates, issues = find_labels(text)

    # --- PART 2: process operations for sizing and lay out memory ---
    to_return = ListFile()
    current_memory_location = 0x00000000
    label_addresses = {}  # Stores all of the label memory locations

    for label, opcode, contents in for_line_opcode_parse(text):
        # Equates have already been processed, skip them
        # ENDs aren't processed until phase 3, skip them for now
        # (this idea could be expanded for more preprocessor directives)
        if opcode == 'EQU' or opcode == 'END':
            continue

        # Replace all substitutions in the current line with their corresponding values
        contents = replace_equates(contents, equates)
        # Replace all labels with temporary addresses because we don't know their actual values yet
        contents = replace_labels_with_temps(contents, labels)

        if label is not None:
            label_addresses[label] = current_memory_location
            to_return.define_symbol(label, current_memory_location)

        if opcode == 'ORG':  # This will shift our current memory location, it's a special case
            try:
                new_memory_location = parse_literal(contents)
            except:
                issues.append(('Error parsing ORG value', 'ERROR'))
                continue
            if not (0 <= new_memory_location < MAX_MEMORY_LOCATION):
                issues.append(('ORG address must be between 0 and 2^24!', 'ERROR'))
                continue
            current_memory_location = new_memory_location
            # Update the label with the new address, if it exists
            if label is not None:
                label_addresses[label] = current_memory_location
                to_return.define_symbol(label, current_memory_location)
            continue

        # TODO: Possibly cache this (and the module search) for Part 3 later so we don't have to redo introspection?
        op_class = find_opcode_cls(opcode)
        # We don't know this opcode, there's no module for it
        if op_class is None:
            issues.append(('Opcode {} is not known: skipping and continuing'.format(opcode), 'ERROR'))
            continue

        length = op_class.get_word_length(opcode, contents)

        current_memory_location += length * 2

    current_memory_location = 0x00000000

    # --- PART 3: actually create the list file ---
    for l, opcode, contents in for_line_opcode_parse(text):
        # Equates have already been processed, skip them
        # (this idea could be expanded for more preprocessor directives)
        if opcode == 'EQU':
            continue

        # Replace all substitutions in the current line with their corresponding values
        contents = replace_equates(contents, equates)

        if opcode == 'END':
            # End doesn't take an absolute long address, replace it differently
            for label in label_addresses.items():
                contents = contents.replace(label[0], '${:x}'.format(label[1]))
        else:
            # Replace all memory labels with their proper values (that's the difference in this step)
            contents = replace_label_addresses(contents, label_addresses)

        if opcode == 'ORG':  # This will shift our current memory location, it's a special case
            try:
                new_memory_location = parse_literal(contents)
            except:
                # Don't need to print assertion, we already did that earlier
                continue
            if not (0 <= new_memory_location < MAX_MEMORY_LOCATION):
                continue
            current_memory_location = new_memory_location
            continue

        if opcode == 'END':  # This will set our end memory location, it's a special case
            start_location = parse_literal(contents)
            if not (0 <= start_location < MAX_MEMORY_LOCATION):
                continue
            to_return.set_starting_execution_address(start_location)
            continue

        # TODO: Possibly use a cached version?
        op_class = find_opcode_cls(opcode)

        if op_class is None:
            issues.append(('Opcode {} is not known: skipping and continuing'.format(opcode), 'ERROR'))
            continue

        # check that the input is valid the opcode at the module level
        is_valid, issues = op_class.is_valid(opcode, contents)

        # for BRA and probably in the future JMP ops...
        # addr must be handed off so that they can pull an offset out of the operand address.
        if issubclass(op_class, bcc.branch_code):
            contents += ", " + str(current_memory_location)

        # if valid, then actually construct the opcode
        if is_valid:
            # get the length of the operation in # of words
            length = op_class.get_word_length(opcode, contents)
            # make the opcode
            data = op_class.from_str(opcode, contents)

            # ensure that the data was built correctly and append it
            if data is not None:
                # instead of converting to a string here, we should make this a method of the base opcode class
                to_return.insert_data(current_memory_location, str(binascii.hexlify(data.assemble()))[2:-1])

                # Increment our memory counter
                current_memory_location += length * 2

    return to_return, issues