from ..core.util.parsing import strip_comments, has_label, get_label, strip_label, get_opcode, strip_opcode, \
parse_literal
import types
import re
import binascii
from ..core import opcodes
from ..core.models.list_file import ListFile
from ..core.util.find_module import find_opcode_cls
# This *is* actually a necessary import due to using "reflection" style code further down
# noinspection PyUnresolvedReferences
from ..core.opcodes import *
# Exclusive upper bound for memory addresses: parse() only accepts ORG/END
# values that satisfy 0 <= address < MAX_MEMORY_LOCATION.
MAX_MEMORY_LOCATION = 16777216 # 2^24
def for_line_opcode_parse(full_text: str):
    """
    Generator over the comment-stripped lines of an assembly file.

    For every line produced by for_line_stripped_comments, yields a 3-tuple of
    the line's label (None when the line has no label), its opcode, and the
    line with the opcode stripped off.

    :param full_text: The file text to parse
    :return: Yields (label or None, opcode, opcode contents) per line
    """
    # The line index reported by the line iterator is not needed here
    for _, code in for_line_stripped_comments(full_text):
        label = None
        if has_label(code):
            label = get_label(code)

        opcode = get_opcode(code)
        opcode_contents = strip_opcode(code)
        yield label, opcode, opcode_contents
def find_labels(text: str) -> (dict, dict, list):
    """
    Collects every label declared in an assembly file.

    :param text: The text to search through for labels
    :return: A 3-tuple of
        labels  - dict of label -> (line index, line contents after the label, stripped),
        equates - dict of label -> opcode-stripped contents, for labelled EQU lines,
        issues  - list of (message, severity) tuples
    """
    found_labels = {}
    found_equates = {}
    problems = []

    for index, line in for_line_stripped_comments(text):
        # Only labelled lines are of interest
        if not has_label(line):
            continue

        name = get_label(line)
        # Surrounding whitespace carries no meaning for the remainder of the
        # line, so drop it here
        remainder = strip_label(line).strip()

        # A label may only be declared once; the first declaration is kept
        if name in found_labels:
            problems.append(('Label {} already declared'.format(name), 'ERROR'))
            continue

        found_labels[name] = (index, remainder)
        if get_opcode(line) == 'EQU':
            found_equates[name] = strip_opcode(line)

    return found_labels, found_equates, problems
def replace_equates(contents: str, equates: dict) -> str:
    """
    Substitutes every EQU name that appears in a line with its value.

    Names are matched as whole identifiers only (not preceded or followed by a
    letter, digit or underscore), so an equate called ``LOOP`` does not touch
    ``LOOP2`` and an equate called ``A`` does not touch the register ``A0``.
    Equates are applied one after another in the dict's iteration order.

    :param contents: The text (typically the operands of one line) to rewrite
    :param equates: Dict of equate name -> replacement text
    :return: The text with all equate names replaced by their values
    """
    for name, value in equates.items():
        if not name:
            # An empty name would match at every position; nothing sensible to replace
            continue
        pattern = r'(?<!\w)' + re.escape(name) + r'(?!\w)'
        # A callable replacement keeps backslashes in the value literal
        contents = re.sub(pattern, lambda _match, value=value: value, contents)
    return contents
def replace_label_addresses(contents: str, label_addresses: dict) -> str:
    """
    Replaces every label in a line with its absolute long address, formatted
    as ``($xxxxxxxx).L`` (8 lowercase hex digits).

    All labels are substituted in a single pass and only as whole identifiers,
    so a label never matches inside a longer identifier (``LOOP`` vs ``LOOP2``)
    and text inserted for one label (hex digits, the ``.L`` suffix) is never
    rewritten by another label such as ``L`` or ``a``.

    :param contents: The text (typically the operands of one line) to rewrite
    :param label_addresses: Dict of label name -> integer memory address
    :return: The text with all labels replaced by their addresses
    """
    # Longest names first so the alternation prefers the most specific label
    names = sorted((name for name in label_addresses if name), key=len, reverse=True)
    if not names:
        return contents

    pattern = r'(?<!\w)(?:' + '|'.join(re.escape(name) for name in names) + r')(?!\w)'

    def to_address(match):
        return '(${0:08x}).L'.format(label_addresses[match.group(0)])

    return re.sub(pattern, to_address, contents)
def replace_labels_with_temps(contents: str, labels: dict) -> str:
    """
    Replaces all labels that we don't know the location for with temporary addresses ($00000000)

    Labels are substituted in a single pass and only as whole identifiers, so
    a label never matches inside a longer identifier (``LOOP`` vs ``LOOP2``)
    and the inserted placeholder is never rewritten by another label (for
    example one named ``L``).

    :param contents: The string to replace labels
    :param labels: The labels (only the dict's keys are used)
    :return: The string with labels replaced
    """
    # Longest names first so the alternation prefers the most specific label
    names = sorted((name for name in labels if name), key=len, reverse=True)
    if not names:
        return contents

    pattern = r'(?<!\w)(?:' + '|'.join(re.escape(name) for name in names) + r')(?!\w)'
    # A callable replacement avoids any escape processing of the placeholder
    return re.sub(pattern, lambda _match: '($00000000).L', contents)
def parse(text: str) -> (ListFile, list):
    """
    Parses an assembly file and returns a list file, along with errors/warnings from the parsing process.

    Works in three passes over the text:
      1. collect labels and equates,
      2. size every operation to lay out memory and assign label addresses,
      3. assemble every operation into the list file using the real label addresses.

    :param text: The assembly file text to parse
    :return: The parsed list file and the list of issues ((message, severity) tuples)
        accumulated over all three passes
    """
    # --- PART 1: process for labels and equates ---
    labels, equates, issues = find_labels(text)

    # --- PART 2: process operations for sizing and lay out memory ---
    to_return = ListFile()
    current_memory_location = 0x00000000
    label_addresses = {}  # Stores all of the label memory locations

    for label, opcode, contents in for_line_opcode_parse(text):
        # Equates have already been processed, skip them
        # ENDs aren't processed until phase 3, skip them for now
        # (this idea could be expanded for more preprocessor directives)
        if opcode == 'EQU' or opcode == 'END':
            continue

        # Replace all substitutions in the current line with their corresponding values
        contents = replace_equates(contents, equates)
        # Replace all labels with temporary addresses because we don't know their actual values yet
        contents = replace_labels_with_temps(contents, labels)

        if label is not None:
            label_addresses[label] = current_memory_location
            to_return.define_symbol(label, current_memory_location)

        if opcode == 'ORG':  # This will shift our current memory location, it's a special case
            try:
                new_memory_location = parse_literal(contents)
            except Exception:
                issues.append(('Error parsing ORG value', 'ERROR'))
                continue

            if not (0 <= new_memory_location < MAX_MEMORY_LOCATION):
                issues.append(('ORG address must be between 0 and 2^24!', 'ERROR'))
                continue

            current_memory_location = new_memory_location

            # Update the label with the new address, if it exists
            if label is not None:
                label_addresses[label] = current_memory_location
                to_return.define_symbol(label, current_memory_location)
            continue

        # TODO: Possibly cache this (and the module search) for Part 3 later so we don't have to redo introspection?
        op_class = find_opcode_cls(opcode)

        # We don't know this opcode, there's no module for it
        if op_class is None:
            issues.append(('Opcode {} is not known: skipping and continuing'.format(opcode), 'ERROR'))
            continue

        # Word length is in 16-bit words, memory is addressed in bytes
        length = op_class.get_word_length(opcode, contents)
        current_memory_location += length * 2

    current_memory_location = 0x00000000

    # --- PART 3: actually create the list file ---
    for _, opcode, contents in for_line_opcode_parse(text):
        # Equates have already been processed, skip them
        # (this idea could be expanded for more preprocessor directives)
        if opcode == 'EQU':
            continue

        # Replace all substitutions in the current line with their corresponding values
        contents = replace_equates(contents, equates)

        if opcode == 'END':
            # End doesn't take an absolute long address, replace it differently
            for name, address in label_addresses.items():
                contents = contents.replace(name, '${:x}'.format(address))
        else:
            # Replace all memory labels with their proper values (that's the difference in this step)
            contents = replace_label_addresses(contents, label_addresses)

        if opcode == 'ORG':  # This will shift our current memory location, it's a special case
            try:
                new_memory_location = parse_literal(contents)
            except Exception:
                # No need to report again, Part 2 already recorded this error
                continue

            if not (0 <= new_memory_location < MAX_MEMORY_LOCATION):
                # Also already reported in Part 2
                continue

            current_memory_location = new_memory_location
            continue

        if opcode == 'END':  # This will set our end memory location, it's a special case
            # END is skipped in Part 2, so this is the only place its operand
            # is checked: report problems instead of crashing or ignoring them
            try:
                start_location = parse_literal(contents)
            except Exception:
                issues.append(('Error parsing END value', 'ERROR'))
                continue

            if not (0 <= start_location < MAX_MEMORY_LOCATION):
                issues.append(('END address must be between 0 and 2^24!', 'ERROR'))
                continue

            to_return.set_starting_execution_address(start_location)
            continue

        # TODO: Possibly use a cached version?
        op_class = find_opcode_cls(opcode)

        if op_class is None:
            # The unknown opcode was already reported for this line in Part 2
            continue

        # check that the input is valid the opcode at the module level
        # NOTE: the per-opcode issues must be merged into the running list;
        # rebinding `issues` here would throw away everything collected so far
        is_valid, opcode_issues = op_class.is_valid(opcode, contents)
        if opcode_issues:
            issues.extend(opcode_issues)

        # for BRA and probably in the future JMP ops...
        # addr must be handed off so that they can pull an offset out of the operand address.
        if issubclass(op_class, bcc.branch_code):
            contents += ", " + str(current_memory_location)

        # if valid, then actually construct the opcode
        if is_valid:
            # get the length of the operation in # of words
            length = op_class.get_word_length(opcode, contents)

            # make the opcode
            data = op_class.from_str(opcode, contents)

            # ensure that the data was built correctly and append it
            if data is not None:
                # instead of converting to a string here, we should make this a method of the base opcode class
                to_return.insert_data(current_memory_location,
                                      binascii.hexlify(data.assemble()).decode('ascii'))

            # Increment our memory counter
            current_memory_location += length * 2

    return to_return, issues