# permap/per_file_parser.py at main · redballoonsecurity/permap · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import binaryninja
import re


import re

class PerFileParser:
    """
    A parser for Lauterbach .per files to extract peripheral mappings
    and generate a Binary Ninja script.
    """

    def __init__(self, filename: str, cpu: str):
        """
        Initializes the parser with the given .per file.

        Args:
            filename (str): The path to the .per file to parse.
        """
        self.filename = filename
        self.base_addr = None
        self.lines = []
        self.tree_stack = []
        self.parsed_data = []
        self.cpu = cpu
        # State variables to handle nesting
        # Each entry in this stack will be a tuple:
        # (already_matched_in_this_block, should_consider)
        # "already_matched_in_this_block": True if a previous sif/elif in the same block has already matched and chosen lines
        # "should_consider": True if we are currently inside a chosen block of code
        self.condition_stack = []
        self.should_consider = True


    def parse(self):
        """
        Parses the .per file and extracts mapped entries.
        """
        with open(self.filename, 'r') as f:
            self.lines = [line.strip() for line in f]

        for index, line in enumerate(self.lines):
            # If statements and conditionals do not apply to lines containing tree or base addr
            self._parse_tree_name(line)
            self._parse_base_addr(line)
            stripped = line.strip()
            if stripped.startswith('sif '):
                # Starting a new conditional block
                cond_str = stripped[4:].strip()
                # Push a new state
                cond_result = self.evaluate_condition(cond_str, self.cpu)
                # When we enter a sif, no previous match in this block, so:
                self.condition_stack.append((cond_result, cond_result))
                self.should_consider = self.should_consider and cond_result
            elif stripped.startswith('elif '):
                cond_str = stripped[5:].strip()
                # We are continuing in the same block, so look at the top of the stack
                if self.condition_stack:
                    already_matched, _ = self.condition_stack.pop()
                    if already_matched:
                        # If already matched some previous sif/elif, this elif won't match
                        # Re-push with same already_matched = True, self.should_print = False
                        self.condition_stack.append((True, False))
                        self.should_consider = False
                    else:
                        # No previous match, evaluate this
                        cond_result = self.evaluate_condition(cond_str, self.cpu)
                        # If this matches, set already_matched = True, else False
                        self.condition_stack.append((cond_result, cond_result))
                        self.should_consider = cond_result
            elif stripped.startswith('else'):
                # if no conditions matched so far
                if self.condition_stack:
                    already_matched, _ = self.condition_stack.pop()
                    if already_matched:
                        # Already matched, so else is skipped
                        self.condition_stack.append((True, False))
                        self.should_consider = False
                    else:
                        # No match yet, else block executes
                        self.condition_stack.append((True, True))
                        self.should_consider = True
            elif stripped.startswith('endif'):
                # End of current block
                if self.condition_stack:
                    self.condition_stack.pop()
                # Determine self.should_consider based on what's left on the stack
                self.should_consider = True
                for am, sc in self.condition_stack:
                    # If any upper block is false, we can't print
                    self.should_consider = self.should_consider and sc
            else:
                # Normal line
                if self.should_consider:
                    mapped_entry = self._parse_mapped_name(index, line)
                    if mapped_entry:
                        self.parsed_data.append(mapped_entry)

    def _parse_tree_name(self, line: str):
        """
        Parses tree names and updates the tree stack.

        Args:
            line (str): The current line from the .per file.
        """
        if line.startswith("tree"):
            if "tree.end" in line:
                if self.tree_stack:
                    self.tree_stack.pop()
            else:
                match = re.match(r'tree\s+"([^"]+)"', line)
                if match:
                    tree_name = match.group(1)
                    self.tree_stack.append(tree_name)

    def _parse_base_addr(self, line: str):
        """
        Parses the base addr and updates the class variable.

        Args:
            line: The current line from the .per file.
        """
        match  = re.match(
            r'base.*(0x[0-9A-Fa-f]+)',
            line
        )
        if match:
            self.base_addr, = match.groups()

    def _parse_mapped_name(self, index, line):
        """
        Parses mapped names and returns a dictionary with the extracted data.

        Args:
            index (int): The current line index.
            line (str): The current line from the .per file.

        Returns:
            dict or None: A dictionary with the parsed data or None if parsing fails.
        """
        match = re.match(
            r'group.(?P<type>\w+) \(?(?P<baseAddr>0x[0-9A-Fa-f]+)*?\+?(?P<offset>[\.x:a-fA-F0-9]+)\)?\+\+(?:0x[A-Fa-f0-9]+)( \"(?P<name>[^\"]+)\")?',
            line
        )
        if match:
            captures = match.groupdict()
            name = ""
            if not captures["name"]:
                # Look ahead to the next line for the name
                if index + 1 < len(self.lines):
                    name_match = re.match(
                        r'line.(?:\w+)\s+[xa-fA-F0-9]+ \"(.+)\"',
                        self.lines[index + 1]
                    )
                    if name_match:
                        name, = name_match.groups()
            else:
                name = captures["name"]
            baseAddr = captures["baseAddr"] if captures["baseAddr"] else self.base_addr
            if name: # On rare occation name will still be none at this point. Might just be an error in the .per file.
                offset = captures["offset"]
                if not baseAddr: # If baseAddr is still none we can assume that the offset is just the base addr
                    baseAddr = offset
                    offset = "0x0"
                _, _, offset = offset.rpartition(":")
                _, _, baseAddr = baseAddr.rpartition(":")
                name = name.strip()
                return self._create_mapped_entry(captures["type"], baseAddr, offset, name)
        return None

    def _create_mapped_entry(self, _type, _address, _offset, _name):
        """
        Creates a mapped entry dictionary from the extracted data.

        Args:
            _type (str): The type extracted from the .per file.
            _address (str): The base address as a string.
            _offset (str): The offset as a string.
            _name (str): The name of the peripheral.

        Returns:
            dict or None: A dictionary with the mapped entry or None if address is invalid.
        """
        address = self._calculate_address(_address, _offset)
        tree_name = ": ".join(self.tree_stack) + ": " if self.tree_stack else ""
        if address:
            return {
                'address': address,
                'type': _type,
                'name': _name,
                'tree': tree_name,
            }
        else:
            return None

    def _calculate_address(self, base_address: str, offset: str):
        """
        Calculates the actual address from the base address and offset.

        Args:
            base_address (str): The base address as a string.
            offset (str): The offset as a string.

        Returns:
            int or None: The calculated address or None if calculation fails.
        """
        try:
            if offset.startswith("0x"):
                return int(base_address, 16) + int(offset, 16)
            elif "." in offset:
                # Remove trailing dot and convert to integer
                return int(base_address, 16) + int(offset.rstrip('.'), 10)
            else:
                # Assume decimal offset
                return int(base_address, 16) + int(offset, 10)
        except ValueError as e:
            binaryninja.log_error(f"Error calculating address: {e}")
            return None


    # Helper function to evaluate a condition of form (cpu()=="LPCXXX"||cpu()=="LPCYYY") etc.
    def evaluate_condition(self, cond_str, cpu):
        """
        Decides whether a sif/elif condition selects the given CPU.

        Every cpu()=="NAME" comparison in the condition is collected and the
        result is True if any NAME equals cpu, i.e. the comparisons are
        treated as if joined by ||. Other operators (&&, !=) and other
        functions are not interpreted.

        Args:
            cond_str (str): The condition text, e.g.
                (cpu()=="LPC2880"||cpu()=="LPC2888").
            cpu (str): The CPU name chosen by the user; an empty string means
                no CPU was entered.

        Returns:
            bool: True if no CPU was entered or if any compared name equals
            cpu, otherwise False.
        """
        if cpu == "":  # User didn't enter a CPU
            return True

        # Accept any quoted name (not only letters, digits and "/") so that
        # CPU names containing e.g. "-" or "_" can match, and tolerate
        # whitespace around the == operator.
        names = re.findall(r'cpu\(\)\s*==\s*\"([^\"]+)\"', cond_str)
        return cpu in names