Skip to content

Commit 9c62628

Browse files
committed
Refactored column_filter to table_filter.
1 parent d8da770 commit 9c62628

1 file changed

Lines changed: 97 additions & 60 deletions

File tree

pantable/pantable.py

Lines changed: 97 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -90,34 +90,42 @@ def get_table_width(options):
9090
panflute.debug("pantable: invalid table-width")
9191
return table_width
9292

93-
def column_filter_cell(cell, column_filter):
94-
"""Match the cell data to the given column_filter
9593

96-
Column_filter is a dictionary with the supported keys for filtering.
97-
Empty dict always match the cell.
94+
def table_filter_cell(cell, table_filter):
95+
"""Match the cell data to the given table_filter
96+
97+
table_filter is a dictionary with the supported keys for filtering. Empty
98+
dict always matches the cell. See `apply_table_filter` for more info on the
99+
dict structure.
100+
101+
Args:
102+
cell: Str of cell content to be matched against.
103+
table_filter: Dict of table_filter rules.
98104
99105
"""
100106
if cell is None:
101-
# None indicates that the cell index is out of bounds, we need pretend
102-
# that we keep it, else the row will be removed because of the out of
103-
# bounds index.
107+
# None indicates that the cell index is out of bounds, we need to
108+
# pretend that we keep it, else the row will be removed because of the
109+
# out of bounds index.
104110
return True
105-
elif not column_filter:
111+
elif not table_filter:
106112
# Dict is empty, aka no filter function, thus keep the cell
107113
return True
108-
elif 'filter' in column_filter:
114+
elif 'filter' in table_filter:
109115
str_universal = basestring if py2 else (str, bytes)
110-
if isinstance(column_filter['filter'], str_universal):
111-
return cell == column_filter['filter']
112-
elif isinstance(column_filter['filter'], list):
116+
if isinstance(table_filter['filter'], str_universal):
117+
return cell == table_filter['filter']
118+
elif isinstance(table_filter['filter'], list):
113119
# Assuming all the list items are of type 'basestring'
114-
return any([cell == match for match in column_filter['filter']])
120+
return any([cell == match for match in table_filter['filter']])
115121
else:
116-
raise Exception("Unhandled filter type: {}".format(column_filter['filter']))
117-
elif 'regex' in column_filter:
118-
return re.match(column_filter['regex'], cell) is not None
122+
raise Exception("Unhandled filter type: {}"
123+
.format(table_filter['filter']))
124+
elif 'regex' in table_filter:
125+
return re.match(table_filter['regex'], cell) is not None
126+
127+
return True
119128

120-
return False
121129
# end helper functions
122130

123131

@@ -200,22 +208,24 @@ def parse_alignment(alignment_string, number_of_columns):
200208

201209
return alignment
202210

203-
def apply_column_filter(options, raw_table_list):
204-
"""Apply column_filter to the specified columns, if specified.
205211

206-
If the column_filter is not specified or is an empty list, then the table is
207-
not modified. Else the raw_table_list is filtered based on the values in
208-
the column_filter (i.e., column indexes not specified in the filter is removed).
212+
def apply_table_filter(options, rows):
213+
"""Apply the filter to the rows and/or columns if specified in the options.
209214
210-
Each element in the column_filter list must be an integer or a dictionary
215+
If the filter is not specified or is an empty list, then the table is
216+
not modified.
217+
218+
Each element in the filter list must be an integer or a dictionary
211219
with at least the key 'col'.
212220
213-
Specifying an integer in the column_filter list makes sure that column
214-
index is kept (first column is index 0 -- python list indexing).
221+
Specifying an integer in the filter list makes sure that the column
222+
index is kept (first column index is 0 -- python list indexing); all other
223+
columns are removed.
215224
216-
Specifying a dictionary, gives the optional possibility of specifying the
217-
following keys in the dictionary (note: the keys are mutually exclusive and
218-
specifying more than one has undefined behaviour).
225+
Specifying a dictionary with at least the 'col' key, gives the optional
226+
possibility of specifying the following keys in the dictionary (note: the
227+
keys are mutually exclusive and specifying more than one has undefined
228+
behaviour).
219229
220230
- filter: filters out (removes) the row, if the content inside this
221231
column doesn't match (exact string matching of the value of this key
@@ -226,20 +236,20 @@ def apply_column_filter(options, raw_table_list):
226236
column doesn't match. The value of this key is placed directly into
227237
`re.match(pattern, string)` as the `pattern` and the cell value as
228238
the `string`. Note: Currently we assume that a small amount of
229-
regex's is used, such that we don't have to deal with compiling of
239+
regexes are used, so that we don't have to deal with compiling the
230240
regexes, but can rely on the `re` module's built-in caching to handle it for us.
231241
232242
233243
Example: This example won't filter out any column, but it demonstrates the
234-
three different ways that you may specify a column-filter. Just try and
244+
three different ways that you may specify a table-filter. Just try and
235245
make changes to any one of them, and see how either columns or rows will
236246
be filtered from the resulting table.
237247
238248
```{.table}
239249
---
240250
caption: "*Bar* table"
241251
markdown: yes
242-
column-filter:
252+
table-filter:
243253
- 0
244254
- col: 1
245255
regex: ".*B|[\\d]"
@@ -250,39 +260,66 @@ def apply_column_filter(options, raw_table_list):
250260
1,2,3
251261
```
252262
253-
"""
263+
Args:
264+
options: Dict of the YAML defined in the beginning of the CodeBlock
265+
rows: A list of the rows in the table (it is indexed and sliced when a header row is present, so a plain generator will not work).
254266
255-
column_filter = options.get('column-filter', None)
256-
if not column_filter:
257-
return raw_table_list
258-
259-
# Normalise the column_filter into a dictionary, so we can easily lookup
260-
# column indexes. Each column index will have a dictionary as its value.
261-
# This is where any filter definitions is stored, if there are any.
262-
column_filter_dict = {}
263-
for x in column_filter:
264-
if isinstance(x, int):
265-
column_filter_dict[x] = {}
266-
elif isinstance(x, dict):
267+
"""
268+
table_filter = options.get('table-filter', [])
269+
if table_filter == []:
270+
# Return the rows unchanged.
271+
return rows
272+
273+
# Normalise the table_filter list into a dictionary, so we can easily look up
274+
# column indexes -- Mainly converting integer filters to dictionaries. The
275+
# value of each index is a dictionary, which is empty if no filter was
276+
# specified.
277+
table_filter_dict = {}
278+
for cell_filter in table_filter:
279+
if isinstance(cell_filter, int):
280+
table_filter_dict[cell_filter] = {}
281+
elif isinstance(cell_filter, dict):
267282
# Verify that we have a 'col' key
268-
col = x.get('col', None)
269-
assert col is not None, "Dictionary must contain a 'col' key: {}". format(x)
283+
col = cell_filter.get('col', None)
284+
assert col is not None, "Dictionary type table filters must contain a 'col' key: {}" \
285+
.format(cell_filter)
270286
# remove the 'col' key and convert it to an int.
271-
del x['col']
287+
del cell_filter['col']
272288
col = int(col)
273-
# Add the remaining dict as our column_filter for this column index
274-
column_filter_dict[col] = x
289+
# Add the remaining dict as our filter for this column index
290+
table_filter_dict[col] = cell_filter
275291
else:
276-
raise Exception("column-filter element is of non supported type: {}".format(x))
277-
return [
278-
[cell for idx, cell in enumerate(row) if idx in column_filter_dict.keys()]
279-
# Filter out the rows ...
280-
for row in raw_table_list if
281-
# ... where cells (that have filters) in the row, doesn't satisfy
282-
# all the filters. Use None as cell content if column index is out of bounds
283-
all ([column_filter_cell(row[idx] if idx < len(row) else None, f) for idx, f
284-
in column_filter_dict.iteritems()])
285-
]
292+
raise Exception("table-filter element is of non supported type: {}"
293+
.format(cell_filter))
294+
295+
# Lastly we need to iterate over the rows and only return the rows and
296+
# columns that should be kept.
297+
table_filter_keys = [k for k, v in table_filter_dict.items()
298+
if 'exclude' not in v or bool(v['exclude']) == False]
299+
# We need to handle if the first row is a header.
300+
if 'header' in options and options['header']:
301+
header_row = rows[0]
302+
rows = rows[1:]
303+
yield [cell for idx, cell in enumerate(header_row) if idx in table_filter_keys]
304+
305+
for row in rows:
306+
# This dict should be fairly small, so `.items()` is fine to use in PY2
307+
# where it returns a list instead of a lazy view.
308+
#
309+
# If not all table_filters match this row, then filter it out by
310+
# continuing to the next.
311+
try:
312+
if not all([table_filter_cell(row[idx], cell_filter) for \
313+
idx, cell_filter in table_filter_dict.items()]):
314+
continue
315+
except IndexError:
316+
raise IndexError("You specified a column index (zero indexed) that "
317+
"was bigger than the number of columns ({}) in the row: '{}'"
318+
.format(len(row), row))
319+
320+
# Remove the non specified columns, and return the resulting row.
321+
yield [cell for idx, cell in enumerate(row) if idx in table_filter_keys]
322+
286323

287324
def read_data(options, include, data):
288325
"""
@@ -303,7 +340,7 @@ def read_data(options, include, data):
303340
raw_table_list = None
304341
panflute.debug("pantable: file not found from the path", include)
305342

306-
return apply_column_filter(options, raw_table_list)
343+
return list(apply_table_filter(options, raw_table_list))
307344

308345

309346
def regularize_table_list(raw_table_list):

0 commit comments

Comments
 (0)