@@ -90,34 +90,42 @@ def get_table_width(options):
9090 panflute .debug ("pantable: invalid table-width" )
9191 return table_width
9292
93- def column_filter_cell (cell , column_filter ):
94- """Match the cell data to the given column_filter
9593
96- Column_filter is a dictionary with the supported keys for filtering.
97- Empty dict always match the cell.
94+ def table_filter_cell (cell , table_filter ):
95+ """Match the cell data to the given table_filter
96+
97+ table_filter is a dictionary with the supported keys for filtering. An empty
98+ dict always matches the cell. See `apply_table_filter` for more info on the
99+ dict structure.
100+
101+ Args:
102+ cell: Str of cell content to be matched against.
103+ table_filter: Dict of table_filter rules.
98104
99105 """
100106 if cell is None :
101- # None indicates that the cell index is out of bounds, we need pretend
102- # that we keep it, else the row will be removed because of the out of
103- # bounds index.
107+ # None indicates that the cell index is out of bounds, we need to
108+ # pretend that we keep it, else the row will be removed because of the
109+ # out of bounds index.
104110 return True
105- elif not column_filter :
111+ elif not table_filter :
106112 # Dict is empty, aka no filter function, thus keep the cell
107113 return True
108- elif 'filter' in column_filter :
114+ elif 'filter' in table_filter :
109115 str_universal = basestring if py2 else (str , bytes )
110- if isinstance (column_filter ['filter' ], str_universal ):
111- return cell == column_filter ['filter' ]
112- elif isinstance (column_filter ['filter' ], list ):
116+ if isinstance (table_filter ['filter' ], str_universal ):
117+ return cell == table_filter ['filter' ]
118+ elif isinstance (table_filter ['filter' ], list ):
113119 # Assuming all the list items are of type 'basestring'
114- return any ([cell == match for match in column_filter ['filter' ]])
120+ return any ([cell == match for match in table_filter ['filter' ]])
115121 else :
116- raise Exception ("Unhandled filter type: {}" .format (column_filter ['filter' ]))
117- elif 'regex' in column_filter :
118- return re .match (column_filter ['regex' ], cell ) is not None
122+ raise Exception ("Unhandled filter type: {}"
123+ .format (table_filter ['filter' ]))
124+ elif 'regex' in table_filter :
125+ return re .match (table_filter ['regex' ], cell ) is not None
126+
127+ return True
119128
120- return False
121129# end helper functions
122130
123131
@@ -200,22 +208,24 @@ def parse_alignment(alignment_string, number_of_columns):
200208
201209 return alignment
202210
203- def apply_column_filter (options , raw_table_list ):
204- """Apply column_filter to the specified columns, if specified.
205211
206- If the column_filter is not specified or is an empty list, then the table is
207- not modified. Else the raw_table_list is filtered based on the values in
208- the column_filter (i.e., column indexes not specified in the filter is removed).
212+ def apply_table_filter (options , rows ):
213+ """Apply the filter to the rows and/or columns if specified in the options.
209214
210- Each element in the column_filter list must be an integer or a dictionary
215+ If the filter is not specified or is an empty list, then the table is
216+ not modified.
217+
218+ Each element in the filter list must be an integer or a dictionary
211219 with at least the key 'col'.
212220
213- Specifying an integer in the column_filter list makes sure that column
214- index is kept (first column is index 0 -- python list indexing).
221+ Specifying an integer in the filter list makes sure that the column with
222+ that index is kept (the first column has index 0 -- python list indexing); all
223+ columns that are not specified are removed.
215224
216- Specifying a dictionary, gives the optional possibility of specifying the
217- following keys in the dictionary (note: the keys are mutually exclusive and
218- specifying more than one has undefined behaviour).
225+ Specifying a dictionary with at least the 'col' key, gives the optional
226+ possibility of specifying the following keys in the dictionary (note: the
227+ keys are mutually exclusive and specifying more than one has undefined
228+ behaviour).
219229
220230 - filter: filters out (removes) the row, if the content inside this
221231 column doesn't match (exact string matching of the value of this key
@@ -226,20 +236,20 @@ def apply_column_filter(options, raw_table_list):
226236 column doesn't match. The value of this key is placed directly into
227237 `re.match(pattern, string)` as the `pattern` and the cell value as
228238 the `string`. Note: Currently we assume that a small amount of
229- regex's is used, such that we don't have to deal with compiling of
239+ regex's is used, such that we don't have to deal with compiling the
230240 regex's, but rely on the built in caching to handle it for us.
231241
232242
233243 Example: This example won't filter out any column, but it demonstrates the
234- three different ways that you may specify a column -filter. Just try and
244+ three different ways that you may specify a table-filter. Just try and
235245 make changes to either one of them, and see how either columns or rows will
236246 be filtered from the resulting table.
237247
238248 ```{.table}
239249 ---
240250 caption: "*Bar* table"
241251 markdown: yes
242- column -filter:
252+ table-filter:
243253 - 0
244254 - col: 1
245255 regex: ".*B|[\\ d]"
@@ -250,39 +260,66 @@ def apply_column_filter(options, raw_table_list):
250260 1,2,3
251261 ```
252262
253- """
263+ Args:
264+ options: Dict of the YAML defined in the beginning of the CodeBlock
265+ rows: List of the rows in the table (it is indexed and sliced, so not a generator).
254266
255- column_filter = options .get ('column-filter' , None )
256- if not column_filter :
257- return raw_table_list
258-
259- # Normalise the column_filter into a dictionary, so we can easily lookup
260- # column indexes. Each column index will have a dictionary as its value.
261- # This is where any filter definitions is stored, if there are any.
262- column_filter_dict = {}
263- for x in column_filter :
264- if isinstance (x , int ):
265- column_filter_dict [x ] = {}
266- elif isinstance (x , dict ):
267+ """
268+ table_filter = options .get ('table-filter' , [])
269+ if table_filter == []:
270+ # Return the rows unchanged.
271+ return rows
272+
273+ # Normalise the table_filter list into a dictionary, so we can easily lookup
274+ # column indexes -- Mainly converting integer filters to dictionaries. The
275+ # value of each index is a dictionary, which is empty if no filter was
276+ # specified.
277+ table_filter_dict = {}
278+ for cell_filter in table_filter :
279+ if isinstance (cell_filter , int ):
280+ table_filter_dict [cell_filter ] = {}
281+ elif isinstance (cell_filter , dict ):
267282 # Verify that we have a 'col' key
268- col = x .get ('col' , None )
269- assert col is not None , "Dictionary must contain a 'col' key: {}" . format (x )
283+ col = cell_filter .get ('col' , None )
284+ assert col is not None , "Dictionary type table filters must contain a 'col' key: {}" \
285+ .format (cell_filter )
270286 # remove the 'col' key and convert it to an int.
271- del x ['col' ]
287+ del cell_filter ['col' ]
272288 col = int (col )
273- # Add the remaining dict as our column_filter for this column index
274- column_filter_dict [col ] = x
289+ # Add the remaining dict as our filter for this column index
290+ table_filter_dict [col ] = cell_filter
275291 else :
276- raise Exception ("column-filter element is of non supported type: {}" .format (x ))
277- return [
278- [cell for idx , cell in enumerate (row ) if idx in column_filter_dict .keys ()]
279- # Filter out the rows ...
280- for row in raw_table_list if
281- # ... where cells (that have filters) in the row, doesn't satisfy
282- # all the filters. Use None as cell content if column index is out of bounds
283- all ([column_filter_cell (row [idx ] if idx < len (row ) else None , f ) for idx , f
284- in column_filter_dict .iteritems ()])
285- ]
292+ raise Exception ("table-filter element is of non supported type: {}"
293+ .format (cell_filter ))
294+
295+ # Lastly we need to iterate over the rows and only return the rows and
296+ # columns that should be kept.
297+ table_filter_keys = [k for k , v in table_filter_dict .items ()
298+ if 'exclude' not in v or bool (v ['exclude' ]) == False ]
299+ # We need to handle if the first row is a header.
300+ if 'header' in options and options ['header' ]:
301+ header_row = rows [0 ]
302+ rows = rows [1 :]
303+ yield [cell for idx , cell in enumerate (header_row ) if idx in table_filter_keys ]
304+
305+ for row in rows :
306+ # This dict should be fairly small, so `.items()` is fine to use in PY2
307+ # where it returns a list instead of a generator.
308+ #
309+ # If not all table_filters match this row, then filter it out by
310+ # continuing to the next.
311+ try :
312+ if not all ([table_filter_cell (row [idx ], cell_filter ) for \
313+ idx , cell_filter in table_filter_dict .items ()]):
314+ continue
315+ except IndexError :
316+ raise IndexError ("You specified a column index (zero indexed) that "
317+ "was bigger than the number of columns ({}) in the row: '{}'"
318+ .format (len (row ), row ))
319+
320+ # Remove the non specified columns, and return the resulting row.
321+ yield [cell for idx , cell in enumerate (row ) if idx in table_filter_keys ]
322+
286323
287324def read_data (options , include , data ):
288325 """
@@ -303,7 +340,7 @@ def read_data(options, include, data):
303340 raw_table_list = None
304341 panflute .debug ("pantable: file not found from the path" , include )
305342
306- return apply_column_filter ( options , raw_table_list )
343+ return list ( apply_table_filter ( options , raw_table_list ) )
307344
308345
309346def regularize_table_list (raw_table_list ):
0 commit comments