Skip to content

Commit c988088

Browse files
committed
Adding HappyBase Connection.create_table().
1 parent 91be693 commit c988088

File tree

2 files changed

+300
-0
lines changed

2 files changed

+300
-0
lines changed

gcloud/bigtable/happybase/connection.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,15 @@
1515
"""Google Cloud Bigtable HappyBase connection module."""
1616

1717

18+
import datetime
1819
import warnings
1920

2021
import six
2122

2223
from gcloud.bigtable.client import Client
24+
from gcloud.bigtable.column_family import GCRuleIntersection
25+
from gcloud.bigtable.column_family import MaxAgeGCRule
26+
from gcloud.bigtable.column_family import MaxVersionsGCRule
2327
from gcloud.bigtable.happybase.table import Table
2428
from gcloud.bigtable.table import Table as _LowLevelTable
2529

@@ -265,6 +269,77 @@ def tables(self):
265269

266270
return table_names
267271

272+
def create_table(self, name, families):
    """Create a table along with its column families.

    .. warning::

        Of the HappyBase column family options, only ``max_versions``
        and ``time_to_live`` can be used with Cloud Bigtable.

    .. note::

        This method is **not** atomic. One request is sent to create the
        table and then one more request per column family. A failure in
        any of those requests makes this method fail, and whatever was
        already created is not rolled back.

    Each value in ``families`` describes the options of one column
    family. HappyBase uses dictionaries for this (mirroring the
    ``ColumnDescriptor`` structure of the Thrift API) with these keys:

    * ``max_versions`` (``int``)
    * ``compression`` (``str``)
    * ``in_memory`` (``bool``)
    * ``bloom_filter_type`` (``str``)
    * ``bloom_filter_vector_size`` (``int``)
    * ``bloom_filter_nb_hashes`` (``int``)
    * ``block_cache_enabled`` (``bool``)
    * ``time_to_live`` (``int``)

    :type name: str
    :param name: The name of the table to be created.

    :type families: dict
    :param families: Maps column family names to column family options.
                     Names may be text or bytes (bytes are decoded as
                     UTF-8) and may carry a single trailing ``:``, which
                     is dropped. Each option can be a

                     * :class:`dict`
                     * :class:`.GarbageCollectionRule`

    :raises: :class:`TypeError <exceptions.TypeError>` if ``families`` is
             not a dictionary,
             :class:`ValueError <exceptions.ValueError>` if ``families``
             has no entries
    """
    if not isinstance(families, dict):
        raise TypeError('families arg must be a dictionary')

    if not families:
        raise ValueError('Cannot create table %r (no column '
                         'families specified)' % (name,))

    # Convert every option up front so that an invalid option is reported
    # before any request has been sent.
    rules_by_family = {}
    for family_id, family_option in families.items():
        if isinstance(family_id, six.binary_type):
            family_id = family_id.decode('utf-8')
        # HappyBase allows the family name to be written as ``name:``.
        family_id = family_id[:-1] if family_id.endswith(':') else family_id
        rules_by_family[family_id] = _parse_family_option(family_option)

    # First the table itself ...
    full_name = self._table_name(name)
    table = _LowLevelTable(full_name, self._cluster)
    table.create()

    # ... then one request per column family.
    for family_id, rule in rules_by_family.items():
        table.column_family(family_id, gc_rule=rule).create()
342+
268343
def delete_table(self, name, disable=False):
269344
"""Delete the specified table.
270345
@@ -336,3 +411,51 @@ def compact_table(self, name, major=False):
336411
"""
337412
raise NotImplementedError('The Cloud Bigtable API does not support '
338413
'compacting a table.')
414+
415+
416+
def _parse_family_option(option):
417+
"""Parses a column family option into a garbage collection rule.
418+
419+
.. note::
420+
421+
If ``option`` is not a dictionary, the type is not checked.
422+
If ``option`` is :data:`None`, there is nothing to do, since this
423+
is the correct output.
424+
425+
:type option: :class:`dict`,
426+
:data:`NoneType <types.NoneType>`,
427+
:class:`.GarbageCollectionRule`
428+
:param option: A column family option passes as a dictionary value in
429+
:meth:`Connection.create_table`.
430+
431+
:rtype: :class:`.GarbageCollectionRule`
432+
:returns: A garbage collection rule parsed from the input.
433+
:raises: :class:`ValueError <exceptions.ValueError>` if ``option`` is a
434+
dictionary but keys other than ``max_versions`` and
435+
``time_to_live`` are used.
436+
"""
437+
result = option
438+
if isinstance(result, dict):
439+
if not set(result.keys()) <= set(['max_versions', 'time_to_live']):
440+
raise ValueError('Cloud Bigtable only supports max_versions and '
441+
'time_to_live column family settings',
442+
'Received', result.keys())
443+
444+
max_num_versions = result.get('max_versions')
445+
max_age = None
446+
if 'time_to_live' in result:
447+
max_age = datetime.timedelta(seconds=result['time_to_live'])
448+
449+
if len(result) == 0:
450+
result = None
451+
elif len(result) == 1:
452+
if max_num_versions is None:
453+
result = MaxAgeGCRule(max_age)
454+
else:
455+
result = MaxVersionsGCRule(max_num_versions)
456+
else: # By our check above we know this means len(result) == 2.
457+
rule1 = MaxAgeGCRule(max_age)
458+
rule2 = MaxVersionsGCRule(max_num_versions)
459+
result = GCRuleIntersection(rules=[rule1, rule2])
460+
461+
return result

gcloud/bigtable/happybase/test_connection.py

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,90 @@ def test_tables_with_prefix(self):
311311
result = connection.tables()
312312
self.assertEqual(result, [unprefixed_table_name1])
313313

314+
def test_create_table(self):
    import operator
    from gcloud._testing import _Monkey
    from gcloud.bigtable.happybase import connection as MUT

    # Passing a cluster explicitly avoids the implicit environ check.
    cluster = _Cluster()
    connection = self._makeOne(autoconnect=False, cluster=cluster)

    # Stand-in for ``_parse_family_option``: records each option it gets
    # and always hands back the same sentinel rule.
    mock_gc_rule = object()
    seen_options = []

    def fake_parse_family_option(option):
        seen_options.append(option)
        return mock_gc_rule

    name = 'table-name'
    # One native string, one text and one bytes column family name.
    col_fam1, option1 = 'cf1', object()
    col_fam2, option2 = u'cf2', object()
    col_fam3, option3 = b'cf3', object()
    families = {
        col_fam1: option1,
        # A trailing colon is also allowed.
        col_fam2 + ':': option2,
        col_fam3 + b':': option3,
    }

    # Stand-in for ``_LowLevelTable``: keeps every table it builds.
    tables_created = []

    def fake_table_factory(*args, **kwargs):
        table = _MockLowLevelTable(*args, **kwargs)
        tables_created.append(table)
        return table

    with _Monkey(MUT, _LowLevelTable=fake_table_factory,
                 _parse_family_option=fake_parse_family_option):
        connection.create_table(name, families)

    # Exactly one table must have been built and created.
    [table_instance] = tables_created
    self.assertEqual(table_instance.args, (name, cluster))
    self.assertEqual(table_instance.kwargs, {})
    self.assertEqual(table_instance.create_calls, 1)

    # The parser mock must have seen all three options; the order is
    # unknown, so compare as sets.
    self.assertEqual(set(seen_options), set([option1, option2, option3]))

    # Three column families are expected. Their creation order follows
    # the non-deterministic dict.items(), so sort by ID before comparing.
    col_fam_created = table_instance.col_fam_created
    self.assertEqual(len(col_fam_created), 3)
    by_id = sorted(col_fam_created,
                   key=operator.attrgetter('column_family_id'))
    expected_ids = [col_fam1, col_fam2, col_fam3.decode('utf-8')]
    for family, expected_id in zip(by_id, expected_ids):
        self.assertEqual(family.column_family_id, expected_id)
        self.assertEqual(family.gc_rule, mock_gc_rule)
        self.assertEqual(family.create_calls, 1)
379+
380+
def test_create_table_bad_type(self):
    # A ``families`` argument that is not a dictionary must be rejected.
    # Passing a cluster explicitly avoids the implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=_Cluster())

    not_a_dict = None
    self.assertRaises(TypeError, connection.create_table,
                      'table-name', not_a_dict)
388+
389+
def test_create_table_bad_value(self):
    # A ``families`` dictionary without any entries must be rejected.
    # Passing a cluster explicitly avoids the implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=_Cluster())

    no_families = {}
    self.assertRaises(ValueError, connection.create_table,
                      'table-name', no_families)
397+
314398
def test_delete_table(self):
315399
from gcloud._testing import _Monkey
316400
from gcloud.bigtable.happybase import connection as MUT
@@ -376,6 +460,78 @@ def test_compact_table(self):
376460
connection.compact_table(name, major=major)
377461

378462

463+
class Test__parse_family_option(unittest2.TestCase):
    """Unit tests for ``connection._parse_family_option``."""

    def _callFUT(self, option):
        # Invoke the function under test.
        from gcloud.bigtable.happybase import connection
        return connection._parse_family_option(option)

    def test_dictionary_no_keys(self):
        # An empty dictionary yields no rule.
        self.assertEqual(self._callFUT({}), None)

    def test_null(self):
        # ``None`` is handed back as-is.
        self.assertEqual(self._callFUT(None), None)

    def test_dictionary_bad_key(self):
        # Any key other than the two supported ones is an error.
        self.assertRaises(ValueError, self._callFUT, {'badkey': None})

    def test_dictionary_versions_key(self):
        from gcloud.bigtable.column_family import MaxVersionsGCRule

        versions = 42
        actual = self._callFUT({'max_versions': versions})

        self.assertEqual(actual, MaxVersionsGCRule(versions))

    def test_dictionary_ttl_key(self):
        import datetime
        from gcloud.bigtable.column_family import MaxAgeGCRule

        # ``time_to_live`` is given in seconds; use exactly one day.
        one_day = datetime.timedelta(days=1)
        actual = self._callFUT({'time_to_live': 24 * 60 * 60})

        self.assertEqual(actual, MaxAgeGCRule(one_day))

    def test_dictionary_both_keys(self):
        import datetime
        from gcloud.bigtable.column_family import GCRuleIntersection
        from gcloud.bigtable.column_family import MaxAgeGCRule
        from gcloud.bigtable.column_family import MaxVersionsGCRule

        versions = 42
        one_day = datetime.timedelta(days=1)
        actual = self._callFUT({
            'max_versions': versions,
            'time_to_live': 24 * 60 * 60,
        })

        # NOTE: This relies on the order of the rules in the function we
        #       are calling matching the order used here (age, versions).
        expected = GCRuleIntersection(rules=[
            MaxAgeGCRule(one_day),
            MaxVersionsGCRule(versions),
        ])
        self.assertEqual(actual, expected)

    def test_non_dictionary(self):
        # Anything that is not a dictionary passes through untouched.
        option = object()
        self.assertNotIsInstance(option, dict)
        self.assertEqual(self._callFUT(option), option)
533+
534+
379535
class _Client(object):
380536

381537
def __init__(self, *args, **kwargs):
@@ -418,12 +574,33 @@ def list_tables(self):
418574
return self.list_tables_result
419575

420576

577+
class _MockLowLevelColumnFamily(object):
578+
579+
def __init__(self, column_family_id, gc_rule=None):
580+
self.column_family_id = column_family_id
581+
self.gc_rule = gc_rule
582+
self.create_calls = 0
583+
584+
def create(self):
585+
self.create_calls += 1
586+
587+
421588
class _MockLowLevelTable(object):
422589

423590
def __init__(self, *args, **kwargs):
424591
self.args = args
425592
self.kwargs = kwargs
426593
self.delete_calls = 0
594+
self.create_calls = 0
595+
self.col_fam_created = []
427596

428597
def delete(self):
429598
self.delete_calls += 1
599+
600+
def create(self):
601+
self.create_calls += 1
602+
603+
def column_family(self, column_family_id, gc_rule=None):
604+
result = _MockLowLevelColumnFamily(column_family_id, gc_rule=gc_rule)
605+
self.col_fam_created.append(result)
606+
return result

0 commit comments

Comments
 (0)