diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1a18c16..233b605 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 012 (December 2023)
+* Add new `Worksheet` class
+* Add `worksheet_items` attribute to the `Workbook` class
+
## 011 (November 2022)
* Remove extraneous debug print statements
diff --git a/docs/docs/api-ref.md b/docs/docs/api-ref.md
index e6491f2..ae70c87 100644
--- a/docs/docs/api-ref.md
+++ b/docs/docs/api-ref.md
@@ -34,12 +34,35 @@ Saves any changes to the workbook to a new file specified by the `new_file` para
`self.worksheets:` Returns a list of worksheets found in the workbook.
+`self.worksheet_items:` Returns a list of Worksheet objects found in the workbook.
+
`self.datasources:` Returns a list of Datasource objects found in the workbook.
`self.filename:` Returns the filename of the workbook.
`self.shapes` Returns a list of strings with the names of shapes found in the workbook.
+## Worksheets
+```python
+class Worksheet(worksheet_element, ds_index)
+```
+
+The Worksheet class represents the worksheets found in a Tableau Workbook. The library will access key attributes of each worksheet it finds.
+
+**Properties:**
+
+`self.name`: Returns the name of the worksheet.
+
+`self.datasources`: Returns a list of the Datasource objects that are used in the worksheet.
+
+`self.fields`: Returns a list of the Field objects that are used anywhere within the worksheet.
+
+`self.rows`: Returns a list of the Field objects present on the rows shelf. Items that are not fields, such as "Measure Names", are returned as strings. Returns `None` when the rows shelf is empty.
+
+`self.cols`: Returns a list of the Field objects present on the columns shelf. Items that are not fields, such as "Measure Names", are returned as strings. Returns `None` when the columns shelf is empty.
+
+`self.filter_fields`: Returns a list of the Field objects present on the Filter pane. Items that are not fields, such as Filter Actions, are returned as strings.
+
## Datasources
```python
class Datasource(dsxml, filename=None)
diff --git a/setup.py b/setup.py
index 90668ae..e06da6a 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
setup(
name='tableaudocumentapi',
- version='0.11',
+ version='0.12',
author='Tableau',
author_email='github@tableau.com',
url='https://github.com/tableau/document-api-python',
diff --git a/tableaudocumentapi/workbook.py b/tableaudocumentapi/workbook.py
index 4c425da..3c0ed1d 100644
--- a/tableaudocumentapi/workbook.py
+++ b/tableaudocumentapi/workbook.py
@@ -1,9 +1,157 @@
import weakref
+import re
-from tableaudocumentapi import Datasource, xfile
+from tableaudocumentapi import Datasource, Field, xfile
from tableaudocumentapi.xfile import xml_open, TableauInvalidFileException
+def _remove_brackets(text):
+    """Return ``text`` without its leading ``[`` and trailing ``]`` characters.
+
+    Every consecutive ``[`` at the start and every consecutive ``]`` at the
+    end is removed, not just a single pair.
+    """
+    without_opening = text.lstrip("[")
+    return without_opening.rstrip("]")
+
+def _clean_columns(marks):
+    """
+    Group the fields referenced by shelf text by their datasource.
+
+    The text stores each mark as [datasource].[column]; a mark may have more
+    than two bracketed parts, in which case the first part is taken as the
+    datasource and the last part as the column.
+
+    Returns a dictionary of datasource: [fields] so the entries can be mapped
+    to field items, or None when ``marks`` is None.
+    """
+    if marks is None:
+        return None
+    fields_by_datasource = {}
+    # a mark runs from "[" to the first "]" that is followed by ")", whitespace
+    # or the end of the string, which keeps multi-part marks in one piece
+    for mark in re.findall(r"(\[.*?\])(?=\)|\s|$)", str(marks)):
+        parts = mark.split("].[")
+        datasource = _remove_brackets(parts[0])
+        column = _remove_brackets(parts[-1])
+        # the field name is the first ":"-preceded segment that is followed by
+        # ordinal (ok), quantitative (qk), nominal (nk), or the string end
+        found = re.match(r".*?(?<=:)([^:]+)(?=:ok|:qk|:nk|$)", column)
+        # columns without that shape, eg. Measure Names, are kept verbatim
+        field = found.group(1) if found else column
+        fields_by_datasource.setdefault(datasource, []).append(field)
+    return fields_by_datasource
+
+def _ds_fields_to_tems(ds_fields, ds_index):
+    """
+    Map field ids grouped by datasource onto the datasources' field items.
+
+    ``ds_fields`` is a dictionary of datasource name: [field ids], as built by
+    ``_clean_columns``; ``ds_index`` maps datasource names to objects that
+    expose a ``fields`` lookup.
+
+    Returns a flat list holding, for every field id, the matching field item,
+    or the field id string itself when nothing matches (eg. Measure Names, or
+    a datasource name that is missing from ``ds_index``).
+    """
+    fields = []
+    for ds, field_ids in ds_fields.items():
+        try:
+            datasource = ds_index[ds]
+        except KeyError:
+            # the shelf references a datasource the index does not know about;
+            # keep the raw ids instead of failing the whole worksheet
+            fields.extend(field_ids)
+            continue
+        fields_dict = datasource.fields
+        for field_id in field_ids:
+            # many field ids include brackets, so we need to check for these as well
+            for candidate in (field_id, f"[{field_id}]"):
+                if candidate in fields_dict:
+                    fields.append(fields_dict.get(candidate))
+                    break
+            else:
+                fields.append(field_id)
+    return fields
+
+class Worksheet(object):
+    """
+    A class to parse key attributes of a worksheet.
+
+    ``worksheet_element`` is the worksheet's XML element and ``ds_index`` maps
+    datasource names to datasource objects.
+    """
+
+    def __init__(self, worksheet_element, ds_index):
+        self._worksheetRoot = worksheet_element
+        self.name = worksheet_element.attrib['name']
+        self._datasource_index = ds_index
+        self._datasources = self._prepare_datasources(self._worksheetRoot, self._datasource_index)
+        self._fields = self._prepare_datasource_dependencies(self._worksheetRoot)
+        self._rows = self._prepare_rows(self._worksheetRoot, self._datasource_index)
+        self._cols = self._prepare_cols(self._worksheetRoot, self._datasource_index)
+        self._filter_fields = self._prepare_filter_fields(self._worksheetRoot, self._datasource_index)
+
+    def __repr__(self):
+        datasources = ", ".join([ds.caption or ds.name for ds in self._datasources])
+        fields = ", ".join([f.name for f in self._fields])
+        return f"name: {self.name}, datasources: {datasources}, fields: {fields}"
+
+    def __iter__(self):
+        """Yield (attribute name, value) pairs, skipping the raw XML element."""
+        for key, value in vars(self).items():
+            if key != "_worksheetRoot":
+                yield key.lstrip("_"), value
+
+    @staticmethod
+    def _prepare_shelf(worksheet_element, ds_index, shelf):
+        """
+        Return the items found on the ``shelf`` element ('rows' or 'cols').
+
+        Returns None when the element is missing or holds no marks.
+        """
+        shelf_element = worksheet_element.find(f'.//{shelf}')
+        # find() returns None when the worksheet has no such element
+        if shelf_element is None:
+            return None
+        ds_fields = _clean_columns(shelf_element.text)
+        if not ds_fields:
+            return None
+        return _ds_fields_to_tems(ds_fields, ds_index)
+
+    @staticmethod
+    def _prepare_filter_fields(worksheet_element, ds_index):
+        """
+        Return the items listed under the worksheet's ``slices`` element.
+
+        Returns an empty list when there is no ``slices`` element and None
+        when the element holds no marks.
+        """
+        slices_list = worksheet_element.find(".//slices")
+        if slices_list is None:
+            return []
+        # entries without text would make str.join raise, so they are skipped
+        slices = [column.text for column in slices_list if column.text]
+        # combine slices into single string to use same function as rows/cols
+        ds_fields = _clean_columns(" ".join(slices))
+        if not ds_fields:
+            return None
+        return _ds_fields_to_tems(ds_fields, ds_index)
+
+    @staticmethod
+    def _prepare_datasources(worksheet_element, ds_index):
+        """Return the indexed datasources named under ``datasources``, or [] if it is absent."""
+        worksheet_datasources = worksheet_element.find(".//datasources")
+        if worksheet_datasources is None:
+            return []
+        return [ds_index[ds.attrib["name"]] for ds in worksheet_datasources]
+
+    @property
+    def datasources(self):
+        return self._datasources
+
+    @staticmethod
+    def _prepare_datasource_dependencies(worksheet_element):
+        """Return a Field for every column of every ``datasource-dependencies`` element."""
+        fields = []
+        # collect from all dependency blocks, not only the first one
+        for dependency in worksheet_element.findall('.//datasource-dependencies'):
+            columns = dependency.findall('.//column')
+            fields.extend(Field.from_column_xml(column) for column in columns)
+        return fields
+
+    @property
+    def fields(self):
+        return self._fields
+
+    @property
+    def fields_list(self):
+        return [field.caption for field in self._fields]
+
+    @staticmethod
+    def _prepare_rows(worksheet_element, ds_index):
+        return Worksheet._prepare_shelf(worksheet_element, ds_index, 'rows')
+
+    @staticmethod
+    def _prepare_cols(worksheet_element, ds_index):
+        return Worksheet._prepare_shelf(worksheet_element, ds_index, 'cols')
+
+    @property
+    def rows(self):
+        return self._rows
+
+    @property
+    def cols(self):
+        return self._cols
+
+    @property
+    def filter_fields(self):
+        return self._filter_fields
+
class Workbook(object):
"""A class for writing Tableau workbook files."""
@@ -31,6 +179,8 @@ def __init__(self, filename):
self._worksheets = self._prepare_worksheets(
self._workbookRoot, self._datasource_index)
+
+ self._worksheet_items = self._prepare_worksheet_items(self._workbookRoot, self._datasource_index)
self._shapes = self._prepare_shapes(self._workbookRoot)
@@ -45,6 +195,10 @@ def datasources(self):
@property
def worksheets(self):
return self._worksheets
+
+    @property
+    def worksheet_items(self):
+        """The Worksheet objects prepared for this workbook."""
+        items = self._worksheet_items
+        return items
@property
def filename(self):
@@ -142,7 +296,16 @@ def _prepare_worksheets(xml_root, ds_index):
datasource.fields[column_name].add_used_in(worksheet_name)
return worksheets
-
+
+    @staticmethod
+    def _prepare_worksheet_items(xml_root, ds_index):
+        """Build a Worksheet object for each child of the ``worksheets`` element.
+
+        Returns an empty list when ``xml_root`` has no ``worksheets`` element.
+        """
+        container = xml_root.find('.//worksheets')
+        if container is None:
+            return []
+        items = []
+        for child in container:
+            items.append(Worksheet(child, ds_index))
+        return items
+
@staticmethod
def _prepare_shapes(xml_root):
shapes = []
diff --git a/test/assets/worksheet_no_datasources.twb b/test/assets/worksheet_no_datasources.twb
new file mode 100644
index 0000000..880a44d
--- /dev/null
+++ b/test/assets/worksheet_no_datasources.twb
@@ -0,0 +1,20 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test/assets/worksheet_no_shelves.twb b/test/assets/worksheet_no_shelves.twb
new file mode 100644
index 0000000..518c2ad
--- /dev/null
+++ b/test/assets/worksheet_no_shelves.twb
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test/test_workbook.py b/test/test_workbook.py
index 41501db..769ea24 100644
--- a/test/test_workbook.py
+++ b/test/test_workbook.py
@@ -22,6 +22,16 @@
'filtering.twb'
)
+# Fixture for the tests covering a worksheet without a datasources element.
+WORKSHEET_NO_DATASOURCES_FILE = os.path.join(
+    TEST_ASSET_DIR,
+    'worksheet_no_datasources.twb'
+)
+
+# Fixture for the tests covering a worksheet without rows/cols elements.
+WORKSHEET_NO_SHELVES_FILE = os.path.join(
+    TEST_ASSET_DIR,
+    'worksheet_no_shelves.twb'
+)
+
class EphemeralFields(unittest.TestCase):
def test_ephemeral_fields_do_not_cause_errors(self):
@@ -49,3 +59,79 @@ def test_dashboards_setup(self):
wb = Workbook(DASHBOARDS_FILE)
self.assertIsNotNone(wb)
self.assertEqual(wb.dashboards, ['setTest'])
+
+class Worksheets(unittest.TestCase):
+    """Tests for the Worksheet objects exposed through Workbook.worksheet_items."""
+
+    @staticmethod
+    def _first_worksheet(path):
+        """Load the workbook at ``path`` and return its first Worksheet."""
+        return Workbook(path).worksheet_items[0]
+
+    def test_worksheets_setup(self):
+        wb = Workbook(DASHBOARDS_FILE)
+        self.assertEqual(len(wb.worksheet_items), 2)
+        worksheet_names = sorted(ws.name for ws in wb.worksheet_items)
+        self.assertEqual(worksheet_names[0], 'Sheet 1')
+
+    def test_worksheet_fields_returns_list_not_method(self):
+        # regression guard: fields must expose the prepared list, not the
+        # _prepare_datasource_dependencies function
+        ws = self._first_worksheet(DASHBOARDS_FILE)
+        self.assertIsInstance(ws.fields, list, "fields should return a list, not a method object")
+
+    def test_worksheet_fields_are_field_objects(self):
+        from tableaudocumentapi import Field
+        ws = self._first_worksheet(DASHBOARDS_FILE)
+        for f in ws.fields:
+            self.assertIsInstance(f, Field)
+
+    def test_worksheet_datasources_not_empty(self):
+        ws = self._first_worksheet(DASHBOARDS_FILE)
+        self.assertGreater(len(ws.datasources), 0)
+
+    def test_worksheet_rows_returns_list(self):
+        ws = self._first_worksheet(DASHBOARDS_FILE)
+        # Sheet 1 has two fields on rows shelf
+        self.assertIsNotNone(ws.rows)
+        self.assertIsInstance(ws.rows, list)
+        self.assertEqual(len(ws.rows), 2)
+
+    def test_worksheet_cols_empty_shelf_returns_none_or_list(self):
+        # Sheet 1's cols shelf is empty - reading it should not crash
+        ws = self._first_worksheet(DASHBOARDS_FILE)
+        result = ws.cols
+        self.assertTrue(isinstance(result, list) or result is None)
+
+    def test_worksheet_rows_field_names(self):
+        from tableaudocumentapi import Field
+        ws = self._first_worksheet(DASHBOARDS_FILE)
+        # Sheet 1 rows: Calculation_88946136969252864 (caption: SHOW) and "Burst Out Set list"
+        # _ds_fields_to_tems resolves via datasource.fields, so we get Field objects or raw strings
+        names = []
+        for f in ws.rows:
+            names.append(f.caption if isinstance(f, Field) else f)
+        self.assertIn('SHOW', names)  # Calculation resolved to its caption
+
+    def test_all_datasource_dependencies_collected(self):
+        # regression guard: a return inside the loop would stop after the
+        # first datasource-dependencies block
+        ws = self._first_worksheet(DASHBOARDS_FILE)
+        # Sheet 1 has 3 columns in its datasource-dependencies block
+        self.assertEqual(len(ws.fields), 3)
+
+    def test_worksheet_with_no_datasources_element_does_not_crash(self):
+        # find() returns None when the datasources element is absent
+        ws = self._first_worksheet(WORKSHEET_NO_DATASOURCES_FILE)
+        self.assertEqual(ws.datasources, [])
+
+    def test_worksheet_with_no_rows_element_does_not_crash(self):
+        # find() returns None when the rows element is absent
+        ws = self._first_worksheet(WORKSHEET_NO_SHELVES_FILE)
+        self.assertIsNone(ws.rows)
+
+    def test_worksheet_with_no_cols_element_does_not_crash(self):
+        # find() returns None when the cols element is absent
+        ws = self._first_worksheet(WORKSHEET_NO_SHELVES_FILE)
+        self.assertIsNone(ws.cols)
\ No newline at end of file