# Source code for collabutils.oc

"""
OwnCloud compatible cloud services (e.g. NextCloud) often offer good support for collaborative
editing of tabular data in an online replica of Excel.

Public read-only shares provide a simple way to access such data programmatically.
"""
import typing
import pathlib
import tempfile
import collections

from csvw.dsv import UnicodeWriter
from collabutils.util import warn
try:
    import owncloud
    import openpyxl
except ImportError as e:  # pragma: no cover
    warn('oc.Spreadsheet', 'owncloud', e)

from collabutils.base import SharedSpreadsheetMixin

__all__ = ['Spreadsheet']

# Lightweight record describing one entry of a share listing.
File = collections.namedtuple('File', ['name', 'path', 'content_type'])


class Share:
    """Wrapper around an `owncloud.Client` created from a public share link."""

    def __init__(self, link):
        """
        :param link: Public share link handed to `owncloud.Client.from_public_link`.
        """
        self.client = owncloud.Client.from_public_link(link)

    @property
    def files(self):
        """
        `File` tuples (name, path, content type) for the entries the client lists at path `''`.

        The listing is requested from the client on every access.
        """
        listing = []
        for entry in self.client.list(''):
            listing.append(File(entry.name, entry.path, entry.get_content_type()))
        return listing


class Spreadsheet(SharedSpreadsheetMixin):
    r"""
    An Excel workbook picked by file name from a public OwnCloud-compatible share.

    Typical usage in a `cldfbench.Dataset`'s `cmd_download` method:

    .. code-block:: python

        >>> document = Spreadsheet(
        ...     'test.xlsx', 'https://share.eva.mpg.de/index.php/s/pGaomxWqHPqxeEA')
        >>> document.fetch_sheets(
        ...     sheets={'Varieties': 'languages.tsv'},
        ...     outdir=dataset.etc_dir,
        ...     delimiter='\t')
    """
    def __init__(self, fname, share_link):
        """
        :param fname: Name of the workbook file as it appears in the share listing.
        :param share_link: Link of the public share containing the file.
        :raises ValueError: If the share listing contains no file named `fname`.
        """
        share = Share(share_link)
        for f in share.files:
            if f.name == fname:
                self.file = f
                break
        else:
            raise ValueError(fname)
        self.client = share.client

    def fetch_sheets(
            self,
            sheets: typing.Optional[typing.Dict[str, str]] = None,
            outdir: typing.Optional[typing.Union[pathlib.Path, str]] = '.',
            **kw,
    ):
        """
        Download the workbook and write its sheets as delimited text files.

        :param sheets: Mapping of sheet name to output file name. Only sheets named in the \
mapping are written. If `None`, every sheet is written to `sheet_<n>.csv`, where `<n>` is the \
1-based position of the sheet in the workbook.
        :param outdir: Directory in which the files are written; `None` (or an empty value) \
selects the current directory.
        :param kw: Keyword arguments passed on to `csvw.dsv.UnicodeWriter`.
        """
        def _excel_value(x):
            if x is None:
                return ""
            # `float.is_integer` is False for inf/nan, whereas `int(x) == x` would raise.
            if isinstance(x, float) and x.is_integer():
                # Since Excel does not have an integer type, integers are rendered as "n.0",
                # which in turn confuses type detection of tools like csvkit. Thus, we normalize
                # numbers of the form "n.0" to "n".
                return '{0}'.format(int(x))  # pragma: no cover
            return '{0}'.format(x).strip()

        outdir = pathlib.Path(outdir or '.')

        # The download only needs to live until the workbook has been converted.
        with tempfile.TemporaryDirectory() as tmp:
            tmppath = str(pathlib.Path(tmp) / self.file.name)
            self.client.get_file(self.file.path, tmppath)
            # data_only=True: read cell values rather than formulas.
            wb = openpyxl.load_workbook(tmppath, data_only=True)
            for i, sname in enumerate(wb.sheetnames, start=1):
                if sheets is None or (sname in sheets):
                    fname = 'sheet_{}.csv'.format(i) if sheets is None else sheets[sname]
                    sheet = wb[sname]
                    with UnicodeWriter(outdir / fname, **kw) as writer:
                        for row in sheet.rows:
                            writer.writerow([_excel_value(col.value) for col in row])