initial commit 2

author: stephanchrst <stephanchrst@gmail.com> 2022-05-10 21:51:50 +0700
committer: stephanchrst <stephanchrst@gmail.com> 2022-05-10 21:51:50 +0700
commit: 3751379f1e9a4c215fb6eb898b4ccc67659b9ace (patch)
tree: a44932296ef4a9b71d5f010906253d8c53727726 /addons/base_import/models
parent: 0a15094050bfde69a06d6eff798e9a8ddf2b8c21 (diff)
4 files changed, 1261 insertions, 0 deletions
diff --git a/addons/base_import/models/__init__.py b/addons/base_import/models/__init__.py
new file mode 100644
index 00000000..f349a3ca
--- /dev/null
+++ b/addons/base_import/models/__init__.py
@@ -0,0 +1,5 @@
+# -*- coding: utf-8 -*-
+# Part of Odoo. See LICENSE file for full copyright and licensing details.
+
+from . import base_import
+from . import test_models
diff --git a/addons/base_import/models/base_import.py b/addons/base_import/models/base_import.py
new file mode 100644
index 00000000..733c3060
--- /dev/null
+++ b/addons/base_import/models/base_import.py
@@ -0,0 +1,1050 @@
+# -*- coding: utf-8 -*-
+# Part of Odoo. See LICENSE file for full copyright and licensing details.
+
+import base64
+import binascii
+import codecs
+import collections
+import unicodedata
+
+import chardet
+import datetime
+import io
+import itertools
+import logging
+import psycopg2
+import operator
+import os
+import re
+import requests
+
+from PIL import Image
+
+from odoo import api, fields, models
+from odoo.exceptions import AccessError
+from odoo.tools.translate import _
+from odoo.tools.mimetypes import guess_mimetype
+from odoo.tools import config, DEFAULT_SERVER_DATE_FORMAT, DEFAULT_SERVER_DATETIME_FORMAT, pycompat
+
+FIELDS_RECURSION_LIMIT = 3
+ERROR_PREVIEW_BYTES = 200
+DEFAULT_IMAGE_TIMEOUT = 3
+DEFAULT_IMAGE_MAXBYTES = 10 * 1024 * 1024
+DEFAULT_IMAGE_REGEX = r"^(?:http|https)://"
+DEFAULT_IMAGE_CHUNK_SIZE = 32768
+IMAGE_FIELDS = ["icon", "image", "logo", "picture"]
+_logger = logging.getLogger(__name__)
+BOM_MAP = {
+    'utf-16le': codecs.BOM_UTF16_LE,
+    'utf-16be': codecs.BOM_UTF16_BE,
+    'utf-32le': codecs.BOM_UTF32_LE,
+    'utf-32be': codecs.BOM_UTF32_BE,
+}
+
+try:
+    import xlrd
+    try:
+        from xlrd import xlsx
+    except ImportError:
+        xlsx = None
+except ImportError:
+    xlrd = xlsx = None
+
+try:
+    from . import odf_ods_reader
+except ImportError:
+    odf_ods_reader = None
+
+FILE_TYPE_DICT = {
+    'text/csv': ('csv', True, None),
+    'application/vnd.ms-excel': ('xls', xlrd, 'xlrd'),
+    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ('xlsx', xlsx, 'xlrd >= 1.0.0'),
+    'application/vnd.oasis.opendocument.spreadsheet': ('ods', odf_ods_reader, 'odfpy')
+}
+EXTENSIONS = {
+    '.' + ext: handler
+    for mime, (ext, handler, req) in FILE_TYPE_DICT.items()
+}
+
+class Base(models.AbstractModel):
+    _inherit = 'base'
+
+    @api.model
+    def get_import_templates(self):
+        """
+        Get the import templates label and path.
+
+        :return: a list(dict) containing label and template path
+                 like ``[{'label': 'foo', 'template': 'path'}]``
+        """
+        return []
+
+class ImportMapping(models.Model):
+    """ mapping of previous column:field selections
+
+    This is useful when repeatedly importing from a third-party
+    system: column names generated by the external system may
+    not match Odoo's field names or labels. This model is used
+    to save the mapping between column names and fields so that
+    next time a user imports from the same third-party systems
+    we can automatically match the columns to the correct field
+    without them having to re-enter the mapping every single
+    time.
+    """
+    _name = 'base_import.mapping'
+    _description = 'Base Import Mapping'
+
+    res_model = fields.Char(index=True)
+    column_name = fields.Char()
+    field_name = fields.Char()
+
+
+class ResUsers(models.Model):
+    _inherit = 'res.users'
+
+    def _can_import_remote_urls(self):
+        """ Hook to decide whether the current user is allowed to import
+        images via URL (as such an import can DoS a worker). By default,
+        allows the administrator group.
+
+        :rtype: bool
+        """
+        self.ensure_one()
+        return self._is_admin()
+
+class Import(models.TransientModel):
+
+    _name = 'base_import.import'
+    _description = 'Base Import'
+
+    # allow imports to survive for 12h in case user is slow
+    _transient_max_hours = 12.0
+
+    res_model = fields.Char('Model')
+    file = fields.Binary('File', help="File to check and/or import, raw binary (not base64)", attachment=False)
+    file_name = fields.Char('File Name')
+    file_type = fields.Char('File Type')
+
+    @api.model
+    def get_fields(self, model, depth=FIELDS_RECURSION_LIMIT):
+        """ Recursively get fields for the provided model (through
+        fields_get) and filter them according to importability
+
+        The output format is a list of ``Field``, with ``Field``
+        defined as:
+
+        .. class:: Field
+
+            .. attribute:: id (str)
+
+                A non-unique identifier for the field, used to compute
+                the span of the ``required`` attribute: if multiple
+                ``required`` fields have the same id, only one of them
+                is necessary.
+
+            .. attribute:: name (str)
+
+                The field's logical (Odoo) name within the scope of
+                its parent.
+
+            .. attribute:: string (str)
+
+                The field's human-readable name (``@string``)
+
+            .. attribute:: required (bool)
+
+                Whether the field is marked as required in the
+                model. Clients must provide non-empty import values
+                for all required fields or the import will error out.
+
+            .. attribute:: fields (list(Field))
+
+                The current field's subfields. The database and
+                external identifiers for m2o and m2m fields; a
+                filtered and transformed fields_get for o2m fields (to
+                a variable depth defined by ``depth``).
+
+                Fields with no sub-fields will have an empty list of
+                sub-fields.
+
+        :param str model: name of the model to get fields from
+        :param int depth: depth of recursion into o2m fields
+        """
+        Model = self.env[model]
+        importable_fields = [{
+            'id': 'id',
+            'name': 'id',
+            'string': _("External ID"),
+            'required': False,
+            'fields': [],
+            'type': 'id',
+        }]
+        if not depth:
+            return importable_fields
+
+        model_fields = Model.fields_get()
+        blacklist = models.MAGIC_COLUMNS + [Model.CONCURRENCY_CHECK_FIELD]
+        for name, field in model_fields.items():
+            if name in blacklist:
+                continue
+            # an empty string means the field is deprecated, @deprecated must
+            # be absent or False to mean not-deprecated
+            if field.get('deprecated', False) is not False:
+                continue
+            if field.get('readonly'):
+                states = field.get('states')
+                if not states:
+                    continue
+                # states = {state: [(attr, value), (attr2, value2)], state2:...}
+                if not any(attr == 'readonly' and value is False
+                           for attr, value in itertools.chain.from_iterable(states.values())):
+                    continue
+            field_value = {
+                'id': name,
+                'name': name,
+                'string': field['string'],
+                # fields_get() does not always provide 'required': default to False
+                'required': bool(field.get('required')),
+                'fields': [],
+                'type': field['type'],
+            }
+
+            if field['type'] in ('many2many', 'many2one'):
+                field_value['fields'] = [
+                    dict(field_value, name='id', string=_("External ID"), type='id'),
+                    dict(field_value, name='.id', string=_("Database ID"), type='id'),
+                ]
+            elif field['type'] == 'one2many':
+                field_value['fields'] = self.get_fields(field['relation'], depth=depth-1)
+                if self.user_has_groups('base.group_no_one'):
+                    field_value['fields'].append({'id': '.id', 'name': '.id', 'string': _("Database ID"), 'required': False, 'fields': [], 'type': 'id'})
+
+            importable_fields.append(field_value)
+
+        # TODO: cache on model?
+        return importable_fields
+
+    def _read_file(self, options):
+        """ Dispatch to specific method to read file content, according to its mimetype or file type
+            :param options: dict of reading options (quoting, separator, ...)
+        """
+        self.ensure_one()
+        # guess mimetype from file content
+        mimetype = guess_mimetype(self.file or b'')
+        (file_extension, handler, req) = FILE_TYPE_DICT.get(mimetype, (None, None, None))
+        if handler:
+            try:
+                return getattr(self, '_read_' + file_extension)(options)
+            except Exception:
+                _logger.warning("Failed to read file '%s' (transient id %d) using guessed mimetype %s", self.file_name or '<unknown>', self.id, mimetype)
+
+        # try reading with user-provided mimetype
+        (file_extension, handler, req) = FILE_TYPE_DICT.get(self.file_type, (None, None, None))
+        if handler:
+            try:
+                return getattr(self, '_read_' + file_extension)(options)
+            except Exception:
+                _logger.warning("Failed to read file '%s' (transient id %d) using user-provided mimetype %s", self.file_name or '<unknown>', self.id, self.file_type)
+
+        # fallback on file extensions as mime types can be unreliable (e.g.
+        # software setting incorrect mime types, or non-installed software
+        # leading to browser not sending mime types)
+        if self.file_name:
+            p, ext = os.path.splitext(self.file_name)
+            if ext in EXTENSIONS:
+                try:
+                    return getattr(self, '_read_' + ext[1:])(options)
+                except Exception:
+                    _logger.warning("Failed to read file '%s' (transient id %s) using file extension", self.file_name, self.id)
+
+        if req:
+            raise ImportError(_("Unable to load \"{extension}\" file: requires Python module \"{modname}\"").format(extension=file_extension, modname=req))
+        raise ValueError(_("Unsupported file format \"{}\", import only supports CSV, ODS, XLS and XLSX").format(self.file_type))
+
+    def _read_xls(self, options):
+        """ Read file content, using xlrd lib """
+        book = xlrd.open_workbook(file_contents=self.file or b'')
+        sheets = options['sheets'] = book.sheet_names()
+        sheet = options['sheet'] = options.get('sheet') or sheets[0]
+        return self._read_xls_book(book, sheet)
+
+    def _read_xls_book(self, book, sheet_name):
+        sheet = book.sheet_by_name(sheet_name)
+        # emulate Sheet.get_rows for pre-0.9.4
+        for rowx, row in enumerate(map(sheet.row, range(sheet.nrows)), 1):
+            values = []
+            for colx, cell in enumerate(row, 1):
+                if cell.ctype is xlrd.XL_CELL_NUMBER:
+                    is_float = cell.value % 1 != 0.0
+                    values.append(
+                        str(cell.value)
+                        if is_float
+                        else str(int(cell.value))
+                    )
+                elif cell.ctype is xlrd.XL_CELL_DATE:
+                    is_datetime = cell.value % 1 != 0.0
+                    # emulate xldate_as_datetime for pre-0.9.3
+                    dt = datetime.datetime(*xlrd.xldate.xldate_as_tuple(cell.value, book.datemode))
+                    values.append(
+                        dt.strftime(DEFAULT_SERVER_DATETIME_FORMAT)
+                        if is_datetime
+                        else dt.strftime(DEFAULT_SERVER_DATE_FORMAT)
+                    )
+                elif cell.ctype is xlrd.XL_CELL_BOOLEAN:
+                    values.append(u'True' if cell.value else u'False')
+                elif cell.ctype is xlrd.XL_CELL_ERROR:
+                    raise ValueError(
+                        _("Invalid cell value at row %(row)s, column %(col)s: %(cell_value)s") % {
+                            'row': rowx,
+                            'col': colx,
+                            'cell_value': xlrd.error_text_from_code.get(cell.value, _("unknown error code %s", cell.value))
+                        }
+                    )
+                else:
+                    values.append(cell.value)
+            if any(x for x in values if x.strip()):
+                yield values
+
+    # use the same method for xlsx and xls files
+    _read_xlsx = _read_xls
+
+    def _read_ods(self, options):
+        """ Read file content using ODSReader custom lib """
+        doc = odf_ods_reader.ODSReader(file=io.BytesIO(self.file or b''))
+        sheets = options['sheets'] = list(doc.SHEETS.keys())
+        sheet = options['sheet'] = options.get('sheet') or sheets[0]
+
+        return (
+            row
+            for row in doc.getSheet(sheet)
+            if any(x for x in row if x.strip())
+        )
+
+    def _read_csv(self, options):
+        """ Returns a CSV-parsed iterator of all non-empty lines in the file
+            :raises csv.Error: if an error is detected during CSV parsing
+        """
+        csv_data = self.file or b''
+        if not csv_data:
+            return iter([])
+
+        encoding = options.get('encoding')
+        if not encoding:
+            encoding = options['encoding'] = chardet.detect(csv_data)['encoding'].lower()
+            # some versions of chardet (e.g. 2.3.0 but not 3.x) will return
+            # utf-(16|32)(le|be), which for python means "ignore / don't strip
+            # BOM". We don't want that, so rectify the encoding to non-marked
+            # IFF the guessed encoding is LE/BE and csv_data starts with a BOM
+            bom = BOM_MAP.get(encoding)
+            if bom and csv_data.startswith(bom):
+                encoding = options['encoding'] = encoding[:-2]
+
+        if encoding != 'utf-8':
+            csv_data = csv_data.decode(encoding).encode('utf-8')
+
+        separator = options.get('separator')
+        if not separator:
+            # default for unspecified separator so user gets a message about
+            # having to specify it
+            separator = ','
+            for candidate in (',', ';', '\t', ' ', '|', unicodedata.lookup('unit separator')):
+                # pass through the CSV: if all rows have the same length
+                # and no row is a single column wide, assume it's the correct one
+                it = pycompat.csv_reader(io.BytesIO(csv_data), quotechar=options['quoting'], delimiter=candidate)
+                w = None
+                for row in it:
+                    width = len(row)
+                    if w is None:
+                        w = width
+                    if width == 1 or width != w:
+                        break # next candidate
+                else: # nobreak
+                    separator = options['separator'] = candidate
+                    break
+
+        csv_iterator = pycompat.csv_reader(
+            io.BytesIO(csv_data),
+            quotechar=options['quoting'],
+            delimiter=separator)
+
+        return (
+            row for row in csv_iterator
+            if any(x for x in row if x.strip())
+        )
+
+    @api.model
+    def _try_match_column(self, preview_values, options):
+        """ Returns the potential field types, based on the preview values, using heuristics
+            :param preview_values: list of values of the column whose type is to be determined
+            :param options: parsing options
+        """
+        values = set(preview_values)
+        # If all preview values are empty, the column can be any field
+        if values == {''}:
+            return ['all']
+
+        # If all values start with __export__, this is probably an id
+        if all(v.startswith('__export__') for v in values):
+            return ['id', 'many2many', 'many2one', 'one2many']
+
+        # If all non-empty values are digits, the type can be id, integer, char,
+        # float, monetary or relational; with only 0 and 1, also a boolean
+        if all(v.isdigit() for v in values if v):
+            field_type = ['id', 'integer', 'char', 'float', 'monetary', 'many2one', 'many2many', 'one2many']
+            if {'0', '1', ''}.issuperset(values):
+                field_type.append('boolean')
+            return field_type
+
+        # If all values are either True or False, type is boolean
+        if all(val.lower() in ('true', 'false', 't', 'f', '') for val in preview_values):
+            return ['boolean']
+
+        # If all values can be cast to float, type is either float or monetary
+        results = []
+        try:
+            thousand_separator = decimal_separator = False
+            for val in preview_values:
+                val = val.strip()
+                if not val:
+                    continue
+                # the currency symbol may be to the left or right of the value
+                val = self._remove_currency_symbol(val)
+                if val:
+                    if options.get('float_thousand_separator') and options.get('float_decimal_separator'):
+                        val = val.replace(options['float_thousand_separator'], '').replace(options['float_decimal_separator'], '.')
+                    # We are now sure that this is a float, but we still need to find the
+                    # thousand and decimal separator
+                    else:
+                        if val.count('.') > 1:
+                            options['float_thousand_separator'] = '.'
+                            options['float_decimal_separator'] = ','
+                        elif val.count(',') > 1:
+                            options['float_thousand_separator'] = ','
+                            options['float_decimal_separator'] = '.'
+                        elif val.find('.') > val.find(','):
+                            thousand_separator = ','
+                            decimal_separator = '.'
+                        elif val.find(',') > val.find('.'):
+                            thousand_separator = '.'
+                            decimal_separator = ','
+                else:
+                    # This is not a float so exit this try
+                    float('a')
+            if thousand_separator and not options.get('float_decimal_separator'):
+                options['float_thousand_separator'] = thousand_separator
+                options['float_decimal_separator'] = decimal_separator
+            results = ['float', 'monetary']
+        except ValueError:
+            pass
+
+        results += self._try_match_date_time(preview_values, options)
+        if results:
+            return results
+
+        return ['id', 'text', 'boolean', 'char', 'datetime', 'selection', 'many2one', 'one2many', 'many2many', 'html']
+
+
+    def _try_match_date_time(self, preview_values, options):
+        # Or a date/datetime if it matches the pattern
+        date_patterns = [options['date_format']] if options.get(
+            'date_format') else []
+        user_date_format = self.env['res.lang']._lang_get(self.env.user.lang).date_format
+        if user_date_format:
+            try:
+                to_re(user_date_format)
+                date_patterns.append(user_date_format)
+            except KeyError:
+                pass
+        date_patterns.extend(DATE_PATTERNS)
+        match = check_patterns(date_patterns, preview_values)
+        if match:
+            options['date_format'] = match
+            return ['date', 'datetime']
+
+        datetime_patterns = [options['datetime_format']] if options.get(
+            'datetime_format') else []
+        datetime_patterns.extend(
+            "%s %s" % (d, t)
+            for d in date_patterns
+            for t in TIME_PATTERNS
+        )
+        match = check_patterns(datetime_patterns, preview_values)
+        if match:
+            options['datetime_format'] = match
+            return ['datetime']
+
+        return []
+
+    @api.model
+    def _find_type_from_preview(self, options, preview):
+        type_fields = []
+        if preview:
+            for column in range(0, len(preview[0])):
+                preview_values = [value[column].strip() for value in preview]
+                type_field = self._try_match_column(preview_values, options)
+                type_fields.append(type_field)
+        return type_fields
+
+    def _match_header(self, header, fields, options):
+        """ Attempts to match a given header to a field of the
+            imported model.
+
+            :param str header: header name from the CSV file
+            :param fields:
+            :param dict options:
+            :returns: an empty list if the header couldn't be matched, or
+                      all the fields to traverse
+            :rtype: list(Field)
+        """
+        string_match = None
+        IrTranslation = self.env['ir.translation']
+        for field in fields:
+            # FIXME: should match all translations & original
+            # TODO: use string distance (levenshtein? hamming?)
+            if header.lower() == field['name'].lower():
+                return [field]
+            if header.lower() == field['string'].lower():
+                # matching on the label is not reliable because
+                # field strings have no unique constraint
+                string_match = field
+            translated_header = IrTranslation._get_source('ir.model.fields,field_description', 'model', self.env.lang, header).lower()
+            if translated_header == field['string'].lower():
+                string_match = field
+        if string_match:
+            # this behavior is only applied if there is no matching field['name']
+            return [string_match]
+
+        if '/' not in header:
+            return []
+
+        # relational field path
+        traversal = []
+        subfields = fields
+        # Iteratively dive into fields tree
+        for section in header.split('/'):
+            # Strip section in case spaces are added around '/' for
+            # readability of paths
+            match = self._match_header(section.strip(), subfields, options)
+            # Any match failure, exit
+            if not match:
+                return []
+            # prep subfields for next iteration within match[0]
+            field = match[0]
+            subfields = field['fields']
+            traversal.append(field)
+        return traversal
+
+    def _match_headers(self, rows, fields, options):
+        """ Attempts to match the imported model's fields to the
+            titles of the parsed CSV file, if the file is supposed to have
+            headers.
+
+            Will consume the first line of the ``rows`` iterator.
+
+            Returns the list of headers and a dict mapping cell indices
+            to key paths in the ``fields`` tree. If headers were not
+            requested, both collections are empty.
+
+            :param Iterator rows:
+            :param dict fields:
+            :param dict options:
+            :rtype: (list(str), dict(int: list(str)))
+        """
+        if not options.get('headers'):
+            return [], {}
+
+        headers = next(rows, None)
+        if not headers:
+            return [], {}
+
+        matches = {}
+        mapping_records = self.env['base_import.mapping'].search_read([('res_model', '=', self.res_model)], ['column_name', 'field_name'])
+        mapping_fields = {rec['column_name']: rec['field_name'] for rec in mapping_records}
+        for index, header in enumerate(headers):
+            match_field = []
+            mapping_field_name = mapping_fields.get(header.lower())
+            if mapping_field_name:
+                match_field = mapping_field_name.split('/')
+            if not match_field:
+                match_field = [field['name'] for field in self._match_header(header, fields, options)]
+            matches[index] = match_field or None
+        return headers, matches
+
+    def parse_preview(self, options, count=10):
+        """ Generates a preview of the uploaded files, and performs
+            fields-matching between the import's file data and the model's
+            columns.
+
+            If the headers are not requested (not options.headers),
+            ``matches`` and ``headers`` are both ``False``.
+
+            :param int count: number of preview lines to generate
+            :param options: format-specific options.
+                            CSV: {quoting, separator, headers}
+            :type options: {str, str, str, bool}
+            :returns: {fields, matches, headers, preview} | {error, preview}
+            :rtype: {dict(str: dict(...)), dict(int, list(str)), list(str), list(list(str))} | {str, str}
+        """
+        self.ensure_one()
+        fields = self.get_fields(self.res_model)
+        try:
+            rows = self._read_file(options)
+            headers, matches = self._match_headers(rows, fields, options)
+            # Match should have consumed the first row (iff headers), get
+            # the ``count`` next rows for preview
+            preview = list(itertools.islice(rows, count))
+            assert preview, "file seems to have no content"
+            header_types = self._find_type_from_preview(options, preview)
+            if options.get('keep_matches') and len(options.get('fields', [])):
+                matches = {}
+                for index, match in enumerate(options.get('fields')):
+                    if match:
+                        matches[index] = match.split('/')
+
+            if options.get('keep_matches'):
+                advanced_mode = options.get('advanced')
+            else:
+                # Check whether a header label contains a relational field path
+                has_relational_header = any(len(models.fix_import_export_id_paths(col)) > 1 for col in headers)
+                # Check whether any matched field is a relational field path
+                has_relational_match = any(len(match) > 1 for field, match in matches.items() if match)
+                advanced_mode = has_relational_header or has_relational_match
+
+            batch = False
+            batch_cutoff = options.get('limit')
+            if batch_cutoff:
+                if count > batch_cutoff:
+                    batch = len(preview) > batch_cutoff
+                else:
+                    batch = bool(next(
+                        itertools.islice(rows, batch_cutoff - count, None),
+                        None
+                    ))
+
+            return {
+                'fields': fields,
+                'matches': matches or False,
+                'headers': headers or False,
+                'headers_type': header_types or False,
+                'preview': preview,
+                'options': options,
+                'advanced_mode': advanced_mode,
+                'debug': self.user_has_groups('base.group_no_one'),
+                'batch': batch,
+            }
+        except Exception as error:
+            # Due to lazy generators, UnicodeDecodeError (for
+            # instance) may only be raised when serializing the
+            # preview to a list in the return.
+            _logger.debug("Error during parsing preview", exc_info=True)
+            preview = None
+            if self.file_type == 'text/csv' and self.file:
+                preview = self.file[:ERROR_PREVIEW_BYTES].decode('iso-8859-1')
+            return {
+                'error': str(error),
+                # iso-8859-1 ensures decoding will always succeed,
+                # even if it yields non-printable characters. This is
+                # in case of UnicodeDecodeError (or csv.Error
+                # compounded with UnicodeDecodeError)
+                'preview': preview,
+            }
+
+    @api.model
+    def _convert_import_data(self, fields, options):
+        """ Extracts the input BaseModel and fields list (with
+            ``False``-y placeholders for fields to *not* import) into a
+            format Model.import_data can use: a fields list without holes
+            and the precisely matching data matrix
+
+            :param list(str|bool) fields: field names, with falsy placeholders for columns not to import
+            :returns: (data, fields)
+            :rtype: (list(list(str)), list(str))
+            :raises ValueError: in case the import data could not be converted
+        """
+        # Get indices for non-empty fields
+        indices = [index for index, field in enumerate(fields) if field]
+        if not indices:
+            raise ValueError(_("You must configure at least one field to import"))
+        # If only one index, itemgetter will return an atom rather
+        # than a 1-tuple
+        if len(indices) == 1:
+            mapper = lambda row: [row[indices[0]]]
+        else:
+            mapper = operator.itemgetter(*indices)
+        # Get only list of actually imported fields
+        import_fields = [f for f in fields if f]
+
+        rows_to_import = self._read_file(options)
+        if options.get('headers'):
+            rows_to_import = itertools.islice(rows_to_import, 1, None)
+        data = [
+            list(row) for row in map(mapper, rows_to_import)
+            # don't try inserting completely empty rows (e.g. from
+            # filtering out o2m fields)
+            if any(row)
+        ]
+
+        # slicing needs to happen after filtering out empty rows as the
+        # data offsets from load are post-filtering
+        return data[options.get('skip'):], import_fields
+
+    @api.model
+    def _remove_currency_symbol(self, value):
+        value = value.strip()
+        negative = False
+        # Careful: some countries use () for negative values, so replace them with a - sign
+        if value.startswith('(') and value.endswith(')'):
+            value = value[1:-1]
+            negative = True
+        float_regex = re.compile(r'([+-]?[0-9.,]+)')
+        split_value = [g for g in float_regex.split(value) if g]
+        if len(split_value) > 2:
+            # This is probably not a float
+            return False
+        if len(split_value) == 1:
+            if float_regex.search(split_value[0]) is not None:
+                return split_value[0] if not negative else '-' + split_value[0]
+            return False
+        else:
+            # String has been split in 2, locate which index contains the float and which does not
+            currency_index = 0
+            if float_regex.search(split_value[0]) is not None:
+                currency_index = 1
+            # Check that currency exists
+            currency = self.env['res.currency'].search([('symbol', '=', split_value[currency_index].strip())])
+            if len(currency):
+                return split_value[(currency_index + 1) % 2] if not negative else '-' + split_value[(currency_index + 1) % 2]
+            # Otherwise it is not a float with a currency symbol
+            return False
+
+    @api.model
+    def _parse_float_from_data(self, data, index, name, options):
+        """ Normalise, in place, the float-like cell at position ``index``
+        of every row of ``data``.
+
+        Each non-empty cell is stripped, has its thousands separator
+        removed and its decimal separator replaced by ``.``, then goes
+        through :meth:`_remove_currency_symbol`.
+
+        :param list(list(str)) data: rows being imported, modified in place
+        :param int index: position of the column within each row
+        :param str name: column name, only used in the error message
+        :param dict options: import options, forwarded to
+                             :meth:`_infer_separators`
+        :raises ValueError: if a cell cannot be read as a number
+        """
+        for row in data:
+            cell = row[index] = row[index].strip()
+            if not cell:
+                continue
+            thousand_separator, decimal_separator = self._infer_separators(cell, options)
+
+            # scientific notation: expand it to a plain decimal string first
+            if 'E' in cell or 'e' in cell:
+                candidate = cell.replace(thousand_separator, '.')
+                try:
+                    cell = row[index] = '{:f}'.format(float(candidate))
+                    thousand_separator = ' '
+                except Exception:
+                    # not a float literal after all, keep the cell untouched
+                    pass
+
+            cleaned = cell.replace(thousand_separator, '').replace(decimal_separator, '.')
+            row[index] = cleaned
+            row[index] = self._remove_currency_symbol(cleaned)
+            if row[index] is False:
+                raise ValueError(_("Column %s contains incorrect values (value: %s)", name, cleaned))
+
+    def _infer_separators(self, value, options):
+        """ Guess which characters ``value`` uses as grouping ("thousands")
+        and decimal separators.
+
+        Signs, parentheses, digits and currency symbols are ignored. If
+        exactly two distinct characters remain and the last one occurs
+        only once, the most frequent (or, on a tie, first seen) one is
+        taken as the grouping separator and the other as the decimal
+        separator. Otherwise the separators come from ``options``, with
+        ``' '`` and ``'.'`` as defaults.
+
+        :param str value: the raw number as found in the file
+        :param dict options: may hold ``float_thousand_separator`` and
+                             ``float_decimal_separator``
+        :return: the pair ``(thousand_separator, decimal_separator)``
+        """
+        ignored_categories = ('Nd', 'Sc')
+        # can't use \p{Sc} using re so handroll it
+        candidates = []
+        for char in value:
+            # numeric decorations (() is used for negative by accountants)
+            if char in '()-+':
+                continue
+            # digits and currency symbols
+            if unicodedata.category(char) in ignored_categories:
+                continue
+            candidates.append(char)
+
+        frequencies = collections.Counter(candidates)
+        # if we have two non-numbers *and* the last one has a count of 1,
+        # we probably have grouping & decimal separators
+        if len(frequencies) == 2 and frequencies[candidates[-1]] == 1:
+            return [char for char, _occurrences in frequencies.most_common()]
+
+        # otherwise get whatever's in the options, or fallback to a default
+        return (
+            options.get('float_thousand_separator', ' '),
+            options.get('float_decimal_separator', '.'),
+        )
+
+    def _parse_import_data(self, data, import_fields, options):
+        """ Launch the first call to _parse_import_data_recursive, on
+        the imported model and with an empty prefix.
+        _parse_import_data_recursive then calls itself for each
+        relational field found in ``import_fields``.
+        """
+        root_model, root_prefix = self.res_model, ''
+        return self._parse_import_data_recursive(root_model, root_prefix, data, import_fields, options)
+
+    def _parse_import_data_recursive(self, model, prefix, data, import_fields, options):
+        """ Pre-process, in place, the columns of ``data`` mapped to
+        date/datetime, float/monetary and image fields of ``model``.
+
+        :param str model: name of the model whose fields are inspected
+        :param str prefix: path of relational fields leading to ``model``,
+                           each followed by ``/`` (empty for the root model)
+        :param list(list) data: rows being imported, modified in place
+        :param list(str) import_fields: field path of each column of ``data``
+        :param dict options: import options
+        :return: ``data``
+        :raises ValueError: if a cell cannot be parsed
+        :raises AccessError: if a cell holds an image URL and the user may
+                             not import remote URLs
+        """
+        # Get fields of type date/datetime
+        all_fields = self.env[model].fields_get()
+        for name, field in all_fields.items():
+            name = prefix + name
+            if field['type'] in ('date', 'datetime') and name in import_fields:
+                index = import_fields.index(name)
+                self._parse_date_from_data(data, index, name, field['type'], options)
+            # Check if the field is in import_field and is a relational (followed by /)
+            # Also verify that the field name exactly match the import_field at the correct level.
+            elif any(name + '/' in import_field and name == import_field.split('/')[prefix.count('/')] for import_field in import_fields):
+                # Recursive call with the relational as new model and add the field name to the prefix
+                self._parse_import_data_recursive(field['relation'], name + '/', data, import_fields, options)
+            elif field['type'] in ('float', 'monetary') and name in import_fields:
+                # Parse float, sometimes float values from file have currency symbol or () to denote a negative value
+                # We should be able to manage both case
+                index = import_fields.index(name)
+                self._parse_float_from_data(data, index, name, options)
+            # attachment-backed binary field whose name contains one of IMAGE_FIELDS
+            elif field['type'] == 'binary' and field.get('attachment') and any(f in name for f in IMAGE_FIELDS) and name in import_fields:
+                index = import_fields.index(name)
+
+                # one HTTP session is shared by all the rows of the column
+                with requests.Session() as session:
+                    session.stream = True
+
+                    for num, line in enumerate(data):
+                        # cells matching the URL regex are downloaded and replaced by
+                        # the value _import_image_by_url returns
+                        if re.match(config.get("import_image_regex", DEFAULT_IMAGE_REGEX), line[index]):
+                            if not self.env.user._can_import_remote_urls():
+                                raise AccessError(_("You can not import images via URL, check with your administrator or support for the reason."))
+
+                            line[index] = self._import_image_by_url(line[index], session, name, num)
+                        else:
+                            # anything else must already be valid base64; the cell
+                            # itself is left unchanged
+                            try:
+                                base64.b64decode(line[index], validate=True)
+                            except binascii.Error:
+                                raise ValueError(_("Found invalid image data, images should be imported as either URLs or base64-encoded data."))
+
+        return data
+
+    def _parse_date_from_data(self, data, index, name, field_type, options):
+        """ Convert, in place, the date-like cell at position ``index`` of
+        every row of ``data`` to the string produced by
+        ``fields.Date.to_string`` (for ``date`` fields) or
+        ``fields.Datetime.to_string`` (otherwise).
+
+        :param list(list(str)) data: rows being imported, modified in place
+        :param int index: position of the column within each row
+        :param str name: column name, only used in error messages
+        :param str field_type: ``'date'`` or ``'datetime'``
+        :param dict options: provides ``date_format`` and ``datetime_format``
+        :raises ValueError: if a cell matches none of the applicable formats
+        """
+        to_server = fields.Date.to_string if field_type == 'date' else fields.Datetime.to_string
+        date_format = options.get('date_format')
+        datetime_format = options.get('datetime_format')
+        try_datetime_first = bool(datetime_format) and field_type == 'datetime'
+        parse = datetime.datetime.strptime
+
+        for row_number, row in enumerate(data, start=1):
+            if not row[index]:
+                continue
+
+            text = row[index].strip()
+            try:
+                # first try parsing as a datetime if it's one
+                if try_datetime_first:
+                    try:
+                        row[index] = to_server(parse(text, datetime_format))
+                    except ValueError:
+                        pass
+                    else:
+                        continue
+                # otherwise try parsing as a date whether it's a date
+                # or datetime
+                row[index] = to_server(parse(text, date_format))
+            except ValueError as e:
+                raise ValueError(_("Column %s contains incorrect values. Error in line %d: %s") % (name, row_number, e))
+            except Exception as e:
+                raise ValueError(_("Error Parsing Date [%s:L%d]: %s") % (name, row_number, e))
+
+    def _import_image_by_url(self, url, session, field, line_number):
+        """ Imports an image by URL
+
+        The download is aborted as soon as the announced or actual size
+        exceeds the ``import_image_maxbytes`` configuration value, and the
+        result is rejected if the decoded image has more than 42 million
+        pixels.
+
+        :param str url: the original field value
+        :param requests.Session session:
+        :param str field: name of the field (for logging/debugging)
+        :param int line_number: 0-indexed line number within the imported file (for logging/debugging)
+        :return: the replacement value (base64-encoded image content)
+        :rtype: bytes
+        :raises ValueError: if the image cannot be fetched, is too large or
+                            cannot be opened as an image
+        """
+        maxsize = int(config.get("import_image_maxbytes", DEFAULT_IMAGE_MAXBYTES))
+        # let the logging module do the formatting, only if debug is enabled
+        _logger.debug("Trying to import image from URL: %s into field %s, at line %s", url, field, line_number)
+        try:
+            response = session.get(url, timeout=int(config.get("import_image_timeout", DEFAULT_IMAGE_TIMEOUT)))
+            try:
+                response.raise_for_status()
+
+                if response.headers.get('Content-Length') and int(response.headers['Content-Length']) > maxsize:
+                    raise ValueError(_("File size exceeds configured maximum (%s bytes)", maxsize))
+
+                content = bytearray()
+                for chunk in response.iter_content(DEFAULT_IMAGE_CHUNK_SIZE):
+                    content += chunk
+                    if len(content) > maxsize:
+                        raise ValueError(_("File size exceeds configured maximum (%s bytes)", maxsize))
+            finally:
+                # the body may not have been fully read (size limit, HTTP
+                # error): release the connection explicitly
+                response.close()
+
+            image = Image.open(io.BytesIO(content))
+            w, h = image.size
+            if w * h > 42e6:  # Nokia Lumia 1020 photo resolution
+                raise ValueError(
+                    u"Image size excessive, imported images must be smaller "
+                    u"than 42 million pixel")
+
+            return base64.b64encode(content)
+        except Exception as e:
+            _logger.exception(e)
+            raise ValueError(_("Could not retrieve URL: %(url)s [%(field_name)s: L%(line_number)d]: %(error)s") % {
+                'url': url,
+                'field_name': field,
+                'line_number': line_number + 1,
+                'error': e
+            })
+
+    def do(self, fields, columns, options, dryrun=False):
+        """ Actual execution of the import
+
+        :param fields: import mapping: maps each column to a field,
+                       ``False`` for the columns to ignore
+        :type fields: list(str|bool)
+        :param columns: columns label
+        :type columns: list(str|bool)
+        :param dict options: import options; the ``name_create_enabled_fields``
+                             and ``limit`` keys are popped from it
+        :param bool dryrun: performs all import operations (and
+                            validations) but rollbacks writes, allows
+                            getting as much errors as possible without
+                            the risk of clobbering the database.
+        :returns: The result of ``load()`` on the target model, with
+                  ``nextrow`` shifted by the ``skip`` option and a
+                  ``name`` list added. If converting or parsing the file
+                  raises a ``ValueError``, a dict holding only
+                  ``messages`` is returned instead. Each message is a
+                  dict with 3 keys ``type`` the
+                  type of error (``error|warning``); ``message`` the
+                  error message associated with the error (a string)
+                  and ``record`` the data which failed to import (or
+                  ``false`` if that data isn't available or provided)
+        :rtype: dict(ids: list(int), messages: list({type, message, record}))
+        """
+        self.ensure_one()
+        self._cr.execute('SAVEPOINT import')
+
+        try:
+            data, import_fields = self._convert_import_data(fields, options)
+            # Parse date and float field
+            data = self._parse_import_data(data, import_fields, options)
+        except ValueError as error:
+            # the file could not be converted or parsed: report it as a single
+            # error message without attempting to load anything
+            return {
+                'messages': [{
+                    'type': 'error',
+                    'message': str(error),
+                    'record': False,
+                }]
+            }
+
+        _logger.info('importing %d rows...', len(data))
+
+        # both options are handed to load() through the context
+        name_create_enabled_fields = options.pop('name_create_enabled_fields', {})
+        import_limit = options.pop('limit', None)
+        model = self.env[self.res_model].with_context(import_file=True, name_create_enabled_fields=name_create_enabled_fields, _import_limit=import_limit)
+        import_result = model.load(import_fields, data)
+        _logger.info('done')
+
+        # If transaction aborted, RELEASE SAVEPOINT is going to raise
+        # an InternalError (ROLLBACK should work, maybe). Ignore that.
+        # TODO: to handle multiple errors, create savepoint around
+        #       write and release it in case of write error (after
+        #       adding error to errors array) => can keep on trying to
+        #       import stuff, and rollback at the end if there is any
+        #       error in the results.
+        try:
+            if dryrun:
+                self._cr.execute('ROLLBACK TO SAVEPOINT import')
+                # cancel all changes done to the registry/ormcache
+                self.pool.clear_caches()
+                self.pool.reset_changes()
+            else:
+                self._cr.execute('RELEASE SAVEPOINT import')
+        except psycopg2.InternalError:
+            pass
+
+        # Insert/Update mapping columns when import complete successfully
+        if import_result['ids'] and options.get('headers'):
+            BaseImportMapping = self.env['base_import.mapping']
+            for index, column_name in enumerate(columns):
+                if column_name:
+                    # Update to latest selected field
+                    mapping_domain = [('res_model', '=', self.res_model), ('column_name', '=', column_name)]
+                    column_mapping = BaseImportMapping.search(mapping_domain, limit=1)
+                    if column_mapping:
+                        # only write when the mapped field actually changed
+                        if column_mapping.field_name != fields[index]:
+                            column_mapping.field_name = fields[index]
+                    else:
+                        BaseImportMapping.create({
+                            'res_model': self.res_model,
+                            'column_name': column_name,
+                            'field_name': fields[index]
+                        })
+        if 'name' in import_fields:
+            index_of_name = import_fields.index('name')
+            skipped = options.get('skip', 0)
+            # pad front as data doesn't contain anything for skipped lines
+            r = import_result['name'] = [''] * skipped
+            # only add names for the window being imported
+            r.extend(x[index_of_name] for x in data[:import_limit])
+            # pad back (though that's probably not useful)
+            r.extend([''] * (len(data) - (import_limit or 0)))
+        else:
+            import_result['name'] = []
+
+        skip = options.get('skip', 0)
+        # convert load's internal nextrow to the imported file's
+        if import_result['nextrow']: # don't update if nextrow = 0 (= no nextrow)
+            import_result['nextrow'] += skip
+
+        return import_result
+
+# separators tried between the day, month and year components
+_SEPARATORS = [' ', '/', '-', '']
+# component orders tried, all with a 4-digit year
+_PATTERN_BASELINE = [
+    ('%m', '%d', '%Y'),
+    ('%d', '%m', '%Y'),
+    ('%Y', '%m', '%d'),
+    ('%Y', '%d', '%m'),
+]
+DATE_FORMATS = []
+# take each baseline format and add the variants obtained through the
+# substitutions below (currently only long year -> short year). Each
+# substitution builds on the formats produced by the previous ones.
+# A list is used instead of a set so that the order of DATE_FORMATS (and
+# thus which pattern check_patterns tries first) does not depend on
+# string hashing: the baseline always comes before its variants.
+for ps in _PATTERN_BASELINE:
+    patterns = [ps]
+    for s, t in [('%Y', '%y')]:
+        for f in list(patterns):  # copy: patterns grows during the loop
+            variant = tuple(t if it == s else it for it in f)
+            if variant not in patterns:
+                patterns.append(variant)
+    DATE_FORMATS.extend(patterns)
+# every date format combined with every separator
+DATE_PATTERNS = [
+    sep.join(fmt)
+    for sep in _SEPARATORS
+    for fmt in DATE_FORMATS
+]
+TIME_PATTERNS = [
+    '%H:%M:%S', '%H:%M', '%H', # 24h
+    '%I:%M:%S %p', '%I:%M %p', '%I %p', # 12h
+]
+
+def check_patterns(patterns, values):
+    """ Return the first strptime pattern of ``patterns`` that every
+    non-empty item of ``values`` matches, or ``None`` if there is none.
+    """
+    for candidate in patterns:
+        regex = to_re(candidate)
+        # empty values are ignored, all the others must match
+        if all(not value or regex.match(value) for value in values):
+            return candidate
+
+    return None
+
+def to_re(pattern):
+    """ cut down version of TimeRE converting strptime patterns to regex
+
+    The result is anchored at both ends and case-insensitive.
+    """
+    # a run of whitespace in the pattern matches any run of whitespace
+    source = re.sub(r'\s+', r'\\s+', pattern)
+    # swap every %<letter> directive for its regex fragment
+    source = re.sub('%([a-z])', _replacer, source, flags=re.IGNORECASE)
+    return re.compile('^{}$'.format(source), re.IGNORECASE)
+def _replacer(m):
+    """ Return the _P_TO_RE fragment for the directive letter captured
+    as group 1 of the match ``m``.
+    """
+    directive = m.group(1)
+    return _P_TO_RE[directive]
+
+# Regex fragment for each strptime directive handled by to_re(), keyed by
+# the directive letter (without its leading %).
+_P_TO_RE = {
+    'd': r"(3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",  # day 1-31, bare, zero- or space-padded
+    'H': r"(2[0-3]|[0-1]\d|\d)",  # hour 0-23
+    'I': r"(1[0-2]|0[1-9]|[1-9])",  # hour 1-12
+    'm': r"(1[0-2]|0[1-9]|[1-9])",  # month 1-12
+    'M': r"([0-5]\d|\d)",  # minute 0-59
+    'S': r"(6[0-1]|[0-5]\d|\d)",  # second 0-61
+    'y': r"(\d\d)",  # 2-digit year
+    'Y': r"(\d\d\d\d)",  # 4-digit year
+
+    'p': r"(am|pm)",  # to_re() compiles case-insensitively, so AM/PM match too
+
+    '%': '%',  # literal percent sign
+}
diff --git a/addons/base_import/models/odf_ods_reader.py b/addons/base_import/models/odf_ods_reader.py
new file mode 100644
index 00000000..daa7d2b1
--- /dev/null
+++ b/addons/base_import/models/odf_ods_reader.py
@@ -0,0 +1,98 @@
+# Copyright 2011 Marco Conti
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# sourced from https://github.com/marcoconti83/read-ods-with-odfpy
+# further altered locally
+
+from odf import opendocument
+from odf.table import Table, TableRow, TableCell
+from odf.text import P
+
+
+class ODSReader(object):
+    """ Reads every sheet of an OpenDocument spreadsheet into
+    ``self.SHEETS``, a dict mapping each sheet name to a list of rows,
+    each row being a list of cell texts.
+    """
+
+    # loads the file
+    def __init__(self, file=None, content=None, clonespannedcolumns=None):
+        """
+        :param file: passed to ``opendocument.load`` when ``content`` is
+                     not provided
+        :param content: an already loaded document, used as is
+        :param clonespannedcolumns: if not ``None``, the text of a cell
+                                    spanning several columns is repeated
+                                    for each spanned column
+        """
+        self.clonespannedcolumns = clonespannedcolumns
+        self.doc = content if content else opendocument.load(file)
+        self.SHEETS = {}
+        for sheet in self.doc.spreadsheet.getElementsByType(Table):
+            self.readSheet(sheet)
+
+    # reads a sheet in the sheet dictionary, storing each sheet as an
+    # array (rows) of arrays (columns)
+    def readSheet(self, sheet):
+        name = sheet.getAttribute("name")
+        rows = sheet.getElementsByType(TableRow)
+        arrRows = []
+
+        # for each row
+        for row in rows:
+            arrCells = []
+            cells = row.getElementsByType(TableCell)
+
+            # for each cell
+            for count, cell in enumerate(cells, start=1):
+                # repeated value? (the repetition of the last cell of the
+                # row is ignored)
+                repeat = 0
+                if count != len(cells):
+                    repeat = cell.getAttribute("numbercolumnsrepeated")
+                if not repeat:
+                    repeat = 1
+                    spanned = int(cell.getAttribute('numbercolumnsspanned') or 0)
+                    # clone spanned cells
+                    if self.clonespannedcolumns is not None and spanned > 1:
+                        repeat = spanned
+
+                ps = cell.getElementsByType(P)
+                textContent = u""
+
+                # for each text/text:span node
+                for p in ps:
+                    for n in p.childNodes:
+                        if n.nodeType == 1 and n.tagName == "text:span":
+                            for c in n.childNodes:
+                                if c.nodeType == 3:
+                                    # the text is held by the child text
+                                    # node, not by the span element itself
+                                    textContent = u'{}{}'.format(textContent, c.data)
+
+                        if n.nodeType == 3:
+                            textContent = u'{}{}'.format(textContent, n.data)
+
+                if textContent:
+                    if not textContent.startswith("#"):  # ignore comments cells
+                        arrCells.extend([textContent] * int(repeat))  # repeated?
+                else:
+                    arrCells.extend([""] * int(repeat))
+
+            # if row contained something
+            if arrCells:
+                arrRows.append(arrCells)
+
+        self.SHEETS[name] = arrRows
+
+    # returns a sheet as an array (rows) of arrays (columns)
+    def getSheet(self, name):
+        return self.SHEETS[name]
+
+    # returns the first sheet that was read
+    def getFirstSheet(self):
+        return next(iter(self.SHEETS.values()))
diff --git a/addons/base_import/models/test_models.py b/addons/base_import/models/test_models.py
new file mode 100644
index 00000000..af025573
--- /dev/null
+++ b/addons/base_import/models/test_models.py
@@ -0,0 +1,108 @@
+# -*- coding: utf-8 -*-
+from odoo import fields, models
+
+
+def model(suffix_name):
+    """ Build the full name of a test model from its suffix. """
+    full_name = 'base_import.tests.models.%s' % suffix_name
+    return full_name
+
+
+class Char(models.Model):
+    # Test model with a single Char field declared without any option.
+    _name = model('char')
+    _description = 'Tests : Base Import Model, Character'
+
+    value = fields.Char()
+class CharRequired(models.Model):
+    # Test model whose single Char field is required.
+    _name = model('char.required')
+    _description = 'Tests : Base Import Model, Character required'
+
+    value = fields.Char(required=True)
+
+class CharReadonly(models.Model):
+    # Test model whose single Char field is readonly.
+    _name = model('char.readonly')
+    _description = 'Tests : Base Import Model, Character readonly'
+
+    value = fields.Char(readonly=True)
+
+class CharStates(models.Model):
+    # Test model whose Char field is readonly, with a ``states`` entry
+    # setting readonly to False for 'draft'.
+    _name = model('char.states')
+    _description = 'Tests : Base Import Model, Character states'
+
+    value = fields.Char(readonly=True, states={'draft': [('readonly', False)]})
+
+class CharNoreadonly(models.Model):
+    # Test model whose Char field is readonly, with a ``states`` entry for
+    # 'draft' that only sets ``invisible`` (readonly is not overridden).
+    _name = model('char.noreadonly')
+    _description = 'Tests : Base Import Model, Character No readonly'
+
+    value = fields.Char(readonly=True, states={'draft': [('invisible', True)]})
+
+class CharStillreadonly(models.Model):
+    # Test model whose Char field is readonly, with a ``states`` entry
+    # that sets readonly to True again for 'draft'.
+    _name = model('char.stillreadonly')
+    _description = 'Tests : Base Import Model, Character still readonly'
+
+    value = fields.Char(readonly=True, states={'draft': [('readonly', True)]})
+
+# TODO: complex field (m2m, o2m, m2o)
+class M2o(models.Model):
+    # Test model with an optional Many2one to the 'm2o.related' test model.
+    _name = model('m2o')
+    _description = 'Tests : Base Import Model, Many to One'
+
+    value = fields.Many2one(model('m2o.related'))
+
+class M2oRelated(models.Model):
+    # Target of M2o.value; holds an Integer defaulting to 42.
+    _name = model('m2o.related')
+    _description = 'Tests : Base Import Model, Many to One related'
+
+    value = fields.Integer(default=42)
+
+class M2oRequired(models.Model):
+    # Test model with a required Many2one to the 'm2o.required.related'
+    # test model.
+    _name = model('m2o.required')
+    _description = 'Tests : Base Import Model, Many to One required'
+
+    value = fields.Many2one(model('m2o.required.related'), required=True)
+
+class M2oRequiredRelated(models.Model):
+    # Target of M2oRequired.value; holds an Integer defaulting to 42.
+    _name = model('m2o.required.related')
+    _description = 'Tests : Base Import Model, Many to One required related'
+
+    value = fields.Integer(default=42)
+
+class O2m(models.Model):
+    # Parent side of the one2many test pair.
+    _name = model('o2m')
+    _description = 'Tests : Base Import Model, One to Many'
+
+    name = fields.Char()
+    # children are 'o2m.child' records linked through their parent_id
+    value = fields.One2many(model('o2m.child'), 'parent_id')
+
+class O2mChild(models.Model):
+    # Child side of the one2many test pair.
+    _name = model('o2m.child')
+    _description = 'Tests : Base Import Model, One to Many child'
+
+    # inverse field used by O2m.value
+    parent_id = fields.Many2one(model('o2m'))
+    value = fields.Integer()
+
+class PreviewModel(models.Model):
+    # Test model with labelled fields: a Char, a required Integer and an
+    # optional Integer.
+    _name = model('preview')
+    _description = 'Tests : Base Import Model Preview'
+
+    name = fields.Char('Name')
+    somevalue = fields.Integer(string='Some Value', required=True)
+    othervalue = fields.Integer(string='Other Variable')
+
+class FloatModel(models.Model):
+    # Test model with one Float field, one Monetary field and a
+    # Many2one to res.currency.
+    _name = model('float')
+    _description = 'Tests: Base Import Model Float'
+
+    value = fields.Float()
+    value2 = fields.Monetary()
+    currency_id = fields.Many2one('res.currency')
+
+class ComplexModel(models.Model):
+    # Test model mixing several field types: Float, Monetary, Char,
+    # Date and Datetime, plus a Many2one to res.currency.
+    _name = model('complex')
+    _description = 'Tests: Base Import Model Complex'
+
+    f = fields.Float()
+    m = fields.Monetary()
+    c = fields.Char()
+    currency_id = fields.Many2one('res.currency')
+    d = fields.Date()
+    dt = fields.Datetime()
author	stephanchrst <stephanchrst@gmail.com>	2022-05-10 21:51:50 +0700
committer	stephanchrst <stephanchrst@gmail.com>	2022-05-10 21:51:50 +0700
commit	3751379f1e9a4c215fb6eb898b4ccc67659b9ace (patch)
tree	a44932296ef4a9b71d5f010906253d8c53727726 /addons/base_import/models
parent	0a15094050bfde69a06d6eff798e9a8ddf2b8c21 (diff)