diff options
| author | stephanchrst <stephanchrst@gmail.com> | 2022-05-10 21:51:50 +0700 |
|---|---|---|
| committer | stephanchrst <stephanchrst@gmail.com> | 2022-05-10 21:51:50 +0700 |
| commit | 3751379f1e9a4c215fb6eb898b4ccc67659b9ace (patch) | |
| tree | a44932296ef4a9b71d5f010906253d8c53727726 /addons/base_import/models | |
| parent | 0a15094050bfde69a06d6eff798e9a8ddf2b8c21 (diff) | |
initial commit 2
Diffstat (limited to 'addons/base_import/models')
| -rw-r--r-- | addons/base_import/models/__init__.py | 5 | ||||
| -rw-r--r-- | addons/base_import/models/base_import.py | 1050 | ||||
| -rw-r--r-- | addons/base_import/models/odf_ods_reader.py | 98 | ||||
| -rw-r--r-- | addons/base_import/models/test_models.py | 108 |
4 files changed, 1261 insertions, 0 deletions
diff --git a/addons/base_import/models/__init__.py b/addons/base_import/models/__init__.py new file mode 100644 index 00000000..f349a3ca --- /dev/null +++ b/addons/base_import/models/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: utf-8 -*- +# Part of Odoo. See LICENSE file for full copyright and licensing details. + +from . import base_import +from . import test_models diff --git a/addons/base_import/models/base_import.py b/addons/base_import/models/base_import.py new file mode 100644 index 00000000..733c3060 --- /dev/null +++ b/addons/base_import/models/base_import.py @@ -0,0 +1,1050 @@ +# -*- coding: utf-8 -*- +# Part of Odoo. See LICENSE file for full copyright and licensing details. + +import base64 +import binascii +import codecs +import collections +import unicodedata + +import chardet +import datetime +import io +import itertools +import logging +import psycopg2 +import operator +import os +import re +import requests + +from PIL import Image + +from odoo import api, fields, models +from odoo.exceptions import AccessError +from odoo.tools.translate import _ +from odoo.tools.mimetypes import guess_mimetype +from odoo.tools import config, DEFAULT_SERVER_DATE_FORMAT, DEFAULT_SERVER_DATETIME_FORMAT, pycompat + +FIELDS_RECURSION_LIMIT = 3 +ERROR_PREVIEW_BYTES = 200 +DEFAULT_IMAGE_TIMEOUT = 3 +DEFAULT_IMAGE_MAXBYTES = 10 * 1024 * 1024 +DEFAULT_IMAGE_REGEX = r"^(?:http|https)://" +DEFAULT_IMAGE_CHUNK_SIZE = 32768 +IMAGE_FIELDS = ["icon", "image", "logo", "picture"] +_logger = logging.getLogger(__name__) +BOM_MAP = { + 'utf-16le': codecs.BOM_UTF16_LE, + 'utf-16be': codecs.BOM_UTF16_BE, + 'utf-32le': codecs.BOM_UTF32_LE, + 'utf-32be': codecs.BOM_UTF32_BE, +} + +try: + import xlrd + try: + from xlrd import xlsx + except ImportError: + xlsx = None +except ImportError: + xlrd = xlsx = None + +try: + from . import odf_ods_reader +except ImportError: + odf_ods_reader = None + +FILE_TYPE_DICT = { + 'text/csv': ('csv', True, None), + 'application/vnd.ms-excel': ('xls', xlrd, 'xlrd'), + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ('xlsx', xlsx, 'xlrd >= 1.0.0'), + 'application/vnd.oasis.opendocument.spreadsheet': ('ods', odf_ods_reader, 'odfpy') +} +EXTENSIONS = { + '.' + ext: handler + for mime, (ext, handler, req) in FILE_TYPE_DICT.items() +} + +class Base(models.AbstractModel): + _inherit = 'base' + + @api.model + def get_import_templates(self): + """ + Get the import templates label and path. + + :return: a list(dict) containing label and template path + like ``[{'label': 'foo', 'template': 'path'}]`` + """ + return [] + +class ImportMapping(models.Model): + """ mapping of previous column:field selections + + This is useful when repeatedly importing from a third-party + system: column names generated by the external system may + not match Odoo's field names or labels. This model is used + to save the mapping between column names and fields so that + next time a user imports from the same third-party systems + we can automatically match the columns to the correct field + without them having to re-enter the mapping every single + time. + """ + _name = 'base_import.mapping' + _description = 'Base Import Mapping' + + res_model = fields.Char(index=True) + column_name = fields.Char() + field_name = fields.Char() + + +class ResUsers(models.Model): + _inherit = 'res.users' + + def _can_import_remote_urls(self): + """ Hook to decide whether the current user is allowed to import + images via URL (as such an import can DOS a worker). By default, + allows the administrator group. + + :rtype: bool + """ + self.ensure_one() + return self._is_admin() + +class Import(models.TransientModel): + + _name = 'base_import.import' + _description = 'Base Import' + + # allow imports to survive for 12h in case user is slow + _transient_max_hours = 12.0 + + res_model = fields.Char('Model') + file = fields.Binary('File', help="File to check and/or import, raw binary (not base64)", attachment=False) + file_name = fields.Char('File Name') + file_type = fields.Char('File Type') + + @api.model + def get_fields(self, model, depth=FIELDS_RECURSION_LIMIT): + """ Recursively get fields for the provided model (through + fields_get) and filter them according to importability + + The output format is a list of ``Field``, with ``Field`` + defined as: + + .. class:: Field + + .. attribute:: id (str) + + A non-unique identifier for the field, used to compute + the span of the ``required`` attribute: if multiple + ``required`` fields have the same id, only one of them + is necessary. + + .. attribute:: name (str) + + The field's logical (Odoo) name within the scope of + its parent. + + .. attribute:: string (str) + + The field's human-readable name (``@string``) + + .. attribute:: required (bool) + + Whether the field is marked as required in the + model. Clients must provide non-empty import values + for all required fields or the import will error out. + + .. attribute:: fields (list(Field)) + + The current field's subfields. The database and + external identifiers for m2o and m2m fields; a + filtered and transformed fields_get for o2m fields (to + a variable depth defined by ``depth``). + + Fields with no sub-fields will have an empty list of + sub-fields. + + :param str model: name of the model to get fields form + :param int depth: depth of recursion into o2m fields + """ + Model = self.env[model] + importable_fields = [{ + 'id': 'id', + 'name': 'id', + 'string': _("External ID"), + 'required': False, + 'fields': [], + 'type': 'id', + }] + if not depth: + return importable_fields + + model_fields = Model.fields_get() + blacklist = models.MAGIC_COLUMNS + [Model.CONCURRENCY_CHECK_FIELD] + for name, field in model_fields.items(): + if name in blacklist: + continue + # an empty string means the field is deprecated, @deprecated must + # be absent or False to mean not-deprecated + if field.get('deprecated', False) is not False: + continue + if field.get('readonly'): + states = field.get('states') + if not states: + continue + # states = {state: [(attr, value), (attr2, value2)], state2:...} + if not any(attr == 'readonly' and value is False + for attr, value in itertools.chain.from_iterable(states.values())): + continue + field_value = { + 'id': name, + 'name': name, + 'string': field['string'], + # Y U NO ALWAYS HAS REQUIRED + 'required': bool(field.get('required')), + 'fields': [], + 'type': field['type'], + } + + if field['type'] in ('many2many', 'many2one'): + field_value['fields'] = [ + dict(field_value, name='id', string=_("External ID"), type='id'), + dict(field_value, name='.id', string=_("Database ID"), type='id'), + ] + elif field['type'] == 'one2many': + field_value['fields'] = self.get_fields(field['relation'], depth=depth-1) + if self.user_has_groups('base.group_no_one'): + field_value['fields'].append({'id': '.id', 'name': '.id', 'string': _("Database ID"), 'required': False, 'fields': [], 'type': 'id'}) + + importable_fields.append(field_value) + + # TODO: cache on model? + return importable_fields + + def _read_file(self, options): + """ Dispatch to specific method to read file content, according to its mimetype or file type + :param options : dict of reading options (quoting, separator, ...) + """ + self.ensure_one() + # guess mimetype from file content + mimetype = guess_mimetype(self.file or b'') + (file_extension, handler, req) = FILE_TYPE_DICT.get(mimetype, (None, None, None)) + if handler: + try: + return getattr(self, '_read_' + file_extension)(options) + except Exception: + _logger.warning("Failed to read file '%s' (transient id %d) using guessed mimetype %s", self.file_name or '<unknown>', self.id, mimetype) + + # try reading with user-provided mimetype + (file_extension, handler, req) = FILE_TYPE_DICT.get(self.file_type, (None, None, None)) + if handler: + try: + return getattr(self, '_read_' + file_extension)(options) + except Exception: + _logger.warning("Failed to read file '%s' (transient id %d) using user-provided mimetype %s", self.file_name or '<unknown>', self.id, self.file_type) + + # fallback on file extensions as mime types can be unreliable (e.g. + # software setting incorrect mime types, or non-installed software + # leading to browser not sending mime types) + if self.file_name: + p, ext = os.path.splitext(self.file_name) + if ext in EXTENSIONS: + try: + return getattr(self, '_read_' + ext[1:])(options) + except Exception: + _logger.warning("Failed to read file '%s' (transient id %s) using file extension", self.file_name, self.id) + + if req: + raise ImportError(_("Unable to load \"{extension}\" file: requires Python module \"{modname}\"").format(extension=file_extension, modname=req)) + raise ValueError(_("Unsupported file format \"{}\", import only supports CSV, ODS, XLS and XLSX").format(self.file_type)) + + def _read_xls(self, options): + """ Read file content, using xlrd lib """ + book = xlrd.open_workbook(file_contents=self.file or b'') + sheets = options['sheets'] = book.sheet_names() + sheet = options['sheet'] = options.get('sheet') or sheets[0] + return self._read_xls_book(book, sheet) + + def _read_xls_book(self, book, sheet_name): + sheet = book.sheet_by_name(sheet_name) + # emulate Sheet.get_rows for pre-0.9.4 + for rowx, row in enumerate(map(sheet.row, range(sheet.nrows)), 1): + values = [] + for colx, cell in enumerate(row, 1): + if cell.ctype is xlrd.XL_CELL_NUMBER: + is_float = cell.value % 1 != 0.0 + values.append( + str(cell.value) + if is_float + else str(int(cell.value)) + ) + elif cell.ctype is xlrd.XL_CELL_DATE: + is_datetime = cell.value % 1 != 0.0 + # emulate xldate_as_datetime for pre-0.9.3 + dt = datetime.datetime(*xlrd.xldate.xldate_as_tuple(cell.value, book.datemode)) + values.append( + dt.strftime(DEFAULT_SERVER_DATETIME_FORMAT) + if is_datetime + else dt.strftime(DEFAULT_SERVER_DATE_FORMAT) + ) + elif cell.ctype is xlrd.XL_CELL_BOOLEAN: + values.append(u'True' if cell.value else u'False') + elif cell.ctype is xlrd.XL_CELL_ERROR: + raise ValueError( + _("Invalid cell value at row %(row)s, column %(col)s: %(cell_value)s") % { + 'row': rowx, + 'col': colx, + 'cell_value': xlrd.error_text_from_code.get(cell.value, _("unknown error code %s", cell.value)) + } + ) + else: + values.append(cell.value) + if any(x for x in values if x.strip()): + yield values + + # use the same method for xlsx and xls files + _read_xlsx = _read_xls + + def _read_ods(self, options): + """ Read file content using ODSReader custom lib """ + doc = odf_ods_reader.ODSReader(file=io.BytesIO(self.file or b'')) + sheets = options['sheets'] = list(doc.SHEETS.keys()) + sheet = options['sheet'] = options.get('sheet') or sheets[0] + + return ( + row + for row in doc.getSheet(sheet) + if any(x for x in row if x.strip()) + ) + + def _read_csv(self, options): + """ Returns a CSV-parsed iterator of all non-empty lines in the file + :throws csv.Error: if an error is detected during CSV parsing + """ + csv_data = self.file or b'' + if not csv_data: + return iter([]) + + encoding = options.get('encoding') + if not encoding: + encoding = options['encoding'] = chardet.detect(csv_data)['encoding'].lower() + # some versions of chardet (e.g. 2.3.0 but not 3.x) will return + # utf-(16|32)(le|be), which for python means "ignore / don't strip + # BOM". We don't want that, so rectify the encoding to non-marked + # IFF the guessed encoding is LE/BE and csv_data starts with a BOM + bom = BOM_MAP.get(encoding) + if bom and csv_data.startswith(bom): + encoding = options['encoding'] = encoding[:-2] + + if encoding != 'utf-8': + csv_data = csv_data.decode(encoding).encode('utf-8') + + separator = options.get('separator') + if not separator: + # default for unspecified separator so user gets a message about + # having to specify it + separator = ',' + for candidate in (',', ';', '\t', ' ', '|', unicodedata.lookup('unit separator')): + # pass through the CSV and check if all rows are the same + # length & at least 2-wide assume it's the correct one + it = pycompat.csv_reader(io.BytesIO(csv_data), quotechar=options['quoting'], delimiter=candidate) + w = None + for row in it: + width = len(row) + if w is None: + w = width + if width == 1 or width != w: + break # next candidate + else: # nobreak + separator = options['separator'] = candidate + break + + csv_iterator = pycompat.csv_reader( + io.BytesIO(csv_data), + quotechar=options['quoting'], + delimiter=separator) + + return ( + row for row in csv_iterator + if any(x for x in row if x.strip()) + ) + + @api.model + def _try_match_column(self, preview_values, options): + """ Returns the potential field types, based on the preview values, using heuristics + :param preview_values : list of value for the column to determine + :param options : parsing options + """ + values = set(preview_values) + # If all values are empty in preview than can be any field + if values == {''}: + return ['all'] + + # If all values starts with __export__ this is probably an id + if all(v.startswith('__export__') for v in values): + return ['id', 'many2many', 'many2one', 'one2many'] + + # If all values can be cast to int type is either id, float or monetary + # Exception: if we only have 1 and 0, it can also be a boolean + if all(v.isdigit() for v in values if v): + field_type = ['id', 'integer', 'char', 'float', 'monetary', 'many2one', 'many2many', 'one2many'] + if {'0', '1', ''}.issuperset(values): + field_type.append('boolean') + return field_type + + # If all values are either True or False, type is boolean + if all(val.lower() in ('true', 'false', 't', 'f', '') for val in preview_values): + return ['boolean'] + + # If all values can be cast to float, type is either float or monetary + results = [] + try: + thousand_separator = decimal_separator = False + for val in preview_values: + val = val.strip() + if not val: + continue + # value might have the currency symbol left or right from the value + val = self._remove_currency_symbol(val) + if val: + if options.get('float_thousand_separator') and options.get('float_decimal_separator'): + val = val.replace(options['float_thousand_separator'], '').replace(options['float_decimal_separator'], '.') + # We are now sure that this is a float, but we still need to find the + # thousand and decimal separator + else: + if val.count('.') > 1: + options['float_thousand_separator'] = '.' + options['float_decimal_separator'] = ',' + elif val.count(',') > 1: + options['float_thousand_separator'] = ',' + options['float_decimal_separator'] = '.' + elif val.find('.') > val.find(','): + thousand_separator = ',' + decimal_separator = '.' + elif val.find(',') > val.find('.'): + thousand_separator = '.' + decimal_separator = ',' + else: + # This is not a float so exit this try + float('a') + if thousand_separator and not options.get('float_decimal_separator'): + options['float_thousand_separator'] = thousand_separator + options['float_decimal_separator'] = decimal_separator + results = ['float', 'monetary'] + except ValueError: + pass + + results += self._try_match_date_time(preview_values, options) + if results: + return results + + return ['id', 'text', 'boolean', 'char', 'datetime', 'selection', 'many2one', 'one2many', 'many2many', 'html'] + + + def _try_match_date_time(self, preview_values, options): + # Or a date/datetime if it matches the pattern + date_patterns = [options['date_format']] if options.get( + 'date_format') else [] + user_date_format = self.env['res.lang']._lang_get(self.env.user.lang).date_format + if user_date_format: + try: + to_re(user_date_format) + date_patterns.append(user_date_format) + except KeyError: + pass + date_patterns.extend(DATE_PATTERNS) + match = check_patterns(date_patterns, preview_values) + if match: + options['date_format'] = match + return ['date', 'datetime'] + + datetime_patterns = [options['datetime_format']] if options.get( + 'datetime_format') else [] + datetime_patterns.extend( + "%s %s" % (d, t) + for d in date_patterns + for t in TIME_PATTERNS + ) + match = check_patterns(datetime_patterns, preview_values) + if match: + options['datetime_format'] = match + return ['datetime'] + + return [] + + @api.model + def _find_type_from_preview(self, options, preview): + type_fields = [] + if preview: + for column in range(0, len(preview[0])): + preview_values = [value[column].strip() for value in preview] + type_field = self._try_match_column(preview_values, options) + type_fields.append(type_field) + return type_fields + + def _match_header(self, header, fields, options): + """ Attempts to match a given header to a field of the + imported model. + + :param str header: header name from the CSV file + :param fields: + :param dict options: + :returns: an empty list if the header couldn't be matched, or + all the fields to traverse + :rtype: list(Field) + """ + string_match = None + IrTranslation = self.env['ir.translation'] + for field in fields: + # FIXME: should match all translations & original + # TODO: use string distance (levenshtein? hamming?) + if header.lower() == field['name'].lower(): + return [field] + if header.lower() == field['string'].lower(): + # matching string are not reliable way because + # strings have no unique constraint + string_match = field + translated_header = IrTranslation._get_source('ir.model.fields,field_description', 'model', self.env.lang, header).lower() + if translated_header == field['string'].lower(): + string_match = field + if string_match: + # this behavior is only applied if there is no matching field['name'] + return [string_match] + + if '/' not in header: + return [] + + # relational field path + traversal = [] + subfields = fields + # Iteratively dive into fields tree + for section in header.split('/'): + # Strip section in case spaces are added around '/' for + # readability of paths + match = self._match_header(section.strip(), subfields, options) + # Any match failure, exit + if not match: + return [] + # prep subfields for next iteration within match[0] + field = match[0] + subfields = field['fields'] + traversal.append(field) + return traversal + + def _match_headers(self, rows, fields, options): + """ Attempts to match the imported model's fields to the + titles of the parsed CSV file, if the file is supposed to have + headers. + + Will consume the first line of the ``rows`` iterator. + + Returns the list of headers and a dict mapping cell indices + to key paths in the ``fields`` tree. If headers were not + requested, both collections are empty. + + :param Iterator rows: + :param dict fields: + :param dict options: + :rtype: (list(str), dict(int: list(str))) + """ + if not options.get('headers'): + return [], {} + + headers = next(rows, None) + if not headers: + return [], {} + + matches = {} + mapping_records = self.env['base_import.mapping'].search_read([('res_model', '=', self.res_model)], ['column_name', 'field_name']) + mapping_fields = {rec['column_name']: rec['field_name'] for rec in mapping_records} + for index, header in enumerate(headers): + match_field = [] + mapping_field_name = mapping_fields.get(header.lower()) + if mapping_field_name: + match_field = mapping_field_name.split('/') + if not match_field: + match_field = [field['name'] for field in self._match_header(header, fields, options)] + matches[index] = match_field or None + return headers, matches + + def parse_preview(self, options, count=10): + """ Generates a preview of the uploaded files, and performs + fields-matching between the import's file data and the model's + columns. + + If the headers are not requested (not options.headers), + ``matches`` and ``headers`` are both ``False``. + + :param int count: number of preview lines to generate + :param options: format-specific options. + CSV: {quoting, separator, headers} + :type options: {str, str, str, bool} + :returns: {fields, matches, headers, preview} | {error, preview} + :rtype: {dict(str: dict(...)), dict(int, list(str)), list(str), list(list(str))} | {str, str} + """ + self.ensure_one() + fields = self.get_fields(self.res_model) + try: + rows = self._read_file(options) + headers, matches = self._match_headers(rows, fields, options) + # Match should have consumed the first row (iif headers), get + # the ``count`` next rows for preview + preview = list(itertools.islice(rows, count)) + assert preview, "file seems to have no content" + header_types = self._find_type_from_preview(options, preview) + if options.get('keep_matches') and len(options.get('fields', [])): + matches = {} + for index, match in enumerate(options.get('fields')): + if match: + matches[index] = match.split('/') + + if options.get('keep_matches'): + advanced_mode = options.get('advanced') + else: + # Check is label contain relational field + has_relational_header = any(len(models.fix_import_export_id_paths(col)) > 1 for col in headers) + # Check is matches fields have relational field + has_relational_match = any(len(match) > 1 for field, match in matches.items() if match) + advanced_mode = has_relational_header or has_relational_match + + batch = False + batch_cutoff = options.get('limit') + if batch_cutoff: + if count > batch_cutoff: + batch = len(preview) > batch_cutoff + else: + batch = bool(next( + itertools.islice(rows, batch_cutoff - count, None), + None + )) + + return { + 'fields': fields, + 'matches': matches or False, + 'headers': headers or False, + 'headers_type': header_types or False, + 'preview': preview, + 'options': options, + 'advanced_mode': advanced_mode, + 'debug': self.user_has_groups('base.group_no_one'), + 'batch': batch, + } + except Exception as error: + # Due to lazy generators, UnicodeDecodeError (for + # instance) may only be raised when serializing the + # preview to a list in the return. + _logger.debug("Error during parsing preview", exc_info=True) + preview = None + if self.file_type == 'text/csv' and self.file: + preview = self.file[:ERROR_PREVIEW_BYTES].decode('iso-8859-1') + return { + 'error': str(error), + # iso-8859-1 ensures decoding will always succeed, + # even if it yields non-printable characters. This is + # in case of UnicodeDecodeError (or csv.Error + # compounded with UnicodeDecodeError) + 'preview': preview, + } + + @api.model + def _convert_import_data(self, fields, options): + """ Extracts the input BaseModel and fields list (with + ``False``-y placeholders for fields to *not* import) into a + format Model.import_data can use: a fields list without holes + and the precisely matching data matrix + + :param list(str|bool): fields + :returns: (data, fields) + :rtype: (list(list(str)), list(str)) + :raises ValueError: in case the import data could not be converted + """ + # Get indices for non-empty fields + indices = [index for index, field in enumerate(fields) if field] + if not indices: + raise ValueError(_("You must configure at least one field to import")) + # If only one index, itemgetter will return an atom rather + # than a 1-tuple + if len(indices) == 1: + mapper = lambda row: [row[indices[0]]] + else: + mapper = operator.itemgetter(*indices) + # Get only list of actually imported fields + import_fields = [f for f in fields if f] + + rows_to_import = self._read_file(options) + if options.get('headers'): + rows_to_import = itertools.islice(rows_to_import, 1, None) + data = [ + list(row) for row in map(mapper, rows_to_import) + # don't try inserting completely empty rows (e.g. from + # filtering out o2m fields) + if any(row) + ] + + # slicing needs to happen after filtering out empty rows as the + # data offsets from load are post-filtering + return data[options.get('skip'):], import_fields + + @api.model + def _remove_currency_symbol(self, value): + value = value.strip() + negative = False + # Careful that some countries use () for negative so replace it by - sign + if value.startswith('(') and value.endswith(')'): + value = value[1:-1] + negative = True + float_regex = re.compile(r'([+-]?[0-9.,]+)') + split_value = [g for g in float_regex.split(value) if g] + if len(split_value) > 2: + # This is probably not a float + return False + if len(split_value) == 1: + if float_regex.search(split_value[0]) is not None: + return split_value[0] if not negative else '-' + split_value[0] + return False + else: + # String has been split in 2, locate which index contains the float and which does not + currency_index = 0 + if float_regex.search(split_value[0]) is not None: + currency_index = 1 + # Check that currency exists + currency = self.env['res.currency'].search([('symbol', '=', split_value[currency_index].strip())]) + if len(currency): + return split_value[(currency_index + 1) % 2] if not negative else '-' + split_value[(currency_index + 1) % 2] + # Otherwise it is not a float with a currency symbol + return False + + @api.model + def _parse_float_from_data(self, data, index, name, options): + for line in data: + line[index] = line[index].strip() + if not line[index]: + continue + thousand_separator, decimal_separator = self._infer_separators(line[index], options) + + if 'E' in line[index] or 'e' in line[index]: + tmp_value = line[index].replace(thousand_separator, '.') + try: + tmp_value = '{:f}'.format(float(tmp_value)) + line[index] = tmp_value + thousand_separator = ' ' + except Exception: + pass + + line[index] = line[index].replace(thousand_separator, '').replace(decimal_separator, '.') + old_value = line[index] + line[index] = self._remove_currency_symbol(line[index]) + if line[index] is False: + raise ValueError(_("Column %s contains incorrect values (value: %s)", name, old_value)) + + def _infer_separators(self, value, options): + """ Try to infer the shape of the separators: if there are two + different "non-numberic" characters in the number, the + former/duplicated one would be grouping ("thousands" separator) and + the latter would be the decimal separator. The decimal separator + should furthermore be unique. + """ + # can't use \p{Sc} using re so handroll it + non_number = [ + # any character + c for c in value + # which is not a numeric decoration (() is used for negative + # by accountants) + if c not in '()-+' + # which is not a digit or a currency symbol + if unicodedata.category(c) not in ('Nd', 'Sc') + ] + + counts = collections.Counter(non_number) + # if we have two non-numbers *and* the last one has a count of 1, + # we probably have grouping & decimal separators + if len(counts) == 2 and counts[non_number[-1]] == 1: + return [character for character, _count in counts.most_common()] + + # otherwise get whatever's in the options, or fallback to a default + thousand_separator = options.get('float_thousand_separator', ' ') + decimal_separator = options.get('float_decimal_separator', '.') + return thousand_separator, decimal_separator + + def _parse_import_data(self, data, import_fields, options): + """ Lauch first call to _parse_import_data_recursive with an + empty prefix. _parse_import_data_recursive will be run + recursively for each relational field. + """ + return self._parse_import_data_recursive(self.res_model, '', data, import_fields, options) + + def _parse_import_data_recursive(self, model, prefix, data, import_fields, options): + # Get fields of type date/datetime + all_fields = self.env[model].fields_get() + for name, field in all_fields.items(): + name = prefix + name + if field['type'] in ('date', 'datetime') and name in import_fields: + index = import_fields.index(name) + self._parse_date_from_data(data, index, name, field['type'], options) + # Check if the field is in import_field and is a relational (followed by /) + # Also verify that the field name exactly match the import_field at the correct level. + elif any(name + '/' in import_field and name == import_field.split('/')[prefix.count('/')] for import_field in import_fields): + # Recursive call with the relational as new model and add the field name to the prefix + self._parse_import_data_recursive(field['relation'], name + '/', data, import_fields, options) + elif field['type'] in ('float', 'monetary') and name in import_fields: + # Parse float, sometimes float values from file have currency symbol or () to denote a negative value + # We should be able to manage both case + index = import_fields.index(name) + self._parse_float_from_data(data, index, name, options) + elif field['type'] == 'binary' and field.get('attachment') and any(f in name for f in IMAGE_FIELDS) and name in import_fields: + index = import_fields.index(name) + + with requests.Session() as session: + session.stream = True + + for num, line in enumerate(data): + if re.match(config.get("import_image_regex", DEFAULT_IMAGE_REGEX), line[index]): + if not self.env.user._can_import_remote_urls(): + raise AccessError(_("You can not import images via URL, check with your administrator or support for the reason.")) + + line[index] = self._import_image_by_url(line[index], session, name, num) + else: + try: + base64.b64decode(line[index], validate=True) + except binascii.Error: + raise ValueError(_("Found invalid image data, images should be imported as either URLs or base64-encoded data.")) + + return data + + def _parse_date_from_data(self, data, index, name, field_type, options): + dt = datetime.datetime + fmt = fields.Date.to_string if field_type == 'date' else fields.Datetime.to_string + d_fmt = options.get('date_format') + dt_fmt = options.get('datetime_format') + for num, line in enumerate(data): + if not line[index]: + continue + + v = line[index].strip() + try: + # first try parsing as a datetime if it's one + if dt_fmt and field_type == 'datetime': + try: + line[index] = fmt(dt.strptime(v, dt_fmt)) + continue + except ValueError: + pass + # otherwise try parsing as a date whether it's a date + # or datetime + line[index] = fmt(dt.strptime(v, d_fmt)) + except ValueError as e: + raise ValueError(_("Column %s contains incorrect values. Error in line %d: %s") % (name, num + 1, e)) + except Exception as e: + raise ValueError(_("Error Parsing Date [%s:L%d]: %s") % (name, num + 1, e)) + + def _import_image_by_url(self, url, session, field, line_number): + """ Imports an image by URL + + :param str url: the original field value + :param requests.Session session: + :param str field: name of the field (for logging/debugging) + :param int line_number: 0-indexed line number within the imported file (for logging/debugging) + :return: the replacement value + :rtype: bytes + """ + maxsize = int(config.get("import_image_maxbytes", DEFAULT_IMAGE_MAXBYTES)) + _logger.debug("Trying to import image from URL: %s into field %s, at line %s" % (url, field, line_number)) + try: + response = session.get(url, timeout=int(config.get("import_image_timeout", DEFAULT_IMAGE_TIMEOUT))) + response.raise_for_status() + + if response.headers.get('Content-Length') and int(response.headers['Content-Length']) > maxsize: + raise ValueError(_("File size exceeds configured maximum (%s bytes)", maxsize)) + + content = bytearray() + for chunk in response.iter_content(DEFAULT_IMAGE_CHUNK_SIZE): + content += chunk + if len(content) > maxsize: + raise ValueError(_("File size exceeds configured maximum (%s bytes)", maxsize)) + + image = Image.open(io.BytesIO(content)) + w, h = image.size + if w * h > 42e6: # Nokia Lumia 1020 photo resolution + raise ValueError( + u"Image size excessive, imported images must be smaller " + u"than 42 million pixel") + + return base64.b64encode(content) + except Exception as e: + _logger.exception(e) + raise ValueError(_("Could not retrieve URL: %(url)s [%(field_name)s: L%(line_number)d]: %(error)s") % { + 'url': url, + 'field_name': field, + 'line_number': line_number + 1, + 'error': e + }) + + def do(self, fields, columns, options, dryrun=False): + """ Actual execution of the import + + :param fields: import mapping: maps each column to a field, + ``False`` for the columns to ignore + :type fields: list(str|bool) + :param columns: columns label + :type columns: list(str|bool) + :param dict options: + :param bool dryrun: performs all import operations (and + validations) but rollbacks writes, allows + getting as much errors as possible without + the risk of clobbering the database. + :returns: A list of errors. If the list is empty the import + executed fully and correctly. If the list is + non-empty it contains dicts with 3 keys ``type`` the + type of error (``error|warning``); ``message`` the + error message associated with the error (a string) + and ``record`` the data which failed to import (or + ``false`` if that data isn't available or provided) + :rtype: dict(ids: list(int), messages: list({type, message, record})) + """ + self.ensure_one() + self._cr.execute('SAVEPOINT import') + + try: + data, import_fields = self._convert_import_data(fields, options) + # Parse date and float field + data = self._parse_import_data(data, import_fields, options) + except ValueError as error: + return { + 'messages': [{ + 'type': 'error', + 'message': str(error), + 'record': False, + }] + } + + _logger.info('importing %d rows...', len(data)) + + name_create_enabled_fields = options.pop('name_create_enabled_fields', {}) + import_limit = options.pop('limit', None) + model = self.env[self.res_model].with_context(import_file=True, name_create_enabled_fields=name_create_enabled_fields, _import_limit=import_limit) + import_result = model.load(import_fields, data) + _logger.info('done') + + # If transaction aborted, RELEASE SAVEPOINT is going to raise + # an InternalError (ROLLBACK should work, maybe). Ignore that. + # TODO: to handle multiple errors, create savepoint around + # write and release it in case of write error (after + # adding error to errors array) => can keep on trying to + # import stuff, and rollback at the end if there is any + # error in the results. + try: + if dryrun: + self._cr.execute('ROLLBACK TO SAVEPOINT import') + # cancel all changes done to the registry/ormcache + self.pool.clear_caches() + self.pool.reset_changes() + else: + self._cr.execute('RELEASE SAVEPOINT import') + except psycopg2.InternalError: + pass + + # Insert/Update mapping columns when import complete successfully + if import_result['ids'] and options.get('headers'): + BaseImportMapping = self.env['base_import.mapping'] + for index, column_name in enumerate(columns): + if column_name: + # Update to latest selected field + mapping_domain = [('res_model', '=', self.res_model), ('column_name', '=', column_name)] + column_mapping = BaseImportMapping.search(mapping_domain, limit=1) + if column_mapping: + if column_mapping.field_name != fields[index]: + column_mapping.field_name = fields[index] + else: + BaseImportMapping.create({ + 'res_model': self.res_model, + 'column_name': column_name, + 'field_name': fields[index] + }) + if 'name' in import_fields: + index_of_name = import_fields.index('name') + skipped = options.get('skip', 0) + # pad front as data doesn't contain anythig for skipped lines + r = import_result['name'] = [''] * skipped + # only add names for the window being imported + r.extend(x[index_of_name] for x in data[:import_limit]) + # pad back (though that's probably not useful) + r.extend([''] * (len(data) - (import_limit or 0))) + else: + import_result['name'] = [] + + skip = options.get('skip', 0) + # convert load's internal nextrow to the imported file's + if import_result['nextrow']: # don't update if nextrow = 0 (= no nextrow) + import_result['nextrow'] += skip + + return import_result + +_SEPARATORS = [' ', '/', '-', ''] +_PATTERN_BASELINE = [ + ('%m', '%d', '%Y'), + ('%d', '%m', '%Y'), + ('%Y', '%m', '%d'), + ('%Y', '%d', '%m'), +] +DATE_FORMATS = [] +# take the baseline format and duplicate performing the following +# substitution: long year -> short year, numerical month -> short +# month, numerical month -> long month. Each substitution builds on +# the previous two +for ps in _PATTERN_BASELINE: + patterns = {ps} + for s, t in [('%Y', '%y')]: + patterns.update([ # need listcomp: with genexpr "set changed size during iteration" + tuple(t if it == s else it for it in f) + for f in patterns + ]) + DATE_FORMATS.extend(patterns) +DATE_PATTERNS = [ + sep.join(fmt) + for sep in _SEPARATORS + for fmt in DATE_FORMATS +] +TIME_PATTERNS = [ + '%H:%M:%S', '%H:%M', '%H', # 24h + '%I:%M:%S %p', '%I:%M %p', '%I %p', # 12h +] + +def check_patterns(patterns, values): + for pattern in patterns: + p = to_re(pattern) + for val in values: + if val and not p.match(val): + break + + else: # no break, all match + return pattern + + return None + +def to_re(pattern): + """ cut down version of TimeRE converting strptime patterns to regex + """ + pattern = re.sub(r'\s+', r'\\s+', pattern) + pattern = re.sub('%([a-z])', _replacer, pattern, flags=re.IGNORECASE) + pattern = '^' + pattern + '$' + return re.compile(pattern, re.IGNORECASE) +def _replacer(m): + return _P_TO_RE[m.group(1)] + +_P_TO_RE = { + 'd': r"(3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", + 'H': r"(2[0-3]|[0-1]\d|\d)", + 'I': r"(1[0-2]|0[1-9]|[1-9])", + 'm': r"(1[0-2]|0[1-9]|[1-9])", + 'M': r"([0-5]\d|\d)", + 'S': r"(6[0-1]|[0-5]\d|\d)", + 'y': r"(\d\d)", + 'Y': r"(\d\d\d\d)", + + 'p': r"(am|pm)", + + '%': '%', +} diff --git a/addons/base_import/models/odf_ods_reader.py b/addons/base_import/models/odf_ods_reader.py new file mode 100644 index 00000000..daa7d2b1 --- /dev/null +++ b/addons/base_import/models/odf_ods_reader.py @@ -0,0 +1,98 @@ +# Copyright 2011 Marco Conti + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# sourced from https://github.com/marcoconti83/read-ods-with-odfpy +# further altered locally + +from odf import opendocument +from odf.table import Table, TableRow, TableCell +from odf.text import P + + +class ODSReader(object): + + # loads the file + def __init__(self, file=None, content=None, clonespannedcolumns=None): + if not content: + self.clonespannedcolumns = clonespannedcolumns + self.doc = opendocument.load(file) + else: + self.clonespannedcolumns = clonespannedcolumns + self.doc = content + self.SHEETS = {} + for sheet in self.doc.spreadsheet.getElementsByType(Table): + self.readSheet(sheet) + + # reads a sheet in the sheet dictionary, storing each sheet as an + # array (rows) of arrays (columns) + def readSheet(self, sheet): + name = sheet.getAttribute("name") + rows = sheet.getElementsByType(TableRow) + arrRows = [] + + # for each row + for row in rows: + arrCells = [] + cells = row.getElementsByType(TableCell) + + # for each cell + for count, cell in enumerate(cells, start=1): + # repeated value? + repeat = 0 + if count != len(cells): + repeat = cell.getAttribute("numbercolumnsrepeated") + if not repeat: + repeat = 1 + spanned = int(cell.getAttribute('numbercolumnsspanned') or 0) + # clone spanned cells + if self.clonespannedcolumns is not None and spanned > 1: + repeat = spanned + + ps = cell.getElementsByType(P) + textContent = u"" + + # for each text/text:span node + for p in ps: + for n in p.childNodes: + if n.nodeType == 1 and n.tagName == "text:span": + for c in n.childNodes: + if c.nodeType == 3: + textContent = u'{}{}'.format(textContent, n.data) + + if n.nodeType == 3: + textContent = u'{}{}'.format(textContent, n.data) + + if textContent: + if not textContent.startswith("#"): # ignore comments cells + for rr in range(int(repeat)): # repeated? + arrCells.append(textContent) + else: + for rr in range(int(repeat)): + arrCells.append("") + + # if row contained something + if arrCells: + arrRows.append(arrCells) + + #else: + # print ("Empty or commented row (", row_comment, ")") + + self.SHEETS[name] = arrRows + + # returns a sheet as an array (rows) of arrays (columns) + def getSheet(self, name): + return self.SHEETS[name] + + def getFirstSheet(self): + return next(iter(self.SHEETS.values())) diff --git a/addons/base_import/models/test_models.py b/addons/base_import/models/test_models.py new file mode 100644 index 00000000..af025573 --- /dev/null +++ b/addons/base_import/models/test_models.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +from odoo import fields, models + + +def model(suffix_name): + return 'base_import.tests.models.%s' % suffix_name + + +class Char(models.Model): + _name = model('char') + _description = 'Tests : Base Import Model, Character' + + value = fields.Char() +class CharRequired(models.Model): + _name = model('char.required') + _description = 'Tests : Base Import Model, Character required' + + value = fields.Char(required=True) + +class CharReadonly(models.Model): + _name = model('char.readonly') + _description = 'Tests : Base Import Model, Character readonly' + + value = fields.Char(readonly=True) + +class CharStates(models.Model): + _name = model('char.states') + _description = 'Tests : Base Import Model, Character states' + + value = fields.Char(readonly=True, states={'draft': [('readonly', False)]}) + +class CharNoreadonly(models.Model): + _name = model('char.noreadonly') + _description = 'Tests : Base Import Model, Character No readonly' + + value = fields.Char(readonly=True, states={'draft': [('invisible', True)]}) + +class CharStillreadonly(models.Model): + _name = model('char.stillreadonly') + _description = 'Tests : Base Import Model, Character still readonly' + + value = fields.Char(readonly=True, states={'draft': [('readonly', True)]}) + +# TODO: complex field (m2m, o2m, m2o) +class M2o(models.Model): + _name = model('m2o') + _description = 'Tests : Base Import Model, Many to One' + + value = fields.Many2one(model('m2o.related')) + +class M2oRelated(models.Model): + _name = model('m2o.related') + _description = 'Tests : Base Import Model, Many to One related' + + value = fields.Integer(default=42) + +class M2oRequired(models.Model): + _name = model('m2o.required') + _description = 'Tests : Base Import Model, Many to One required' + + value = fields.Many2one(model('m2o.required.related'), required=True) + +class M2oRequiredRelated(models.Model): + _name = model('m2o.required.related') + _description = 'Tests : Base Import Model, Many to One required related' + + value = fields.Integer(default=42) + +class O2m(models.Model): + _name = model('o2m') + _description = 'Tests : Base Import Model, One to Many' + + name = fields.Char() + value = fields.One2many(model('o2m.child'), 'parent_id') + +class O2mChild(models.Model): + _name = model('o2m.child') + _description = 'Tests : Base Import Model, One to Many child' + + parent_id = fields.Many2one(model('o2m')) + value = fields.Integer() + +class PreviewModel(models.Model): + _name = model('preview') + _description = 'Tests : Base Import Model Preview' + + name = fields.Char('Name') + somevalue = fields.Integer(string='Some Value', required=True) + othervalue = fields.Integer(string='Other Variable') + +class FloatModel(models.Model): + _name = model('float') + _description = 'Tests: Base Import Model Float' + + value = fields.Float() + value2 = fields.Monetary() + currency_id = fields.Many2one('res.currency') + +class ComplexModel(models.Model): + _name = model('complex') + _description = 'Tests: Base Import Model Complex' + + f = fields.Float() + m = fields.Monetary() + c = fields.Char() + currency_id = fields.Many2one('res.currency') + d = fields.Date() + dt = fields.Datetime() |
