diff options
| author | stephanchrst <stephanchrst@gmail.com> | 2022-05-10 21:51:50 +0700 |
|---|---|---|
| committer | stephanchrst <stephanchrst@gmail.com> | 2022-05-10 21:51:50 +0700 |
| commit | 3751379f1e9a4c215fb6eb898b4ccc67659b9ace (patch) | |
| tree | a44932296ef4a9b71d5f010906253d8c53727726 /addons/base_import/models/odf_ods_reader.py | |
| parent | 0a15094050bfde69a06d6eff798e9a8ddf2b8c21 (diff) | |
initial commit 2
Diffstat (limited to 'addons/base_import/models/odf_ods_reader.py')
| -rw-r--r-- | addons/base_import/models/odf_ods_reader.py | 98 |
1 files changed, 98 insertions, 0 deletions
diff --git a/addons/base_import/models/odf_ods_reader.py b/addons/base_import/models/odf_ods_reader.py new file mode 100644 index 00000000..daa7d2b1 --- /dev/null +++ b/addons/base_import/models/odf_ods_reader.py @@ -0,0 +1,98 @@ +# Copyright 2011 Marco Conti + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# sourced from https://github.com/marcoconti83/read-ods-with-odfpy +# further altered locally + +from odf import opendocument +from odf.table import Table, TableRow, TableCell +from odf.text import P + + +class ODSReader(object): + + # loads the file + def __init__(self, file=None, content=None, clonespannedcolumns=None): + if not content: + self.clonespannedcolumns = clonespannedcolumns + self.doc = opendocument.load(file) + else: + self.clonespannedcolumns = clonespannedcolumns + self.doc = content + self.SHEETS = {} + for sheet in self.doc.spreadsheet.getElementsByType(Table): + self.readSheet(sheet) + + # reads a sheet in the sheet dictionary, storing each sheet as an + # array (rows) of arrays (columns) + def readSheet(self, sheet): + name = sheet.getAttribute("name") + rows = sheet.getElementsByType(TableRow) + arrRows = [] + + # for each row + for row in rows: + arrCells = [] + cells = row.getElementsByType(TableCell) + + # for each cell + for count, cell in enumerate(cells, start=1): + # repeated value? + repeat = 0 + if count != len(cells): + repeat = cell.getAttribute("numbercolumnsrepeated") + if not repeat: + repeat = 1 + spanned = int(cell.getAttribute('numbercolumnsspanned') or 0) + # clone spanned cells + if self.clonespannedcolumns is not None and spanned > 1: + repeat = spanned + + ps = cell.getElementsByType(P) + textContent = u"" + + # for each text/text:span node + for p in ps: + for n in p.childNodes: + if n.nodeType == 1 and n.tagName == "text:span": + for c in n.childNodes: + if c.nodeType == 3: + textContent = u'{}{}'.format(textContent, n.data) + + if n.nodeType == 3: + textContent = u'{}{}'.format(textContent, n.data) + + if textContent: + if not textContent.startswith("#"): # ignore comments cells + for rr in range(int(repeat)): # repeated? + arrCells.append(textContent) + else: + for rr in range(int(repeat)): + arrCells.append("") + + # if row contained something + if arrCells: + arrRows.append(arrCells) + + #else: + # print ("Empty or commented row (", row_comment, ")") + + self.SHEETS[name] = arrRows + + # returns a sheet as an array (rows) of arrays (columns) + def getSheet(self, name): + return self.SHEETS[name] + + def getFirstSheet(self): + return next(iter(self.SHEETS.values())) |
