From 04d5a42eeeb7c868b42be8a7a3db3aa5a9f429c0 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sat, 26 Oct 2024 11:22:16 +0300 Subject: [PATCH 01/16] src\rinoh\style.py too-many-nested-blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The method write_log had 6 nesting levels, while the recommendation is at most 5. I extracted two nested methods,  _log_entries and in it _handle_source. The methods extraction also fixed too-many-branches and too-many-statements in that method. --- src/rinoh/style.py | 180 ++++++++++++++++++++++++--------------------- 1 file changed, 95 insertions(+), 85 deletions(-) diff --git a/src/rinoh/style.py b/src/rinoh/style.py index fd2bba5be..1e4cfd61a 100644 --- a/src/rinoh/style.py +++ b/src/rinoh/style.py @@ -1049,90 +1049,100 @@ def write_log(self, document_source_root, filename_root): with log_path.open('w', encoding='utf-8') as log: current_page = None current_container = None - for entry in self.entries: - if entry.page_number != current_page: - current_page = entry.page_number - log.write('{line} page {} {line}\n'.format(current_page, - line='-' * 34)) - container = entry.container - if container.top_level_container is not current_container: - current_container = container.top_level_container - log.write("#### {}('{}')\n" - .format(type(current_container).__name__, - current_container.name)) - styled = entry.styled - level = styled.nesting_level - attrs = OrderedDict() - style = None - indent = ' ' * level - loc = '' - if styled.source: + self._log_entries(current_container, current_page, document_source_root, log) + + def _log_entries(self, current_container, current_page, document_source_root, log): + for entry in self.entries: + if entry.page_number != current_page: + current_page = entry.page_number + log.write('{line} page {} {line}\n'.format(current_page, + line='-' * 34)) + container = entry.container + if container.top_level_container is not current_container: + current_container 
= container.top_level_container + log.write("#### {}('{}')\n" + .format(type(current_container).__name__, + current_container.name)) + styled = entry.styled + level = styled.nesting_level + attrs = OrderedDict() + style = None + indent = ' ' * level + + loc = self._handle_source(document_source_root, styled) + continued_text = '(continued) ' if entry.continued else '' + log.write(' {}{}{}{}' + .format(indent, continued_text, + styled.short_repr(container), loc)) + if entry.custom_message: + log.write('\n {} ! {}\n'.format(indent, + entry.custom_message)) + continue + first = True + if style is not None: + first = False + style_attrs = ', '.join(key + '=' + value + for key, value in style.items()) + log.write('\n {} > {}({})' + .format(indent, attrs['style'], style_attrs)) + if entry: + for match in entry.matches: + base = '' + stylesheet = match.stylesheet + if stylesheet: + if first: + label = '>' + first = False + else: + label = ' ' + name = match.style_name + style = self.stylesheet.get_configuration(name) + base_name = ("DEFAULT" if style.base is None + else str(style.base)) + base = f' > {base_name}' + stylesheet_path = Path(stylesheet) + if stylesheet_path.is_absolute(): + stylesheet = stylesheet_path.relative_to( + document_source_root) + else: + label = 'x' + specificity = ','.join(str(score) + for score in match.specificity) + + log.write('\n {} {} ({}) {}{}{}' + .format(indent, label, specificity, + match.style_name, + f' [{stylesheet}]' if stylesheet + else '', base)) + log.write('\n') + + def _handle_source(self, document_source_root, styled): + + loc = '' + + if styled.source: + try: + filename, line, tag_name = styled.source.location + except ValueError: + loc = f' {styled.source.location}' + else: + if filename: try: - filename, line, tag_name = styled.source.location + filename, extra = filename.split(':') except ValueError: - loc = f' {styled.source.location}' - else: - if filename: - try: - filename, extra = filename.split(':') - except 
ValueError: - extra = None - file_path = Path(filename) - if file_path.is_absolute(): - try: - file_path = file_path.relative_to( - document_source_root) - except ValueError: - pass - loc = f' {file_path}' - if line: - loc += f':{line}' - if extra: - loc += f' ({extra})' - if tag_name: - loc += f' <{tag_name}>' - continued_text = '(continued) ' if entry.continued else '' - log.write(' {}{}{}{}' - .format(indent, continued_text, - styled.short_repr(container), loc)) - if entry.custom_message: - log.write('\n {} ! {}\n'.format(indent, - entry.custom_message)) - continue - first = True - if style is not None: - first = False - style_attrs = ', '.join(key + '=' + value - for key, value in style.items()) - log.write('\n {} > {}({})' - .format(indent, attrs['style'], style_attrs)) - if entry: - for match in entry.matches: - base = '' - stylesheet = match.stylesheet - if stylesheet: - if first: - label = '>' - first = False - else: - label = ' ' - name = match.style_name - style = self.stylesheet.get_configuration(name) - base_name = ("DEFAULT" if style.base is None - else str(style.base)) - base = f' > {base_name}' - stylesheet_path = Path(stylesheet) - if stylesheet_path.is_absolute(): - stylesheet = stylesheet_path.relative_to( - document_source_root) - else: - label = 'x' - specificity = ','.join(str(score) - for score in match.specificity) - - log.write('\n {} {} ({}) {}{}{}' - .format(indent, label, specificity, - match.style_name, - f' [{stylesheet}]' if stylesheet - else '', base)) - log.write('\n') + extra = None + file_path = Path(filename) + if file_path.is_absolute(): + try: + file_path = file_path.relative_to( + document_source_root) + except ValueError: + pass + loc = f' {file_path}' + if line: + loc += f':{line}' + if extra: + loc += f' ({extra})' + if tag_name: + loc += f' <{tag_name}>' + return loc From 2708eaced9f3b1850f1f55646cf8c86eb3ae9a4c Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sat, 26 Oct 2024 11:40:33 +0300 Subject: [PATCH 02/16] 
src\rinoh\font\opentype\required.py too-many-branches The constructor, which handles many format values, had too many branches. I extracted the handling of format 4 into a dedicated method. --- src/rinoh/font/opentype/required.py | 36 ++++++++++++++++------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/rinoh/font/opentype/required.py b/src/rinoh/font/opentype/required.py index 60ade02d0..8516a3702 100644 --- a/src/rinoh/font/opentype/required.py +++ b/src/rinoh/font/opentype/required.py @@ -385,22 +385,7 @@ def __init__(self, file, file_offset=None, **kwargs): elif self['format'] == 2: raise NotImplementedError elif self['format'] == 4: - seg_count = self['segCountX2'] >> 1 - self['glyphIdArray'] = array(ushort, self['length'])(file) - segments = zip(self['startCount'], self['endCount'], - self['idDelta'], self['idRangeOffset']) - out = {} - for i, (start, end, delta, range_offset) in enumerate(segments): - if i == seg_count - 1: - assert end == 0xFFFF - break - if range_offset > 0: - for j, code in enumerate(range(start, end + 1)): - index = (range_offset >> 1) - seg_count + i + j - out[code] = self['glyphIdArray'][index] - else: - for code in range(start, end + 1): - out[code] = (code + delta) % 2**16 + out = self._handle_format_4(file) elif self['format'] == 6: out = {code: index for code, index in zip(range(self['firstCode'], @@ -423,6 +408,25 @@ def __init__(self, file, file_offset=None, **kwargs): raise NotImplementedError self.mapping = out + def _handle_format_4(self, file): + seg_count = self['segCountX2'] >> 1 + self['glyphIdArray'] = array(ushort, self['length'])(file) + segments = zip(self['startCount'], self['endCount'], + self['idDelta'], self['idRangeOffset']) + out = {} + for i, (start, end, delta, range_offset) in enumerate(segments): + if i == seg_count - 1: + assert end == 0xFFFF + break + if range_offset > 0: + for j, code in enumerate(range(start, end + 1)): + index = (range_offset >> 1) - seg_count + i + j + out[code] = 
self['glyphIdArray'][index] + else: + for code in range(start, end + 1): + out[code] = (code + delta) % 2 ** 16 + return out + class CmapRecord(Record): entries = [('platformID', ushort), From 316ae9867d1b39298ac2aac6aaf31c8071f2d287 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sat, 26 Oct 2024 12:20:17 +0300 Subject: [PATCH 03/16] src\rinoh\reference.py too-many-branches Method children had many branches computing the text variable. I extracted those based on the type value into _handle_type and replaced these branches with a single one, checking if the type is relevant. --- src/rinoh/reference.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/rinoh/reference.py b/src/rinoh/reference.py index adda04210..01e84cfc8 100644 --- a/src/rinoh/reference.py +++ b/src/rinoh/reference.py @@ -401,17 +401,9 @@ def copy(self, parent=None): def children(self, container): if container is None: text = '${}'.format(self.type) - elif self.type == PAGE_NUMBER: - text = container.page.formatted_number - elif self.type == NUMBER_OF_PAGES: - part = container.document_part - text = format_number(part.number_of_pages, part.page_number_format) - elif self.type == DOCUMENT_TITLE: - text = container.document.get_metadata('title') - elif self.type == DOCUMENT_SUBTITLE: - text = container.document.get_metadata('subtitle') - elif self.type == DOCUMENT_AUTHOR: - text = container.document.get_metadata('author') + elif self.type in [PAGE_NUMBER, NUMBER_OF_PAGES, DOCUMENT_TITLE + , DOCUMENT_SUBTITLE, DOCUMENT_AUTHOR]: + text = _handle_type(self, container) elif isinstance(self.type, SectionFieldType): doc = container.document section = container.page.get_current_section(self.type.level) @@ -448,6 +440,21 @@ def create_variable(key, style=None): return substitute_variables(text, cls.RE_FIELD, create_variable, substitute_others, style) +def _handle_type(self, container): + if self.type == PAGE_NUMBER: + text = container.page.formatted_number + 
elif self.type == NUMBER_OF_PAGES: + part = container.document_part + text = format_number(part.number_of_pages, part.page_number_format) + elif self.type == DOCUMENT_TITLE: + text = container.document.get_metadata('title') + elif self.type == DOCUMENT_SUBTITLE: + text = container.document.get_metadata('subtitle') + elif self.type == DOCUMENT_AUTHOR: + text = container.document.get_metadata('author') + + return text + def substitute_variables(text, split_regex, create_variable, substitute_others, style): From 3d9509e5eb228580a9c33d8cedbe2c564c270815 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sat, 26 Oct 2024 15:17:49 +0300 Subject: [PATCH 04/16] src\rinoh\attribute.py too-many-branches Method __new__ of the class WithAttributes had too many branches. I extracted a static method _handle_bases handling the base-class logic. --- src/rinoh/attribute.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/rinoh/attribute.py b/src/rinoh/attribute.py index e2fdd2130..5fe2389fd 100644 --- a/src/rinoh/attribute.py +++ b/src/rinoh/attribute.py @@ -236,6 +236,16 @@ def __new__(mcls, classname, bases, cls_dict): .format(default, overrides)) supported_attributes = list(name for name in attributes) documented = set(supported_attributes) + WithAttributes._handle_bases(bases, doc, documented, supported_attributes) + if doc: + attr_doc = '\n '.join(chain([' Attributes:'], doc)) + cls_dict['__doc__'] = (cls_dict.get('__doc__', '') + '\n\n' + + attr_doc) + cls_dict['_supported_attributes'] = supported_attributes + return super().__new__(mcls, classname, bases, cls_dict) + + @staticmethod + def _handle_bases(bases, doc, documented, supported_attributes): for base_class in bases: try: supported_attributes.extend(base_class._supported_attributes) @@ -254,12 +264,6 @@ def __new__(mcls, classname, bases, cls_dict): .format(attr.accepted_type.__name__, format)) doc.append('\n *Default*: {}\n'.format(default)) documented.add(name) - if doc: - attr_doc = 
'\n '.join(chain([' Attributes:'], doc)) - cls_dict['__doc__'] = (cls_dict.get('__doc__', '') + '\n\n' - + attr_doc) - cls_dict['_supported_attributes'] = supported_attributes - return super().__new__(mcls, classname, bases, cls_dict) @property def _all_attributes(cls): From d6feaa9dbdab1c0d4c80c3444df6009b46561ae6 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sat, 26 Oct 2024 16:12:01 +0300 Subject: [PATCH 05/16] src\rinoh\__main__.py too-many-statements The function main had 116 statements (more than the recommended limit of 50). Most code handle different unrelated args so I extracted methods for them. --- src/rinoh/__main__.py | 239 +++++++++++++++++++++++++----------------- 1 file changed, 145 insertions(+), 94 deletions(-) diff --git a/src/rinoh/__main__.py b/src/rinoh/__main__.py index d5d83d99f..cb7d1642c 100644 --- a/src/rinoh/__main__.py +++ b/src/rinoh/__main__.py @@ -164,63 +164,26 @@ def main(): args = parser.parse_args() do_exit = False if args.versions: - print(f'rinohtype {__version__} ({__release_date__})') - with suppress(ImportError): - import sphinx - print(f'Sphinx {sphinx.__version__}') - print(f'Python {sys.version}') - print(platform()) + _handle_versions() return if args.docs: webbrowser.open(DOCS_URL) return if args.list_templates: - print('Installed document templates:') - for name, _ in sorted(DocumentTemplate.installed_resources): - print('- {}'.format(name)) + _handle_templates() do_exit = True if args.list_stylesheets: - print('Installed style sheets:') - for name, _ in sorted(StyleSheet.installed_resources): - print('- {}'.format(name)) + _handle_stylesheets() do_exit = True if args.list_formats: - print('Supported input file formats:') - for entry_point, dist in find_entry_points('rinoh.frontends'): - reader_cls = entry_point.load() - print('- {} (.{}) [{}]' - .format(entry_point.name, ', .'.join(reader_cls.extensions), - get_distribution_name(dist))) + _handle_formats() do_exit = True if args.list_options: - reader_name, 
reader_cls = get_reader_by_name(args.list_options) - if list(reader_cls.supported_attributes): - print('Options supported by the {} frontend'.format(reader_name)) - for name in reader_cls.supported_attributes: - attr_def = reader_cls.attribute_definition(name) - print('- {} ({}): {}. Default: {}' - .format(name, attr_def.accepted_type.__name__, - attr_def.description, attr_def.default_value)) - else: - print('The {} frontend takes no options'.format(reader_name)) + _handle_list_options(args) do_exit = True if args.list_fonts: if args.list_fonts is object: - print('Installed fonts:') - for typeface, distribution in installed_typefaces(): - print('- {} [{}]' .format(typeface.name, distribution)) - widths = OrderedDict() - for font in typeface.fonts(): - widths.setdefault(font.width, []).append(font) - for width, fonts in widths.items(): - styles = [] - for font in fonts: - style = FontWeight.to_name(font.weight) - if font.slant != FontSlant.UPRIGHT: - style = '{}-{}'.format(font.slant, style) - styles.append(style) - print(' {}: {}'.format(FontWidth.to_name(width), - ', '.join(styles))) + _handle_fonts() else: display_fonts(args.list_fonts) do_exit = True @@ -234,77 +197,50 @@ def main(): template_cfg = {} variables = {} cwd_source = CwdSource() - if args.stylesheet: - try: - stylesheet = StyleSheet.from_string(args.stylesheet, - source=cwd_source) - except FileNotFoundError: - raise SystemExit("Could not find the style sheet '{}'. " - "Aborting.\n" - "Make sure the path to your style sheet is " - "correct, or run `{} --list-stylesheets` to find " - "out which style sheets are installed." - .format(args.stylesheet, parser.prog)) - template_cfg['stylesheet'] = stylesheet + _handle_stylesheet(args, cwd_source, parser, template_cfg) if args.paper: - try: - variables['paper_size'] = Paper.from_string(args.paper.lower()) - except ValueError: - accepted = ', '.join(sorted(paper.name for paper - in PAPER_BY_NAME.values())) - raise SystemExit("Unknown paper size '{}'. 
Must be one of:\n" - " {}".format(args.paper, accepted)) + _handle_paper(args, variables) if not os.path.exists(args.input): raise SystemExit('{}: No such file'.format(args.input)) input_dir, input_filename = os.path.split(args.input) input_root, input_ext = os.path.splitext(input_filename) - if args.output: - if os.path.isdir(args.output): - output_path = os.path.join(args.output, input_root) - else: - output_path = args.output - else: - output_path = input_root + output_path = set_ouput_path(args, input_root) reader_name, reader_cls = (get_reader_by_name(args.format) if args.format else get_reader_by_extension(input_ext[1:])) str_options = dict((part.strip() for part in option.split('=', maxsplit=1)) for option, in args.option) - try: - options = {} - for key, str_value in str_options.items(): - attr_def = reader_cls.attribute_definition(key) - options[key] = attr_def.accepted_type.from_string(str_value) - except KeyError as e: - raise SystemExit('The {} frontend does not accept the option {}' - .format(reader_name, e)) - except ValueError as e: - raise SystemExit("The value passed to the '{}' option is not valid:\n" - ' {}'.format(key, e)) + options = _build_options(reader_cls, reader_name, str_options) reader = reader_cls(**options) - if os.path.isfile(args.template): - template_cfg['base'] = TemplateConfigurationFile(args.template, - source=cwd_source) - template_cls = template_cfg['base'].template - else: - try: - template_cls = DocumentTemplate.from_string(args.template) - except ResourceNotFound: - raise SystemExit("Could not find the template (configuration file) " - "'{}'. 
Aborting.\nMake sure the path to your " - "template configuration file is correct, or run " - "`{} --list-stylesheets` to find out which " - "templates are installed.".format(args.template, - parser.prog)) + template_cls = set_template(args, cwd_source, parser, template_cfg) configuration = template_cls.Configuration('rinoh command line options', **template_cfg) configuration.variables.update(variables) document_tree = reader.parse(args.input) + _do_rendering(args, configuration, document_tree, output_path, template_cls) + + +def _handle_stylesheet(args, cwd_source, parser, template_cfg): + if args.stylesheet: + try: + stylesheet = StyleSheet.from_string(args.stylesheet, + source=cwd_source) + except FileNotFoundError: + raise SystemExit("Could not find the style sheet '{}'. " + "Aborting.\n" + "Make sure the path to your style sheet is " + "correct, or run `{} --list-stylesheets` to find " + "out which style sheets are installed." + .format(args.stylesheet, parser.prog)) + template_cfg['stylesheet'] = stylesheet + + +def _do_rendering(args, configuration, document_tree, output_path, template_cls): while True: try: document = template_cls(document_tree, configuration=configuration) @@ -328,6 +264,121 @@ def main(): err.resource_name)) +def set_template(args, cwd_source, parser, template_cfg): + if os.path.isfile(args.template): + template_cfg['base'] = TemplateConfigurationFile(args.template, + source=cwd_source) + template_cls = template_cfg['base'].template + else: + try: + template_cls = DocumentTemplate.from_string(args.template) + except ResourceNotFound: + raise SystemExit("Could not find the template (configuration file) " + "'{}'. 
Aborting.\nMake sure the path to your " + "template configuration file is correct, or run " + "`{} --list-stylesheets` to find out which " + "templates are installed.".format(args.template, + parser.prog)) + return template_cls + + +def _build_options(reader_cls, reader_name, str_options): + try: + options = {} + for key, str_value in str_options.items(): + attr_def = reader_cls.attribute_definition(key) + options[key] = attr_def.accepted_type.from_string(str_value) + except KeyError as e: + raise SystemExit('The {} frontend does not accept the option {}' + .format(reader_name, e)) + except ValueError as e: + raise SystemExit("The value passed to the '{}' option is not valid:\n" + ' {}'.format(key, e)) + return options + + +def set_ouput_path(args, input_root): + if args.output: + if os.path.isdir(args.output): + output_path = os.path.join(args.output, input_root) + else: + output_path = args.output + else: + output_path = input_root + return output_path + + +def _handle_paper(args, variables): + try: + variables['paper_size'] = Paper.from_string(args.paper.lower()) + except ValueError: + accepted = ', '.join(sorted(paper.name for paper + in PAPER_BY_NAME.values())) + raise SystemExit("Unknown paper size '{}'. 
Must be one of:\n" + " {}".format(args.paper, accepted)) + + +def _handle_fonts(): + print('Installed fonts:') + for typeface, distribution in installed_typefaces(): + print('- {} [{}]'.format(typeface.name, distribution)) + widths = OrderedDict() + for font in typeface.fonts(): + widths.setdefault(font.width, []).append(font) + for width, fonts in widths.items(): + styles = [] + for font in fonts: + style = FontWeight.to_name(font.weight) + if font.slant != FontSlant.UPRIGHT: + style = '{}-{}'.format(font.slant, style) + styles.append(style) + print(' {}: {}'.format(FontWidth.to_name(width), + ', '.join(styles))) + + +def _handle_list_options(args): + reader_name, reader_cls = get_reader_by_name(args.list_options) + if list(reader_cls.supported_attributes): + print('Options supported by the {} frontend'.format(reader_name)) + for name in reader_cls.supported_attributes: + attr_def = reader_cls.attribute_definition(name) + print('- {} ({}): {}. Default: {}' + .format(name, attr_def.accepted_type.__name__, + attr_def.description, attr_def.default_value)) + else: + print('The {} frontend takes no options'.format(reader_name)) + + +def _handle_formats(): + print('Supported input file formats:') + for entry_point, dist in find_entry_points('rinoh.frontends'): + reader_cls = entry_point.load() + print('- {} (.{}) [{}]' + .format(entry_point.name, ', .'.join(reader_cls.extensions), + get_distribution_name(dist))) + + +def _handle_stylesheets(): + print('Installed style sheets:') + for name, _ in sorted(StyleSheet.installed_resources): + print('- {}'.format(name)) + + +def _handle_templates(): + print('Installed document templates:') + for name, _ in sorted(DocumentTemplate.installed_resources): + print('- {}'.format(name)) + + +def _handle_versions(): + print(f'rinohtype {__version__} ({__release_date__})') + with suppress(ImportError): + import sphinx + print(f'Sphinx {sphinx.__version__}') + print(f'Python {sys.version}') + print(platform()) + + class 
CwdSource(Source): @property def location(self): From 86ac9dad3181463f6111f9e44528660b1ab6252d Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sat, 26 Oct 2024 16:18:55 +0300 Subject: [PATCH 06/16] src\rinoh\image.py superfluous-parens Remove two unneeded pairs of parentheses --- src/rinoh/image.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rinoh/image.py b/src/rinoh/image.py index bdc9be48c..f66ef8a29 100644 --- a/src/rinoh/image.py +++ b/src/rinoh/image.py @@ -78,7 +78,7 @@ def __eq__(self, other): return posix_path(str(self)) == posix_path(str(other)) def __ne__(self, other): - return not (self == other) + return not self == other class RequiredArg(Attribute): @@ -100,7 +100,7 @@ def from_tokens(cls, tokens, source): @classmethod def doc_format(cls): - return ('path to an image file enclosed in quotes') + return 'path to an image file enclosed in quotes' class ImageArgsBase(AttributesDictionary): From 5c1a7416e2c90711d8c1e2ecbe193cb90bfcf8bf Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sat, 26 Oct 2024 17:07:20 +0300 Subject: [PATCH 07/16] src\rinoh\font\opentype\cff.py line-too-long Made two long lines shorter and more readable --- src/rinoh/font/opentype/cff.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/rinoh/font/opentype/cff.py b/src/rinoh/font/opentype/cff.py index f184b0fc9..a0c4d4902 100644 --- a/src/rinoh/font/opentype/cff.py +++ b/src/rinoh/font/opentype/cff.py @@ -144,8 +144,10 @@ class TopDict(Dict): # and offset (0) (12, 20): Operator('SyntheticBase', number), # synthetic base font index (12, 21): Operator('PostScript', sid), # embedded PostScript language code - (12, 22): Operator('BaseFontName', sid), # (added as needed by Adobe-based technology) - (12, 23): Operator('BaseFontBlend', delta)} # (added as needed by Adobe-based technology) + (12, 22): Operator('BaseFontName' + , sid), # (added as needed by Adobe-based technology) + (12, 23): Operator('BaseFontBlend' + , delta)} # (added as needed by 
Adobe-based technology) class Index(list): From 18292c7c8036d5436144eea47a0fe02091fdb031 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sat, 26 Oct 2024 17:19:38 +0300 Subject: [PATCH 08/16] src\rinoh\backend\pdf\filter.py too-many-branches Function run_length_encoder had 14 branches (limit is 12). Extracted the _handle_byte function --- src/rinoh/backend/pdf/filter.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/rinoh/backend/pdf/filter.py b/src/rinoh/backend/pdf/filter.py index 20300c43f..9f24ba763 100644 --- a/src/rinoh/backend/pdf/filter.py +++ b/src/rinoh/backend/pdf/filter.py @@ -365,16 +365,7 @@ def flush(buffer): destination.write(buffer) return b'' - last_byte = yield - buffer = b'' - same_count = 1 - while True: - try: - byte = yield - except GeneratorExit: - break - if byte == b'': - break + def _handle_byte(buffer, byte, flush, last_byte, same_count, write_repeat): if byte != last_byte: if same_count > 2: _, same_count = write_repeat(last_byte, same_count) @@ -390,6 +381,19 @@ def flush(buffer): buffer = flush(buffer) if same_count == 128: byte, same_count = write_repeat(last_byte, same_count) + return buffer, byte, same_count + + last_byte = yield + buffer = b'' + same_count = 1 + while True: + try: + byte = yield + except GeneratorExit: + break + if byte == b'': + break + buffer, byte, same_count = _handle_byte(buffer, byte, flush, last_byte, same_count, write_repeat) last_byte = byte if same_count > 2: _, same_count = write_repeat(last_byte, same_count) From 0980f8b022c827dcf5d5ffd34b8c279e47e3933a Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sat, 26 Oct 2024 18:00:15 +0300 Subject: [PATCH 09/16] src\rinoh\table.py superfluous-parens remove unneeded parenthesis --- src/rinoh/table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rinoh/table.py b/src/rinoh/table.py index a6dc3dcd3..2d10237f2 100644 --- a/src/rinoh/table.py +++ b/src/rinoh/table.py @@ -410,7 +410,7 @@ def 
draw_cell_border(rendered_cell, cell_height, container): elif vertical_align == VerticalAlign.MIDDLE: vertical_offset = (cell_height - rendered_cell.height) / 2 elif vertical_align == VerticalAlign.BOTTOM: - vertical_offset = (cell_height - rendered_cell.height) + vertical_offset = cell_height - rendered_cell.height y_offset = float(y_cursor + vertical_offset) rendered_cell.container.place_at(container, x_cursor, y_offset) y_cursor += rendered_row.height From e0519083fddca5e154178336ab19dd06ae6c8c10 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sat, 26 Oct 2024 18:15:31 +0300 Subject: [PATCH 10/16] Update purepng.py Function check_time had 7 returns. I store the return value in a variable and return it at the end of the function, avoiding too many points of exit. Other than the return, there is also an exception in case the type is not supported. --- src/rinoh/backend/pdf/xobject/purepng.py | 31 +++++++++++++----------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/rinoh/backend/pdf/xobject/purepng.py b/src/rinoh/backend/pdf/xobject/purepng.py index 691f83856..03ecb3392 100644 --- a/src/rinoh/backend/pdf/xobject/purepng.py +++ b/src/rinoh/backend/pdf/xobject/purepng.py @@ -417,28 +417,31 @@ def check_color(c, greyscale, which): def check_time(value): """Convert time from most popular representations to datetime""" + time_value = None if value is None: - return None - if isinstance(value, (time.struct_time, tuple)): - return value - if isinstance(value, datetime.datetime): - return value.timetuple() - if isinstance(value, datetime.date): + time_value = None + elif isinstance(value, (time.struct_time, tuple)): + time_value = value + elif isinstance(value, datetime.datetime): + time_value = value.timetuple() + elif isinstance(value, datetime.date): res = datetime.datetime.utcnow() res.replace(year=value.year, month=value.month, day=value.day) - return res.timetuple() - if isinstance(value, datetime.time): - return 
datetime.datetime.combine(datetime.date.today(), + time_value = res.timetuple() + elif isinstance(value, datetime.time): + time_value = datetime.datetime.combine(datetime.date.today(), value).timetuple() - if isinteger(value): + elif isinteger(value): # Handle integer as timestamp - return time.gmtime(value) - if isinstance(value, basestring): + time_value = time.gmtime(value) + elif isinstance(value, basestring): if value.lower() == 'now': - return time.gmtime() + time_value = time.gmtime() # TODO: parsing some popular strings - raise ValueError("Unsupported time representation:" + repr(value)) + else: + raise ValueError("Unsupported time representation:" + repr(value)) + return time_value def popdict(src, keys): """ From fbafa07326766973990810b3ad994cc2bac6d5c0 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sat, 26 Oct 2024 19:27:39 +0300 Subject: [PATCH 11/16] src\rinoh\flowable.py too-many-branches Method render of class LabeledFlowable had 14 branches. Extracted the method _compute_spillover to structure and reduce branches --- src/rinoh/flowable.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/rinoh/flowable.py b/src/rinoh/flowable.py index 9f5dfaeff..d825996ac 100644 --- a/src/rinoh/flowable.py +++ b/src/rinoh/flowable.py @@ -725,19 +725,9 @@ def style(name): label_width = label_column_width or clamp(label_min_width, free_label_width, label_max_width) - if label_max_width is None: - label_spillover = True - elif free_label_width > label_width: - if style('wrap_label'): - vcontainer = VirtualContainer(container, width=label_max_width) - wrapped_width, _, _ = self.label.flow(vcontainer, 0) - if wrapped_width < label_max_width: - label_width = wrapped_width - else: - label_width = label_min_width - label_spillover = True - else: - label_spillover = True + label_spillover, label_width = self._compute_spillover(container + , free_label_width, label_max_width + , label_min_width, label_spillover, 
label_width, style) left = label_width + style('label_spacing') max_label_width = None if label_spillover else label_width @@ -785,6 +775,23 @@ def style(name): descender = label_descender return left + width, label_baseline, descender + def _compute_spillover(self, container, free_label_width, label_max_width, label_min_width, label_spillover, + label_width, style): + if label_max_width is None: + label_spillover = True + elif free_label_width > label_width: + if style('wrap_label'): + vcontainer = VirtualContainer(container, width=label_max_width) + wrapped_width, _, _ = self.label.flow(vcontainer, 0) + if wrapped_width < label_max_width: + label_width = wrapped_width + else: + label_width = label_min_width + label_spillover = True + else: + label_spillover = True + return label_spillover, label_width + def _flow_label(self, container, last_descender, max_width, y_offset, space_below): label_container = \ From 7356ad2013babb0f76b37c54cf179f5aad3885d7 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sat, 26 Oct 2024 19:37:53 +0300 Subject: [PATCH 12/16] src\rinoh\hyphenator.py superfluous-parens __all__ = ("Hyphenator") while it should be a list (e.g. __all__ = ["Hyphenator"]). Seems like a bug. https://stackoverflow.com/questions/44834/what-does-all-mean-in-python --- src/rinoh/hyphenator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rinoh/hyphenator.py b/src/rinoh/hyphenator.py index 99fed57dc..5b5381566 100644 --- a/src/rinoh/hyphenator.py +++ b/src/rinoh/hyphenator.py @@ -21,7 +21,7 @@ import sys import re -__all__ = ("Hyphenator") +__all__ = ["Hyphenator"] # cache of per-file Hyph_dict objects hdcache = {} From c9bdd017fd85f9f6fc3e6d343620a62c5373e8ac Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sat, 26 Oct 2024 21:06:07 +0300 Subject: [PATCH 13/16] src\rinoh\paragraph.py too-many-nested-blocks Method _spans_to_words of class ParagraphState had 7 nesting levels. I extracted _process_groups that goes over the groups to reduce that. 
--- src/rinoh/paragraph.py | 85 ++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 36 deletions(-) diff --git a/src/rinoh/paragraph.py b/src/rinoh/paragraph.py index 5c31be619..7992f058a 100644 --- a/src/rinoh/paragraph.py +++ b/src/rinoh/paragraph.py @@ -604,48 +604,61 @@ def _spans_to_words(self, spans): for _ in range(self.group_index): next(groups) group_index += 1 - for special, chars in groups: - group_index += 1 - if special is ForwardSlash: - self.group_index = group_index - 1 - if word: - container = yield word - word = Word() - elif special: - word_string = str(word).strip() - last_word = (word_string.rsplit(maxsplit=1)[-1] - if word_string else '') - if not (last_word.lower() in no_break_after - and special is Space): - self.group_index = group_index - 1 - if word: - container = yield word - for _ in chars: - container = yield special(span, lig_kern) - word = Word() - continue - part = ''.join(chars).replace('\N{NO-BREAK SPACE}', ' ') - if word and word[-1].span is span: - prev_glyphs_span = word.pop() - part = str(prev_glyphs_span) + part - try: - glyphs = [get_glyph_metrics(char) for char in part] - except MissingGlyphException: - # FIXME: span annotations are lost here - rest = ''.join(char for _, group in groups - for char in group) - rest_of_span = SingleStyledText(part + rest, parent=span) - missing_glyphs_spans = handle_missing_glyphs(rest_of_span, container) - break - glyphs = lig_kern(part, glyphs) - glyphs_span = GlyphsSpan(span, lig_kern, glyphs) - word.append(glyphs_span) + container, group_index, missing_glyphs_spans, word = yield from self._process_groups(container, + get_glyph_metrics, + group_index, + groups, + lig_kern, + missing_glyphs_spans, + no_break_after, + span, + word) except InlineFlowableException: glyphs_span = span.flow_inline(container, 0) word.append(glyphs_span) if word: yield word + def _process_groups(self, container, get_glyph_metrics, group_index, groups, lig_kern, missing_glyphs_spans, + 
no_break_after, span, word): + for special, chars in groups: + group_index += 1 + if special is ForwardSlash: + self.group_index = group_index - 1 + if word: + container = yield word + word = Word() + elif special: + word_string = str(word).strip() + last_word = (word_string.rsplit(maxsplit=1)[-1] + if word_string else '') + if not (last_word.lower() in no_break_after + and special is Space): + self.group_index = group_index - 1 + if word: + container = yield word + for _ in chars: + container = yield special(span, lig_kern) + word = Word() + continue + part = ''.join(chars).replace('\N{NO-BREAK SPACE}', ' ') + if word and word[-1].span is span: + prev_glyphs_span = word.pop() + part = str(prev_glyphs_span) + part + try: + glyphs = [get_glyph_metrics(char) for char in part] + except MissingGlyphException: + # FIXME: span annotations are lost here + rest = ''.join(char for _, group in groups + for char in group) + rest_of_span = SingleStyledText(part + rest, parent=span) + missing_glyphs_spans = handle_missing_glyphs(rest_of_span, container) + break + glyphs = lig_kern(part, glyphs) + glyphs_span = GlyphsSpan(span, lig_kern, glyphs) + word.append(glyphs_span) + return container, group_index, missing_glyphs_spans, word + def prepend_word(self, word): self._first_word = word From 0f0eef0214bad5542a6fd04e624faa5fe08f6978 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sat, 26 Oct 2024 21:25:07 +0300 Subject: [PATCH 14/16] src\rinoh\backend\pdf\reader.py too-many-nested-blocks Method read_dictionary_or_stream of class PDFObjectReader had 6 nesting levels. I extracted _handle_filter that takes only the dictionary as a parameter and encapsulates the creation of the stream_filter. 
--- src/rinoh/backend/pdf/reader.py | 54 ++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/src/rinoh/backend/pdf/reader.py b/src/rinoh/backend/pdf/reader.py index 9930e0d52..5ef01bf2f 100644 --- a/src/rinoh/backend/pdf/reader.py +++ b/src/rinoh/backend/pdf/reader.py @@ -160,31 +160,7 @@ def read_dictionary_or_stream(self, indirect=False): if self.next_token() == b'stream': self.jump_to_next_line() length = int(dictionary['Length']) - if 'Filter' in dictionary: - filter_or_filters = dictionary['Filter'] - if isinstance(filter_or_filters, cos.Name): - filter_class = FILTER_SUBCLASSES[filter_or_filters] - try: - decode_params = dictionary['DecodeParms'] - decode_params.__class__ = filter_class.params_class - except KeyError: - decode_params = None - stream_filter = filter_class(params=decode_params) - else: - filter_classes = [FILTER_SUBCLASSES[filter_name] - for filter_name in filter_or_filters] - try: - stream_filter = [] - for fltr_cls, params in zip(filter_classes, - dictionary['DecodeParms']): - if params: - params.__class__ = fltr_cls.params_class - stream_filter.append(fltr_cls(params=params)) - except KeyError: - stream_filter = [filter_class() - for filter_class in filter_classes] - else: - stream_filter = None + stream_filter = self._handle_filter(dictionary) stream = cos.Stream(stream_filter) # copy dict contents: .update() would dereference Reference values! 
for key, value in dictionary.items(): @@ -203,6 +179,34 @@ def read_dictionary_or_stream(self, indirect=False): dictionary.__class__ = DICTIONARY_SUBCLASSES[key] return dictionary + def _handle_filter(self, dictionary): + if 'Filter' in dictionary: + filter_or_filters = dictionary['Filter'] + if isinstance(filter_or_filters, cos.Name): + filter_class = FILTER_SUBCLASSES[filter_or_filters] + try: + decode_params = dictionary['DecodeParms'] + decode_params.__class__ = filter_class.params_class + except KeyError: + decode_params = None + stream_filter = filter_class(params=decode_params) + else: + filter_classes = [FILTER_SUBCLASSES[filter_name] + for filter_name in filter_or_filters] + try: + stream_filter = [] + for fltr_cls, params in zip(filter_classes, + dictionary['DecodeParms']): + if params: + params.__class__ = fltr_cls.params_class + stream_filter.append(fltr_cls(params=params)) + except KeyError: + stream_filter = [filter_class() + for filter_class in filter_classes] + else: + stream_filter = None + return stream_filter + escape_chars = b'nrtbf()\\' def read_string(self, indirect=False): From 1b6ea4dcfe98057ada2702fd392b87a7d19f578b Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sat, 26 Oct 2024 21:35:39 +0300 Subject: [PATCH 15/16] src\rinoh\font\type1.py too-many-branches The constructor of AdobeFontMetricsParser had 14 branches. I extracted _process_line with the content of the loop going over lines. 
--- src/rinoh/font/type1.py | 67 +++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/src/rinoh/font/type1.py b/src/rinoh/font/type1.py index c0ef78e75..e16fe3336 100644 --- a/src/rinoh/font/type1.py +++ b/src/rinoh/font/type1.py @@ -87,39 +87,42 @@ def __init__(self, file): continue if key == 'Comment': pass - elif key.startswith('Start'): - section_name = key[5:] - section_names.append(section_name) - section[section_name] = {} - section = section[section_name] - sections.append(section) - elif key.startswith('End'): - assert key[3:] == section_names.pop() - sections.pop() - section = sections[-1] - elif section_names[-1] == 'CharMetrics': - glyph_metrics = self._parse_character_metrics(line) - self._glyphs[glyph_metrics.name] = glyph_metrics - elif section_names[-1] == 'KernPairs': - tokens = line.split() - if tokens[0] == 'KPX': - pair, kerning = (tokens[1], tokens[2]), tokens[-1] - self._kerning_pairs[pair] = number(kerning) - else: - raise NotImplementedError - elif section_names[-1] == 'Composites': - warn('Composites in Type1 fonts are currently not supported.' 
- '({})'.format(self.filename) if self.filename else '') - elif key == chr(26): # EOF marker - assert not file.read() + self._process_line(file, key, line, section, section_names, sections, values) + + def _process_line(self, file, key, line, section, section_names, sections, values): + if key.startswith('Start'): + section_name = key[5:] + section_names.append(section_name) + section[section_name] = {} + section = section[section_name] + sections.append(section) + elif key.startswith('End'): + assert key[3:] == section_names.pop() + sections.pop() + section = sections[-1] + elif section_names[-1] == 'CharMetrics': + glyph_metrics = self._parse_character_metrics(line) + self._glyphs[glyph_metrics.name] = glyph_metrics + elif section_names[-1] == 'KernPairs': + tokens = line.split() + if tokens[0] == 'KPX': + pair, kerning = (tokens[1], tokens[2]), tokens[-1] + self._kerning_pairs[pair] = number(kerning) else: - funcs = self.KEYWORDS[key] - try: - values = [func(val) - for func, val in zip(funcs, values.split())] - except TypeError: - values = funcs(values) - section[key] = values + raise NotImplementedError + elif section_names[-1] == 'Composites': + warn('Composites in Type1 fonts are currently not supported.' + '({})'.format(self.filename) if self.filename else '') + elif key == chr(26): # EOF marker + assert not file.read() + else: + funcs = self.KEYWORDS[key] + try: + values = [func(val) + for func, val in zip(funcs, values.split())] + except TypeError: + values = funcs(values) + section[key] = values def _parse_character_metrics(self, line): ligatures = {} From 11bdfbeb486685db6e87538db9d669bc2d8a8e04 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sat, 26 Oct 2024 21:46:34 +0300 Subject: [PATCH 16/16] src\rinoh\stylesheets\matcher.py wildcard-import Code uses from rinoh.styleds import * This hides the imported objects and might lead to collisions in the future if objects of the same name are imported with wildcards. 
styleds is a collection of imports, aimed to ease related imports. I copied the used imports from there and deleted the unused ones. This adds many imports and does not benefit from styleds, so this is a downside of fixing this alert. --- src/rinoh/stylesheets/matcher.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/rinoh/stylesheets/matcher.py b/src/rinoh/stylesheets/matcher.py index e9adc873b..89106195a 100644 --- a/src/rinoh/stylesheets/matcher.py +++ b/src/rinoh/stylesheets/matcher.py @@ -1,7 +1,29 @@ from rinoh.document import DocumentTree from rinoh.style import StyledMatcher, SelectorByName -from rinoh.styleds import * +from rinoh.text import StyledText, SingleStyledText, MixedStyledText +from rinoh.image import Figure, Caption +from rinoh.image import ListOfFiguresSection, ListOfFigures +from rinoh.image import Image, InlineImage +from rinoh.flowable import GroupedFlowables, StaticGroupedFlowables +from rinoh.flowable import LabeledFlowable, GroupedLabeledFlowables +from rinoh.highlight import CodeBlock, CodeBlockWithCaption, Token +from rinoh.index import IndexSection, Index, IndexLabel, IndexEntry +from rinoh.paragraph import ParagraphBase, Paragraph +from rinoh.reference import ReferenceBase, Reference, DirectReference +from rinoh.reference import ReferenceField, ReferenceText, ReferencingParagraph +from rinoh.reference import Note +from rinoh.reference import NoteMarkerBase, NoteMarkerByID, NoteMarkerWithNote +from rinoh.structure import Header, Footer +from rinoh.structure import HorizontalRule +from rinoh.structure import List, ListItem, ListItemLabel, DefinitionList +from rinoh.structure import Section, TableOfContentsSection, Heading +from rinoh.structure import ListOfEntry +from rinoh.structure import Admonition, AdmonitionTitleParagraph +from rinoh.structure import TableOfContents, TableOfContentsEntry +from rinoh.table import TableWithCaption, Table, TableSection, TableHead, TableBody +from 
rinoh.table import TableRow, TableCell, TableCellBackground, TableCellBorder +from rinoh.table import ListOfTables, ListOfTablesSection __all__ = ['matcher']