[lcov] Re-implement lcov reports using the same algorithm as XML reports.

This fixes five serious bugs:

- The first field of a BRDA: line may not be zero (#1846).
- The first field of a BRDA: line is supposed to be the *source* line of each instrumented branch, not the destination line.
- The fourth field of a BRDA: line is supposed to be “-” when the branch was *never reached*, not when it was reached but never/always taken (which is what a branch’s presence in missing_arcs means). As far as I can tell, coverage.py currently doesn’t know of the existence of branches that were never reached.
- The decision of whether to emit DA: and BRDA: lines at all is now taken strictly according to what’s in analysis.statements. This is important because some lines may appear in analysis.executed and/or analysis.executed_branch_arcs but *not* in analysis.statements. For example, the beginnings of docstrings are like this, as is the phantom line 1 of an empty __init__.py in Python 3.10 and earlier. (I am pleased to note that the special casing of empty __init__.py in the test suite is no longer required after this patch.)
- We no longer attempt to call branch-coverage-related Analysis methods when analysis.has_arcs is false.

And two minor annoyances:

- DA: and BRDA: lines are now emitted strictly in ascending order by (source) line number.
- Source file records are now sorted by *relative* pathname, not absolute pathname from the coverage database.
nedbat · Sep 9, 2024 · 074dee7 · 074dee7
1 parent 0afcc5c
commit 074dee7
Show file tree

Hide file tree

Showing 4 changed files with 89 additions and 96 deletions.
diff --git a/coverage/env.py b/coverage/env.py
@@ -99,10 +99,6 @@ class PYBEHAVIOR:
     # Some words are keywords in some places, identifiers in other places.
     soft_keywords = (PYVERSION >= (3, 10))
 
-    # Modules start with a line numbered zero. This means empty modules have
-    # only a 0-number line, which is ignored, giving a truly empty module.
-    empty_is_empty = (PYVERSION >= (3, 11, 0, "beta", 4))
-
     # PEP669 Low Impact Monitoring: https://peps.python.org/pep-0669/
     pep669 = bool(getattr(sys, "monitoring", None))
 

diff --git a/coverage/lcovreport.py b/coverage/lcovreport.py
@@ -53,13 +53,26 @@ def report(self, morfs: Iterable[TMorf] | None, outfile: IO[str]) -> float:
         self.coverage.get_data()
         outfile = outfile or sys.stdout
 
-        for fr, analysis in get_analysis_to_report(self.coverage, morfs):
+        # ensure file records are sorted by the _relative_ filename, not the full path
+        to_report = [
+            (fr.relative_filename(), fr, analysis)
+            for fr, analysis in get_analysis_to_report(self.coverage, morfs)
+        ]
+        to_report.sort()
+
+        for fname, fr, analysis in to_report:
             self.total += analysis.numbers
-            self.lcov_file(fr, analysis, outfile)
+            self.lcov_file(fname, fr, analysis, outfile)
 
         return self.total.n_statements and self.total.pc_covered
 
-    def lcov_file(self, fr: FileReporter, analysis: Analysis, outfile: IO[str]) -> None:
+    def lcov_file(
+        self,
+        rel_fname: str,
+        fr: FileReporter,
+        analysis: Analysis,
+        outfile: IO[str],
+    ) -> None:
         """Produces the lcov data for a single file.
 
         This currently supports both line and branch coverage,
@@ -70,74 +83,70 @@ def lcov_file(self, fr: FileReporter, analysis: Analysis, outfile: IO[str]) -> N
             if self.config.skip_empty:
                 return
 
-        outfile.write(f"SF:{fr.relative_filename()}\n")
+        outfile.write(f"SF:{rel_fname}\n")
 
-        source_lines = fr.source().splitlines()
-        for covered in sorted(analysis.executed):
-            if covered in analysis.excluded:
-                # Do not report excluded as executed
-                continue
+        if self.config.lcov_line_checksums:
+            source_lines = fr.source().splitlines()
 
-            if source_lines:
-                if covered-1 >= len(source_lines):
-                    break
-                line = source_lines[covered-1]
-            else:
-                line = ""
+        # Emit a DA: record for each line of the file.
+        lines = sorted(analysis.statements)
+        hash_suffix = ""
+        for line in lines:
             if self.config.lcov_line_checksums:
-                hash_suffix = "," + line_hash(line)
-            else:
-                hash_suffix = ""
-
-            # Note: Coverage.py currently only supports checking *if* a line
-            # has been executed, not how many times, so we set this to 1 for
-            # nice output even if it's technically incorrect.
-            outfile.write(f"DA:{covered},1{hash_suffix}\n")
-
-        for missed in sorted(analysis.missing):
-            # We don't have to skip excluded lines here, because `missing`
-            # already doesn't have them.
-            assert source_lines
-            line = source_lines[missed-1]
-            if self.config.lcov_line_checksums:
-                hash_suffix = "," + line_hash(line)
-            else:
-                hash_suffix = ""
-            outfile.write(f"DA:{missed},0{hash_suffix}\n")
+                hash_suffix = "," + line_hash(source_lines[line-1])
+            # Q: can we get info about the number of times a statement is
+            # executed?  If so, that should be recorded here.
+            hit = int(line not in analysis.missing)
+            outfile.write(f"DA:{line},{hit}{hash_suffix}\n")
 
         if analysis.numbers.n_statements > 0:
             outfile.write(f"LF:{analysis.numbers.n_statements}\n")
             outfile.write(f"LH:{analysis.numbers.n_executed}\n")
 
-        # More information dense branch coverage data.
-        missing_arcs = analysis.missing_branch_arcs()
-        executed_arcs = analysis.executed_branch_arcs()
-        for block_number, block_line_number in enumerate(
-            sorted(analysis.branch_stats().keys()),
-        ):
-            for branch_number, line_number in enumerate(
-                sorted(missing_arcs[block_line_number]),
-            ):
-                # The exit branches have a negative line number,
-                # this will not produce valid lcov. Setting
-                # the line number of the exit branch to 0 will allow
-                # for valid lcov, while preserving the data.
-                line_number = max(line_number, 0)
-                outfile.write(f"BRDA:{line_number},{block_number},{branch_number},-\n")
-
-            # The start value below allows for the block number to be
-            # preserved between these two for loops (stopping the loop from
-            # resetting the value of the block number to 0).
-            for branch_number, line_number in enumerate(
-                sorted(executed_arcs[block_line_number]),
-                start=len(missing_arcs[block_line_number]),
-            ):
-                line_number = max(line_number, 0)
-                outfile.write(f"BRDA:{line_number},{block_number},{branch_number},1\n")
-
-        # Summary of the branch coverage.
+        # More information dense branch coverage data, if available.
         if analysis.has_arcs:
             branch_stats = analysis.branch_stats()
+            executed_arcs = analysis.executed_branch_arcs()
+            missing_arcs = analysis.missing_branch_arcs()
+
+            for line in lines:
+                if line in branch_stats:
+                    # The meaning of a BRDA: line is not well explained in the lcov
+                    # documentation.  Based on what genhtml does with them, however,
+                    # the interpretation is supposed to be something like this:
+                    # BRDA: <line>, <block>, <branch>, <hit>
+                    # where <line> is the source line number of the *origin* of the
+                    # branch; <block> is an arbitrary number which distinguishes multiple
+                    # control flow operations on a single line; <branch> is an arbitrary
+                    # number which distinguishes the possible destinations of the specific
+                    # control flow operation identified by <line> + <block>; and <hit> is
+                    # either the hit count for <line> + <block> + <branch> or "-" meaning
+                    # that <line> + <block> was never *reached*.  <line> must be >= 1,
+                    # and <block>, <branch>, <hit> must be >= 0.
+
+                    # This is only one possible way to map our sets of executed and
+                    # not-executed arcs to BRDA codes. It seems to produce reasonable
+                    # results when fed through genhtml.
+
+                    # Q: can we get counts of the number of times each arc was executed?
+                    # branch_stats has "total" and "taken" counts for each branch, but it
+                    # doesn't have "taken" broken down by destination.
+                    destinations = {}
+                    for dst in executed_arcs[line]:
+                        destinations[(int(dst < 0), abs(dst))] = 1
+                    for dst in missing_arcs[line]:
+                        destinations[(int(dst < 0), abs(dst))] = 0
+
+                    if all(v == 0 for v in destinations.values()):
+                        # When _none_ of the out arcs from 'line' were executed, presume
+                        # 'line' was never reached.
+                        for branch, _ in enumerate(sorted(destinations.keys())):
+                            outfile.write(f"BRDA:{line},0,{branch},-\n")
+                    else:
+                        for branch, (_, hit) in enumerate(sorted(destinations.items())):
+                            outfile.write(f"BRDA:{line},0,{branch},{hit}\n")
+
+            # Summary of the branch coverage.
             brf = sum(t for t, k in branch_stats.values())
             brh = brf - sum(t - k for t, k in branch_stats.values())
             if brf > 0:

diff --git a/tests/test_lcov.py b/tests/test_lcov.py
@@ -11,7 +11,6 @@
 from tests.coveragetest import CoverageTest
 
 import coverage
-from coverage import env
 
 
 class LcovTest(CoverageTest):
@@ -60,8 +59,8 @@ def IsItTrue():
         expected_result = textwrap.dedent("""\
             SF:main_file.py
             DA:1,1
-            DA:4,1
             DA:2,0
+            DA:4,1
             DA:5,0
             LF:4
             LH:2
@@ -92,8 +91,8 @@ def IsItTrue():
         expected_result = textwrap.dedent("""\
             SF:main_file.py
             DA:1,1,7URou3io0zReBkk69lEb/Q
-            DA:4,1,ilhb4KUfytxtEuClijZPlQ
             DA:2,0,Xqj6H1iz/nsARMCAbE90ng
+            DA:4,1,ilhb4KUfytxtEuClijZPlQ
             DA:5,0,LWILTcvARcydjFFyo9qM0A
             LF:4
             LH:2
@@ -116,8 +115,8 @@ def test_simple_line_coverage_two_files(self) -> None:
         expected_result = textwrap.dedent("""\
             SF:main_file.py
             DA:1,1
-            DA:4,1
             DA:2,0
+            DA:4,1
             DA:5,0
             LF:4
             LH:2
@@ -161,8 +160,8 @@ def is_it_x(x):
             DA:5,0
             LF:4
             LH:1
-            BRDA:3,0,0,-
-            BRDA:5,0,1,-
+            BRDA:2,0,0,-
+            BRDA:2,0,1,-
             BRF:2
             BRH:0
             end_of_record
@@ -204,8 +203,8 @@ def test_is_it_x(self):
             DA:5,0
             LF:4
             LH:1
-            BRDA:3,0,0,-
-            BRDA:5,0,1,-
+            BRDA:2,0,0,-
+            BRDA:2,0,1,-
             BRF:2
             BRH:0
             end_of_record
@@ -248,8 +247,8 @@ def test_half_covered_branch(self) -> None:
             DA:6,0
             LF:4
             LH:3
-            BRDA:6,0,0,-
-            BRDA:4,0,1,1
+            BRDA:3,0,0,1
+            BRDA:3,0,1,0
             BRF:2
             BRH:1
             end_of_record
@@ -258,30 +257,19 @@ def test_half_covered_branch(self) -> None:
         assert expected_result == actual_result
 
     def test_empty_init_files(self) -> None:
-        # Test that an empty __init__.py still generates a (mostly vacuous)
-        # coverage record.  The overall coverage will be zero lines of code
-        # and zero branches to execute, and therefore no LF/LH nor BRF/BRH
-        # lines will be emitted.  However, in old Pythons there will be one
-        # DA line emitted for the empty source line 1.
+        # Test that an empty __init__.py still generates a (vacuous)
+        # coverage record.
         self.make_file("__init__.py", "")
         self.assert_doesnt_exist(".coverage")
         cov = coverage.Coverage(branch=True, source=".")
         self.start_import_stop(cov, "__init__")
         pct = cov.lcov_report()
         assert pct == 0.0
         self.assert_exists("coverage.lcov")
-        # Newer Pythons have truly empty empty files.
-        if env.PYBEHAVIOR.empty_is_empty:
-            expected_result = textwrap.dedent("""\
-                SF:__init__.py
-                end_of_record
-                """)
-        else:
-            expected_result = textwrap.dedent("""\
-                SF:__init__.py
-                DA:1,1
-                end_of_record
-                """)
+        expected_result = textwrap.dedent("""\
+            SF:__init__.py
+            end_of_record
+            """)
         actual_result = self.get_lcov_report_content()
         assert expected_result == actual_result
 
@@ -323,12 +311,12 @@ def test_excluded_lines(self) -> None:
             SF:runme.py
             DA:1,1
             DA:3,1
-            DA:6,1
             DA:4,0
+            DA:6,1
             LF:4
             LH:3
-            BRDA:4,0,0,-
-            BRDA:6,0,1,1
+            BRDA:3,0,0,0
+            BRDA:3,0,1,1
             BRF:2
             BRH:1
             end_of_record

diff --git a/tests/test_report_common.py b/tests/test_report_common.py
@@ -270,8 +270,8 @@ def test_lcov(self) -> None:
         expected = textwrap.dedent("""\
             SF:good.j2
             DA:1,1
-            DA:3,1
             DA:2,0
+            DA:3,1
             LF:3
             LH:2
             end_of_record