Skip to content

Commit

Permalink
[lcov] Re-implement lcov reports using the same algorithm as XML repo…
Browse files Browse the repository at this point in the history
…rts.

This fixes five serious bugs:

- The first field of a BRDA: line must not be zero (#1846).
- The first field of a BRDA: line is supposed to be the *source* line of each
  instrumented branch, not the destination line.
- The fourth field of a BRDA: line is supposed to be “-” when the branch
  was *never reached*, not when it was reached but never/always taken (which
  is what a branch’s presence in missing_arcs means).  As far as I can tell,
  coverage.py currently doesn’t know of the existence of branches that were
  never reached.

- The decision of whether to emit DA: and BRDA: lines at all is now taken
  strictly according to what’s in analysis.statements.  This is important
  because some lines may appear in analysis.executed and/or
  analysis.executed_branch_arcs but *not* in analysis.statements.
  For example, the beginnings of docstrings are like this, as is the
  phantom line 1 of an empty __init__.py in Python 3.10 and earlier.

  (I am pleased to note that the special casing of empty __init__.py in
  the test suite is no longer required after this patch.)

- We no longer attempt to call branch-coverage-related Analysis methods
  when analysis.has_arcs is false.

And two minor annoyances:

- DA: and BRDA: lines are now emitted strictly in ascending order by (source)
  line number.
- Source file records are now sorted by *relative* pathname, not absolute
  pathname from the coverage database.
  • Loading branch information
zackw committed Sep 9, 2024
1 parent 0afcc5c commit 074dee7
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 96 deletions.
4 changes: 0 additions & 4 deletions coverage/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,6 @@ class PYBEHAVIOR:
# Some words are keywords in some places, identifiers in other places.
soft_keywords = (PYVERSION >= (3, 10))

# Modules start with a line numbered zero. This means empty modules have
# only a 0-number line, which is ignored, giving a truly empty module.
empty_is_empty = (PYVERSION >= (3, 11, 0, "beta", 4))

# PEP669 Low Impact Monitoring: https://peps.python.org/pep-0669/
pep669 = bool(getattr(sys, "monitoring", None))

Expand Down
131 changes: 70 additions & 61 deletions coverage/lcovreport.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,26 @@ def report(self, morfs: Iterable[TMorf] | None, outfile: IO[str]) -> float:
self.coverage.get_data()
outfile = outfile or sys.stdout

for fr, analysis in get_analysis_to_report(self.coverage, morfs):
# ensure file records are sorted by the _relative_ filename, not the full path
to_report = [
(fr.relative_filename(), fr, analysis)
for fr, analysis in get_analysis_to_report(self.coverage, morfs)
]
to_report.sort()

for fname, fr, analysis in to_report:
self.total += analysis.numbers
self.lcov_file(fr, analysis, outfile)
self.lcov_file(fname, fr, analysis, outfile)

return self.total.n_statements and self.total.pc_covered

def lcov_file(self, fr: FileReporter, analysis: Analysis, outfile: IO[str]) -> None:
def lcov_file(
self,
rel_fname: str,
fr: FileReporter,
analysis: Analysis,
outfile: IO[str],
) -> None:
"""Produces the lcov data for a single file.
This currently supports both line and branch coverage,
Expand All @@ -70,74 +83,70 @@ def lcov_file(self, fr: FileReporter, analysis: Analysis, outfile: IO[str]) -> N
if self.config.skip_empty:
return

outfile.write(f"SF:{fr.relative_filename()}\n")
outfile.write(f"SF:{rel_fname}\n")

source_lines = fr.source().splitlines()
for covered in sorted(analysis.executed):
if covered in analysis.excluded:
# Do not report excluded as executed
continue
if self.config.lcov_line_checksums:
source_lines = fr.source().splitlines()

if source_lines:
if covered-1 >= len(source_lines):
break
line = source_lines[covered-1]
else:
line = ""
# Emit a DA: record for each line of the file.
lines = sorted(analysis.statements)
hash_suffix = ""
for line in lines:
if self.config.lcov_line_checksums:
hash_suffix = "," + line_hash(line)
else:
hash_suffix = ""

# Note: Coverage.py currently only supports checking *if* a line
# has been executed, not how many times, so we set this to 1 for
# nice output even if it's technically incorrect.
outfile.write(f"DA:{covered},1{hash_suffix}\n")

for missed in sorted(analysis.missing):
# We don't have to skip excluded lines here, because `missing`
# already doesn't have them.
assert source_lines
line = source_lines[missed-1]
if self.config.lcov_line_checksums:
hash_suffix = "," + line_hash(line)
else:
hash_suffix = ""
outfile.write(f"DA:{missed},0{hash_suffix}\n")
hash_suffix = "," + line_hash(source_lines[line-1])
# Q: can we get info about the number of times a statement is
# executed? If so, that should be recorded here.
hit = int(line not in analysis.missing)
outfile.write(f"DA:{line},{hit}{hash_suffix}\n")

if analysis.numbers.n_statements > 0:
outfile.write(f"LF:{analysis.numbers.n_statements}\n")
outfile.write(f"LH:{analysis.numbers.n_executed}\n")

# More information dense branch coverage data.
missing_arcs = analysis.missing_branch_arcs()
executed_arcs = analysis.executed_branch_arcs()
for block_number, block_line_number in enumerate(
sorted(analysis.branch_stats().keys()),
):
for branch_number, line_number in enumerate(
sorted(missing_arcs[block_line_number]),
):
# The exit branches have a negative line number,
# this will not produce valid lcov. Setting
# the line number of the exit branch to 0 will allow
# for valid lcov, while preserving the data.
line_number = max(line_number, 0)
outfile.write(f"BRDA:{line_number},{block_number},{branch_number},-\n")

# The start value below allows for the block number to be
# preserved between these two for loops (stopping the loop from
# resetting the value of the block number to 0).
for branch_number, line_number in enumerate(
sorted(executed_arcs[block_line_number]),
start=len(missing_arcs[block_line_number]),
):
line_number = max(line_number, 0)
outfile.write(f"BRDA:{line_number},{block_number},{branch_number},1\n")

# Summary of the branch coverage.
# More information-dense branch coverage data, if available.
if analysis.has_arcs:
branch_stats = analysis.branch_stats()
executed_arcs = analysis.executed_branch_arcs()
missing_arcs = analysis.missing_branch_arcs()

for line in lines:
if line in branch_stats:
# The meaning of a BRDA: line is not well explained in the lcov
# documentation. Based on what genhtml does with them, however,
# the interpretation is supposed to be something like this:
# BRDA: <line>, <block>, <branch>, <hit>
# where <line> is the source line number of the *origin* of the
# branch; <block> is an arbitrary number which distinguishes multiple
# control flow operations on a single line; <branch> is an arbitrary
# number which distinguishes the possible destinations of the specific
# control flow operation identified by <line> + <block>; and <hit> is
# either the hit count for <line> + <block> + <branch> or "-" meaning
# that <line> + <block> was never *reached*. <line> must be >= 1,
# and <block>, <branch>, <hit> must be >= 0.

# This is only one possible way to map our sets of executed and
# not-executed arcs to BRDA codes. It seems to produce reasonable
# results when fed through genhtml.

# Q: can we get counts of the number of times each arc was executed?
# branch_stats has "total" and "taken" counts for each branch, but it
# doesn't have "taken" broken down by destination.
destinations = {}
for dst in executed_arcs[line]:
destinations[(int(dst < 0), abs(dst))] = 1
for dst in missing_arcs[line]:
destinations[(int(dst < 0), abs(dst))] = 0

if all(v == 0 for v in destinations.values()):
# When _none_ of the out arcs from 'line' were executed, presume
# 'line' was never reached.
for branch, _ in enumerate(sorted(destinations.keys())):
outfile.write(f"BRDA:{line},0,{branch},-\n")
else:
for branch, (_, hit) in enumerate(sorted(destinations.items())):
outfile.write(f"BRDA:{line},0,{branch},{hit}\n")

# Summary of the branch coverage.
brf = sum(t for t, k in branch_stats.values())
brh = brf - sum(t - k for t, k in branch_stats.values())
if brf > 0:
Expand Down
48 changes: 18 additions & 30 deletions tests/test_lcov.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from tests.coveragetest import CoverageTest

import coverage
from coverage import env


class LcovTest(CoverageTest):
Expand Down Expand Up @@ -60,8 +59,8 @@ def IsItTrue():
expected_result = textwrap.dedent("""\
SF:main_file.py
DA:1,1
DA:4,1
DA:2,0
DA:4,1
DA:5,0
LF:4
LH:2
Expand Down Expand Up @@ -92,8 +91,8 @@ def IsItTrue():
expected_result = textwrap.dedent("""\
SF:main_file.py
DA:1,1,7URou3io0zReBkk69lEb/Q
DA:4,1,ilhb4KUfytxtEuClijZPlQ
DA:2,0,Xqj6H1iz/nsARMCAbE90ng
DA:4,1,ilhb4KUfytxtEuClijZPlQ
DA:5,0,LWILTcvARcydjFFyo9qM0A
LF:4
LH:2
Expand All @@ -116,8 +115,8 @@ def test_simple_line_coverage_two_files(self) -> None:
expected_result = textwrap.dedent("""\
SF:main_file.py
DA:1,1
DA:4,1
DA:2,0
DA:4,1
DA:5,0
LF:4
LH:2
Expand Down Expand Up @@ -161,8 +160,8 @@ def is_it_x(x):
DA:5,0
LF:4
LH:1
BRDA:3,0,0,-
BRDA:5,0,1,-
BRDA:2,0,0,-
BRDA:2,0,1,-
BRF:2
BRH:0
end_of_record
Expand Down Expand Up @@ -204,8 +203,8 @@ def test_is_it_x(self):
DA:5,0
LF:4
LH:1
BRDA:3,0,0,-
BRDA:5,0,1,-
BRDA:2,0,0,-
BRDA:2,0,1,-
BRF:2
BRH:0
end_of_record
Expand Down Expand Up @@ -248,8 +247,8 @@ def test_half_covered_branch(self) -> None:
DA:6,0
LF:4
LH:3
BRDA:6,0,0,-
BRDA:4,0,1,1
BRDA:3,0,0,1
BRDA:3,0,1,0
BRF:2
BRH:1
end_of_record
Expand All @@ -258,30 +257,19 @@ def test_half_covered_branch(self) -> None:
assert expected_result == actual_result

def test_empty_init_files(self) -> None:
# Test that an empty __init__.py still generates a (mostly vacuous)
# coverage record. The overall coverage will be zero lines of code
# and zero branches to execute, and therefore no LF/LH nor BRF/BRH
# lines will be emitted. However, in old Pythons there will be one
# DA line emitted for the empty source line 1.
# Test that an empty __init__.py still generates a (vacuous)
# coverage record.
self.make_file("__init__.py", "")
self.assert_doesnt_exist(".coverage")
cov = coverage.Coverage(branch=True, source=".")
self.start_import_stop(cov, "__init__")
pct = cov.lcov_report()
assert pct == 0.0
self.assert_exists("coverage.lcov")
# Newer Pythons have truly empty empty files.
if env.PYBEHAVIOR.empty_is_empty:
expected_result = textwrap.dedent("""\
SF:__init__.py
end_of_record
""")
else:
expected_result = textwrap.dedent("""\
SF:__init__.py
DA:1,1
end_of_record
""")
expected_result = textwrap.dedent("""\
SF:__init__.py
end_of_record
""")
actual_result = self.get_lcov_report_content()
assert expected_result == actual_result

Expand Down Expand Up @@ -323,12 +311,12 @@ def test_excluded_lines(self) -> None:
SF:runme.py
DA:1,1
DA:3,1
DA:6,1
DA:4,0
DA:6,1
LF:4
LH:3
BRDA:4,0,0,-
BRDA:6,0,1,1
BRDA:3,0,0,0
BRDA:3,0,1,1
BRF:2
BRH:1
end_of_record
Expand Down
2 changes: 1 addition & 1 deletion tests/test_report_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,8 @@ def test_lcov(self) -> None:
expected = textwrap.dedent("""\
SF:good.j2
DA:1,1
DA:3,1
DA:2,0
DA:3,1
LF:3
LH:2
end_of_record
Expand Down

0 comments on commit 074dee7

Please sign in to comment.