Skip to content

Commit

Permalink
pythongh-126623: Update libexpat to 2.6.4, make future updates easier (
Browse files Browse the repository at this point in the history
…pythonGH-126792)

Update libexpat to 2.6.4, make future updates easier.
(cherry picked from commit 3c9996909402fadc98e6ca2a64e75a71a7427352)

Co-authored-by: Seth Michael Larson <[email protected]>
  • Loading branch information
sethmlarson authored and miss-islington committed Nov 13, 2024
1 parent 9162340 commit 66d95bb
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 22 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Upgrade libexpat to 2.6.4
22 changes: 11 additions & 11 deletions Misc/sbom.spdx.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions Modules/expat/expat.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,9 @@ enum XML_Error {
/* Added in 2.3.0. */
XML_ERROR_NO_BUFFER,
/* Added in 2.4.0. */
XML_ERROR_AMPLIFICATION_LIMIT_BREACH
XML_ERROR_AMPLIFICATION_LIMIT_BREACH,
/* Added in 2.6.4. */
XML_ERROR_NOT_STARTED,
};

enum XML_Content_Type {
Expand Down Expand Up @@ -1066,7 +1068,7 @@ XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
*/
#define XML_MAJOR_VERSION 2
#define XML_MINOR_VERSION 6
#define XML_MICRO_VERSION 3
#define XML_MICRO_VERSION 4

#ifdef __cplusplus
}
Expand Down
9 changes: 4 additions & 5 deletions Modules/expat/expat_external.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@
#ifndef Expat_External_INCLUDED
#define Expat_External_INCLUDED 1

/* Namespace external symbols to allow multiple libexpat version to
co-exist. */
#include "pyexpatns.h"

/* External API definitions */

/* Expat tries very hard to make the API boundary very specifically
Expand All @@ -64,11 +68,6 @@
compiled with the cdecl calling convention as the default since
system headers may assume the cdecl convention.
*/

/* Namespace external symbols to allow multiple libexpat version to
co-exist. */
#include "pyexpatns.h"

#ifndef XMLCALL
# if defined(_MSC_VER)
# define XMLCALL __cdecl
Expand Down
57 changes: 57 additions & 0 deletions Modules/expat/refresh.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/env bash
#
# Use this script to update libexpat
#
# What it does, in order:
#   1. Downloads the pinned libexpat release tarball and verifies its SHA-256.
#   2. Extracts the listed files from the tarball's lib/ directory into the
#      directory that contains this script.
#   3. Re-adds the '#include "pyexpatns.h"' line to expat_external.h.
#
# Tools used: curl, sha256sum, tar, sed, realpath.

# Abort on the first failing command, including failures inside a pipeline.
set -e
set -o pipefail

if [[ "${BASH_VERSINFO[0]}" -lt 4 ]]; then
  echo "A bash version >= 4 required. Got: $BASH_VERSION" >&2
  exit 1
fi

# Update this when updating to a new version after verifying that the changes
# the update brings in are good. These values are used for verifying the SBOM, too.
expected_libexpat_tag="R_2_6_4"
expected_libexpat_version="2.6.4"
expected_libexpat_sha256="fd03b7172b3bd7427a3e7a812063f74754f24542429b634e0db6511b53fb2278"

# Work from the directory holding this script, regardless of the caller's cwd.
# The expansion is quoted so a path containing spaces or glob characters works.
expat_dir="$(realpath "$(dirname -- "${BASH_SOURCE[0]}")")"
cd "${expat_dir}"

# Step 1: Download the release tarball and verify its checksum
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error page, so a bad URL is reported as a download failure rather than as
# a checksum mismatch.
curl --fail --location "https://github.com/libexpat/libexpat/releases/download/${expected_libexpat_tag}/expat-${expected_libexpat_version}.tar.gz" > libexpat.tar.gz
# "<digest><two spaces><file>" is the text-mode line format sha256sum reads.
echo "${expected_libexpat_sha256}  libexpat.tar.gz" | sha256sum --check

# Step 2: Pull files from the libexpat distribution
declare -a lib_files
lib_files=(
  ascii.h
  asciitab.h
  expat.h
  expat_external.h
  iasciitab.h
  internal.h
  latin1tab.h
  nametab.h
  siphash.h
  utf8tab.h
  winconfig.h
  xmlparse.c
  xmlrole.c
  xmlrole.h
  xmltok.c
  xmltok.h
  xmltok_impl.c
  xmltok_impl.h
  xmltok_ns.c
)
for f in "${lib_files[@]}"; do
  # Strip the leading "expat-<version>/lib/" so the file lands next to this
  # script. Options are placed before the member name for portability.
  tar xzvf libexpat.tar.gz --strip-components 2 "expat-${expected_libexpat_version}/lib/${f}"
done
rm libexpat.tar.gz

# Step 3: Add the namespacing include to expat_external.h
# The freshly extracted upstream header does not include pyexpatns.h, so it is
# inserted right after the include guard define.
# NOTE: `-i` without a suffix and "\n" in the replacement are GNU sed behaviour.
sed -i 's/#define Expat_External_INCLUDED 1/&\n\n\/* Namespace external symbols to allow multiple libexpat version to\n co-exist. \*\/\n#include "pyexpatns.h"/' expat_external.h

echo "Updated; verify all is okay using git diff and git status."
18 changes: 14 additions & 4 deletions Modules/expat/xmlparse.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* ba4cdf9bdb534f355a9def4c9e25d20ee8e72f95b0a4d930be52e563f5080196 (2.6.3+)
/* c5625880f4bf417c1463deee4eb92d86ff413f802048621c57e25fe483eb59e4 (2.6.4+)
__ __ _
___\ \/ /_ __ __ _| |_
/ _ \\ /| '_ \ / _` | __|
Expand Down Expand Up @@ -40,6 +40,7 @@
Copyright (c) 2023 Owain Davies <[email protected]>
Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <[email protected]>
Copyright (c) 2024 Berkay Eren Ürün <[email protected]>
Copyright (c) 2024 Hanno Böck <[email protected]>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
Expand Down Expand Up @@ -2234,6 +2235,9 @@ XML_StopParser(XML_Parser parser, XML_Bool resumable) {
if (parser == NULL)
return XML_STATUS_ERROR;
switch (parser->m_parsingStatus.parsing) {
case XML_INITIALIZED:
parser->m_errorCode = XML_ERROR_NOT_STARTED;
return XML_STATUS_ERROR;
case XML_SUSPENDED:
if (resumable) {
parser->m_errorCode = XML_ERROR_SUSPENDED;
Expand All @@ -2244,7 +2248,7 @@ XML_StopParser(XML_Parser parser, XML_Bool resumable) {
case XML_FINISHED:
parser->m_errorCode = XML_ERROR_FINISHED;
return XML_STATUS_ERROR;
default:
case XML_PARSING:
if (resumable) {
#ifdef XML_DTD
if (parser->m_isParamEntity) {
Expand All @@ -2255,6 +2259,9 @@ XML_StopParser(XML_Parser parser, XML_Bool resumable) {
parser->m_parsingStatus.parsing = XML_SUSPENDED;
} else
parser->m_parsingStatus.parsing = XML_FINISHED;
break;
default:
assert(0);
}
return XML_STATUS_OK;
}
Expand Down Expand Up @@ -2519,6 +2526,9 @@ XML_ErrorString(enum XML_Error code) {
case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
return XML_L(
"limit on input amplification factor (from DTD and entities) breached");
/* Added in 2.6.4. */
case XML_ERROR_NOT_STARTED:
return XML_L("parser not started");
}
return NULL;
}
Expand Down Expand Up @@ -7856,7 +7866,7 @@ accountingReportDiff(XML_Parser rootParser,
assert(! rootParser->m_parentParser);

fprintf(stderr,
" (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
" (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%u, xmlparse.c:%d) %*s\"",
bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
levelsAwayFromRootParser, source_line, 10, "");

Expand Down Expand Up @@ -7969,7 +7979,7 @@ entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,

fprintf(
stderr,
"expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
"expat: Entities(%p): Count %9u, depth %2u/%2u %*s%s%s; %s length %d (xmlparse.c:%d)\n",
(void *)rootParser, rootParser->m_entity_stats.countEverOpened,
rootParser->m_entity_stats.currentDepth,
rootParser->m_entity_stats.maximumDepthSeen,
Expand Down
28 changes: 28 additions & 0 deletions Tools/build/generate_sbom.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ class PackageFiles(typing.NamedTuple):
include=["Modules/expat/**"],
exclude=[
"Modules/expat/expat_config.h",
"Modules/expat/pyexpatns.h",
"Modules/_hacl/refresh.sh",
]
),
"macholib": PackageFiles(
Expand Down Expand Up @@ -221,6 +223,32 @@ def check_sbom_packages(sbom_data: dict[str, typing.Any]) -> None:
"HACL* SBOM version doesn't match value in 'Modules/_hacl/refresh.sh'"
)

# libexpat specifies its expected rev in a refresh script.
if package["name"] == "libexpat":
    libexpat_refresh_sh = (CPYTHON_ROOT_DIR / "Modules/expat/refresh.sh").read_text()
    libexpat_expected_version_match = re.search(
        r"expected_libexpat_version=\"([0-9]+\.[0-9]+\.[0-9]+)\"",
        libexpat_refresh_sh
    )
    # A SHA-256 digest is 64 hex characters (40 is the length of SHA-1), and
    # the digest has to be a capture group for .group(1) below to return it.
    libexpat_expected_sha256_match = re.search(
        r"expected_libexpat_sha256=\"([a-f0-9]{64})\"",
        libexpat_refresh_sh
    )
    # Each value is None when its pattern is not found in refresh.sh; the
    # comparisons below then see None instead of the pinned value.
    libexpat_expected_version = libexpat_expected_version_match and libexpat_expected_version_match.group(1)
    libexpat_expected_sha256 = libexpat_expected_sha256_match and libexpat_expected_sha256_match.group(1)

    error_if(
        libexpat_expected_version != version,
        "libexpat SBOM version doesn't match value in 'Modules/expat/refresh.sh'"
    )
    # The SBOM entry must carry exactly one checksum: the pinned SHA-256.
    error_if(
        package["checksums"] != [{
            "algorithm": "SHA256",
            "checksumValue": libexpat_expected_sha256
        }],
        "libexpat SBOM checksum doesn't match value in 'Modules/expat/refresh.sh'"
    )

# License must be on the approved list for SPDX.
license_concluded = package["licenseConcluded"]
error_if(
Expand Down

0 comments on commit 66d95bb

Please sign in to comment.