From 5846dacf8d4d48ad9278ded327cbb5f0917a238b Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Wed, 22 Dec 2021 02:13:50 -0800 Subject: [PATCH] Support unpacking wheels that contain files with commas in their names (#427) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The csv module is now being used to read RECORD. Co-authored-by: Alex Grönholm --- src/wheel/wheelfile.py | 46 ++++++++++++------- tests/conftest.py | 2 +- .../mypackage/__init__.py | 0 .../mypackage/data/1,2,3.txt | 0 .../mypackage/data/__init__.py | 0 .../testdata/commasinfilenames.dist/setup.py | 12 +++++ .../testrepo-0.1.0/mypackage/__init__.py | 0 7 files changed, 42 insertions(+), 18 deletions(-) create mode 100644 tests/testdata/commasinfilenames.dist/mypackage/__init__.py create mode 100644 tests/testdata/commasinfilenames.dist/mypackage/data/1,2,3.txt create mode 100644 tests/testdata/commasinfilenames.dist/mypackage/data/__init__.py create mode 100644 tests/testdata/commasinfilenames.dist/setup.py create mode 100644 tests/testdata/commasinfilenames.dist/testrepo-0.1.0/mypackage/__init__.py diff --git a/src/wheel/wheelfile.py b/src/wheel/wheelfile.py index 3ee97dd..21e7361 100644 --- a/src/wheel/wheelfile.py +++ b/src/wheel/wheelfile.py @@ -5,6 +5,7 @@ import hashlib import os.path import re import stat +import sys import time from collections import OrderedDict from distutils import log as logger @@ -13,6 +14,16 @@ from zipfile import ZIP_DEFLATED, ZipInfo, ZipFile from wheel.cli import WheelError from wheel.util import urlsafe_b64decode, as_unicode, native, urlsafe_b64encode, as_bytes, StringIO +if sys.version_info >= (3,): + from io import TextIOWrapper + + def read_csv(fp): + return csv.reader(TextIOWrapper(fp, newline='', encoding='utf-8')) +else: + def read_csv(fp): + for line in csv.reader(fp): + yield [column.decode('utf-8') for column in line] + # Non-greedy matching of an optional build number may be too clever (more # invalid wheel filenames will match). Separate regex for .dist-info? WHEEL_INFO_RE = re.compile( @@ -60,23 +71,24 @@ class WheelFile(ZipFile): raise WheelError('Missing {} file'.format(self.record_path)) with record: - for line in record: - line = line.decode('utf-8') - path, hash_sum, size = line.rsplit(u',', 2) - if hash_sum: - algorithm, hash_sum = hash_sum.split(u'=') - try: - hashlib.new(algorithm) - except ValueError: - raise WheelError('Unsupported hash algorithm: {}'.format(algorithm)) - - if algorithm.lower() in {'md5', 'sha1'}: - raise WheelError( - 'Weak hash algorithm ({}) is not permitted by PEP 427' - .format(algorithm)) - - self._file_hashes[path] = ( - algorithm, urlsafe_b64decode(hash_sum.encode('ascii'))) + for line in read_csv(record): + path, hash_sum, size = line + if not hash_sum: + continue + + algorithm, hash_sum = hash_sum.split(u'=') + try: + hashlib.new(algorithm) + except ValueError: + raise WheelError('Unsupported hash algorithm: {}'.format(algorithm)) + + if algorithm.lower() in {'md5', 'sha1'}: + raise WheelError( + 'Weak hash algorithm ({}) is not permitted by PEP 427' + .format(algorithm)) + + self._file_hashes[path] = ( + algorithm, urlsafe_b64decode(hash_sum.encode('ascii'))) def open(self, name_or_info, mode="r", pwd=None): def _update_crc(newdata, eof=None): diff --git a/tests/conftest.py b/tests/conftest.py index 7c3698c..d9821b8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,7 +12,7 @@ import pytest @pytest.fixture(scope='session') def wheels_and_eggs(tmpdir_factory): """Build wheels and eggs from test distributions.""" - test_distributions = "complex-dist", "simple.dist", "headers.dist" + test_distributions = "complex-dist", "simple.dist", "headers.dist", "commasinfilenames.dist" if sys.version_info >= (3, 6): # Only Python 3.6+ can handle packaging unicode file names reliably # across different platforms diff --git a/tests/testdata/commasinfilenames.dist/mypackage/__init__.py b/tests/testdata/commasinfilenames.dist/mypackage/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/testdata/commasinfilenames.dist/mypackage/data/1,2,3.txt b/tests/testdata/commasinfilenames.dist/mypackage/data/1,2,3.txt new file mode 100644 index 0000000..e69de29 diff --git a/tests/testdata/commasinfilenames.dist/mypackage/data/__init__.py b/tests/testdata/commasinfilenames.dist/mypackage/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/testdata/commasinfilenames.dist/setup.py b/tests/testdata/commasinfilenames.dist/setup.py new file mode 100644 index 0000000..8cf9e4e --- /dev/null +++ b/tests/testdata/commasinfilenames.dist/setup.py @@ -0,0 +1,12 @@ +from setuptools import setup + +setup( + name='testrepo', + version='0.1', + packages=["mypackage"], + description='A test package with commas in file names', + include_package_data=True, + package_data={ + "mypackage.data": ["*"] + }, +) diff --git a/tests/testdata/commasinfilenames.dist/testrepo-0.1.0/mypackage/__init__.py b/tests/testdata/commasinfilenames.dist/testrepo-0.1.0/mypackage/__init__.py new file mode 100644 index 0000000..e69de29 -- 2.42.0.windows.2