From b0abdac90c7e6aef444368781bcc3df24f69cab0 Mon Sep 17 00:00:00 2001
From: Ian Campbell
Date: Thu, 28 Mar 2019 11:42:02 +0000
Subject: [PATCH] scripts/versions.py: Use regex grouping to extract the version

The `lstrip` and `rstrip` functions take a set of characters to remove,
not a prefix/suffix. Thus `rstrip('-x86_64')` will remove any trailing
characters in the string `'-x86_64'` in any order (in effect it strips
the suffix matching the regex `[-_x468]*`). So with `18.09.4` it removes
the `4` suffix, resulting in trying to `int('')` later on:

    Traceback (most recent call last):
      File "/src/scripts/versions.py", line 80, in <module>
        main()
      File "/src/scripts/versions.py", line 73, in main
        versions, reverse=True, key=operator.attrgetter('order')
      File "/src/scripts/versions.py", line 52, in order
        return (int(self.major), int(self.minor), int(self.patch)) + stage
    ValueError: invalid literal for int() with base 10: ''

Since we no longer need to check for the arch suffix (it no longer
appears in the URLs we are traversing) we could just drop the `rstrip`
and invent a local prefix stripping helper to replace
`lstrip('docker-')`.

Instead let's take advantage of the behaviour of `re.findall`, which is
that if the regex contains a single `()` group then only the text matched
by that group is returned. This lets us match exactly the sub-section of
the regex we require.

While editing the regex, also ensure that the suffix is precisely `.tgz`
and not merely `tgz` by adding an explicit `\.`; previously the literal
`.` would be swallowed by the `.*` instead.

Signed-off-by: Ian Campbell --- scripts/versions.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/scripts/versions.py b/scripts/versions.py index 7ad1d56a..93fe0d7f 100644 --- a/scripts/versions.py +++ b/scripts/versions.py @@ -62,13 +62,9 @@ def main(): for url in [base_url.format(cat) for cat in categories]: res = requests.get(url) content = res.text - versions = [ - Version.parse( - v.strip('"').lstrip('docker-').rstrip('.tgz').rstrip('-x86_64') - ) for v in re.findall( - r'"docker-[0-9]+\.[0-9]+\.[0-9]+-?.*tgz"', content - ) - ] + versions = [Version.parse(v) for v in re.findall( + r'"docker-([0-9]+\.[0-9]+\.[0-9]+)-?.*tgz"', content + )] sorted_versions = sorted( versions, reverse=True, key=operator.attrgetter('order') )