More streamlining.

Jelmer Vernooij 2021-03-16 20:21:11 +00:00
parent b7206647b1
commit 891e097d3c


@@ -18,6 +18,7 @@
 import apt_pkg
 from datetime import datetime
+from debian.deb822 import Release
 import os
 import re
 from typing import Iterator, List, Optional, Set
@@ -57,18 +58,36 @@ def contents_urls_from_sources_entry(source, arches, load_url):
         dists_url = base_url + "/dists"
     else:
         dists_url = base_url
+    inrelease_url = "%s/%s/InRelease" % (dists_url, name)
+    try:
+        response = load_url(inrelease_url)
+    except FileNotFoundError:
+        release_url = "%s/%s/Release" % (dists_url, name)
+        try:
+            response = load_url(release_url)
+        except FileNotFoundError as e:
+            logging.warning('Unable to download %s or %s: %s', inrelease_url, release_url, e)
+            return
+
+    existing_names = {}
+    release = Release(response.read())
+    for hn in ['MD5Sum', 'SHA1Sum', 'SHA256Sum']:
+        for entry in release.get(hn, []):
+            existing_names[os.path.splitext(entry['name'])[0]] = entry['name']
+
+    contents_files = set()
     if components:
         for component in components:
             for arch in arches:
-                yield (
-                    "%s/%s/%s/Contents-%s"
-                    % (dists_url, name, component, arch)
-                )
+                contents_files.add("%s/Contents-%s" % (component, arch))
     else:
         for arch in arches:
-            yield (
-                "%s/%s/Contents-%s" % (dists_url, name.rstrip("/"), arch)
-            )
+            contents_files.add("Contents-%s" % (arch,))
+
+    for fn in contents_files:
+        if fn in existing_names:
+            url = "%s/%s/%s" % (dists_url, name, fn)
+            yield url
 
 
 def contents_urls_from_sourceslist(sl, arch, load_url):
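
For context, a minimal sketch (not part of this commit) of the Release-index
check the new code performs, using python-debian's Release class; the Release
excerpt, checksum and file name below are fabricated:

    from io import StringIO
    from debian.deb822 import Release

    # Fabricated (In)Release excerpt listing one Contents index.
    release = Release(StringIO(
        "Suite: unstable\n"
        "MD5Sum:\n"
        " d41d8cd98f00b204e9800998ecf8427e 0 main/Contents-amd64.gz\n"
    ))
    for entry in release.get("MD5Sum", []):
        print(entry["name"])  # main/Contents-amd64.gz
    # contents_urls_from_sources_entry() strips the extension from each listed
    # name and only yields URLs for Contents files the mirror actually publishes.
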
@@ -78,6 +97,20 @@ def contents_urls_from_sourceslist(sl, arch, load_url):
     yield from contents_urls_from_sources_entry(source, arches, load_url)
 
 
+def _unwrap(f, ext):
+    if ext == ".gz":
+        import gzip
+        return gzip.GzipFile(fileobj=f)
+    elif ext == ".xz":
+        import lzma
+        from io import BytesIO
+        return BytesIO(lzma.decompress(f.read()))
+    else:
+        return f
+
+
 def load_direct_url(url):
     from urllib.error import HTTPError
     from urllib.request import urlopen, Request
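
A quick usage sketch for the new helper (not from the commit); the payload is
fabricated, gzip-compressed Contents data:

    import gzip
    from io import BytesIO

    payload = gzip.compress(b"usr/bin/hello    utils/hello\n")
    f = _unwrap(BytesIO(payload), ".gz")
    print(f.read())  # b'usr/bin/hello    utils/hello\n'
    # For ".xz" the helper decompresses the whole stream into a BytesIO;
    # any other extension returns the file object unchanged.
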
@@ -93,23 +126,8 @@ def load_direct_url(url):
                 raise
         break
     else:
-        raise ContentsFileNotFound(url)
-    if ext == ".gz":
-        import gzip
-        f = gzip.GzipFile(fileobj=response)
-    elif ext == ".xz":
-        import lzma
-        from io import BytesIO
-        f = BytesIO(lzma.decompress(response.read()))
-    elif response.headers.get_content_type() == "text/plain":
-        f = response
-    else:
-        raise Exception(
-            "Unknown content type %r" % response.headers.get_content_type()
-        )
-    return f
+        raise FileNotFoundError(url)
+    return _unwrap(response, ext)
 
 
 def load_url_with_cache(url, cache_dirs):
@@ -123,13 +141,17 @@ def load_url_with_cache(url, cache_dirs):
 
 def load_apt_cache_file(url, cache_dir):
     fn = apt_pkg.uri_to_filename(url)
-    p = os.path.join(cache_dir, fn + ".lz4")
-    if not os.path.exists(p):
-        raise FileNotFoundError(p)
-    logging.debug("Loading cached contents file %s", p)
-    #return os.popen('/usr/lib/apt/apt-helper cat-file %s' % p)
-    import lz4.frame
-    return lz4.frame.open(p, mode="rb")
+    for ext in ['.xz', '.gz', '.lz4', '']:
+        p = os.path.join(cache_dir, fn + ext)
+        if not os.path.exists(p):
+            continue
+        #return os.popen('/usr/lib/apt/apt-helper cat-file %s' % p)
+        logging.debug("Loading cached contents file %s", p)
+        if ext == '.lz4':
+            import lz4.frame
+            return lz4.frame.open(p, mode="rb")
+        return _unwrap(open(p, 'rb'), ext)
+    raise FileNotFoundError(url)
 
 
 class AptCachedContentsFileSearcher(FileSearcher):
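
For orientation, a sketch (not part of the commit) of the cache-file name the
lookup above probes; the URL is just an example, apt_pkg.init() is called
defensively, and the printed name is approximate:

    import apt_pkg
    apt_pkg.init()

    url = "http://deb.debian.org/debian/dists/sid/main/Contents-amd64"
    print(apt_pkg.uri_to_filename(url))
    # roughly: deb.debian.org_debian_dists_sid_main_Contents-amd64
    # load_apt_cache_file() tries that name with '.xz', '.gz', '.lz4' and no
    # suffix under cache_dir, decompressing via _unwrap() or lz4.frame.
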
@@ -158,8 +180,8 @@ class AptCachedContentsFileSearcher(FileSearcher):
             return load_url_with_cache(url, cache_dirs)
 
         urls = list(
-            contents_urls_from_sourceslist(sl, get_build_architecture(),
-                                           load_url))
+            contents_urls_from_sourceslist(
+                sl, get_build_architecture(), load_url))
         self._load_urls(urls, cache_dirs, load_url)
 
     def load_from_session(self, session):