[elbe-devel] [PATCH] elbepack: soap: use urllib to download files

Thomas Weißschuh thomas.weissschuh at linutronix.de
Wed Nov 13 12:22:47 CET 2024


All project files are available through the repo HTTP server.
Downloading them from there instead of through the custom SOAP
bytestream is much faster and requires less memory and code.

This bypasses the permission check, but the repo server is available
anyway, so the files are already reachable by anyone.
The user authentication is also going to be removed anyway.

Closes #418

Signed-off-by: Thomas Weißschuh <thomas.weissschuh at linutronix.de>
---
 elbepack/daemons/soap/esoap.py    | 21 ---------------------
 elbepack/soapclient.py            | 31 ++++++++++---------------------
 newsfragments/+urllib.feature.rst |  1 +
 3 files changed, 11 insertions(+), 42 deletions(-)

diff --git a/elbepack/daemons/soap/esoap.py b/elbepack/daemons/soap/esoap.py
index 121bf479e1e7e644ed58a699e1238905428d9947..d93cc5be3fc261158d972f6d86b966c7a161ddda 100644
--- a/elbepack/daemons/soap/esoap.py
+++ b/elbepack/daemons/soap/esoap.py
@@ -140,27 +140,6 @@ class ESoap (ServiceBase):
 
         return part + 1
 
-    @rpc(String, String, Integer, _returns=String)
-    @authenticated_uid
-    def get_file(self, uid, builddir, filename, part):
-        self.app.pm.open_project(uid, builddir)
-
-        size = 1024 * 1024 * 5
-        pos = size * part
-        file_name = builddir + '/' + filename
-        file_stat = os.stat(file_name)
-
-        if pos >= file_stat.st_size:
-            return 'EndOfFile'
-
-        with open(file_name, 'rb') as fp:
-            try:
-                fp.seek(pos)
-                data = fp.read(size)
-                return binascii.b2a_base64(data)
-            except BaseException:
-                return 'EndOfFile'
-
     @rpc(String)
     @authenticated_uid
     def build_chroot_tarball(self, uid, builddir):
diff --git a/elbepack/soapclient.py b/elbepack/soapclient.py
index af5b302464c28060d1eedcbb768ed064ad2f54fb..52787f9225dbaf5d65ffb978c782230358af629e 100644
--- a/elbepack/soapclient.py
+++ b/elbepack/soapclient.py
@@ -13,6 +13,7 @@ import sys
 import time
 from http.client import BadStatusLine
 from urllib.error import URLError
+from urllib.request import urlopen, urlretrieve
 
 from suds.client import Client
 
@@ -79,10 +80,10 @@ class ElbeSoapClient:
         return cls(args.soaphost, args.soapport, args.soapuser, args.soappassword,
                    args.soaptimeout, retries=args.retries)
 
-    def download_file(self, builddir, filename, dst_fname):
-        fp = open(dst_fname, 'wb')
-        part = 0
+    def _file_download_url(self, builddir, filename):
+        return f'http://{self.host}:{self.port}/repo/{builddir}/{filename}'
 
+    def download_file(self, builddir, filename, dst_fname):
         # XXX the retry logic might get removed in the future, if the error
         # doesn't occur in real world. If it occurs, we should think about
         # the root cause instead of stupid retrying.
@@ -90,27 +91,20 @@ class ElbeSoapClient:
 
         while True:
             try:
-                ret = self.service.get_file(builddir, filename, part)
+                urlretrieve(self._file_download_url(builddir, filename), dst_fname)
+                return
             except BadStatusLine as e:
                 retry = retry - 1
 
-                print(f'get_file part {part} failed, retry {retry} times',
+                print(f'get_file {filename} failed, retry {retry} times',
                       file=sys.stderr)
                 print(str(e), file=sys.stderr)
                 print(repr(e.line), file=sys.stderr)
 
                 if not retry:
-                    fp.close()
                     print('file transfer failed', file=sys.stderr)
                     sys.exit(170)
 
-            if ret == 'EndOfFile':
-                fp.close()
-                return
-
-            fp.write(binascii.a2b_base64(ret))
-            part = part + 1
-
     @staticmethod
     def _upload_file(append, build_dir, filename):
         size = 1024 * 1024
@@ -234,11 +228,6 @@ class ElbeSoapClient:
         return result
 
     def dump_file(self, builddir, file):
-        part = 0
-        while True:
-            ret = self.service.get_file(builddir, file, part)
-            if ret == 'EndOfFile':
-                return
-
-            yield binascii.a2b_base64(ret)
-            part = part + 1
+        with urlopen(self._file_download_url(builddir, file)) as r:
+            for chunk in r:
+                yield chunk
diff --git a/newsfragments/+urllib.feature.rst b/newsfragments/+urllib.feature.rst
new file mode 100644
index 0000000000000000000000000000000000000000..7e618e8ed92a6aad3e31b2d0facaf43df1db3478
--- /dev/null
+++ b/newsfragments/+urllib.feature.rst
@@ -0,0 +1 @@
+Improve file download speed through usage of urllib.

---
base-commit: fd48f02889d0de2714489df27f43429dbe86260d
change-id: 20241113-download-urllib-f778cf7696f7

Best regards,
-- 
Thomas Weißschuh <thomas.weissschuh at linutronix.de>



More information about the elbe-devel mailing list