[elbe-devel] [PATCH] archivedir: reimplement enbase more performantly

bage at linutronix.de bage at linutronix.de
Sat Feb 1 10:03:48 CET 2020


From: Bastian Germann <bage at linutronix.de>

Splitting the base64 string into 60-character lines in a Python loop is not
performant at all.  I tried this with a 500 MiB file and waited for over an
hour.

Reimplement enbase on top of base64.encodestring, which outputs 76-character
lines.  While at it, get rid of the "import ... as ..." alias.

Signed-off-by: Bastian Germann <bage at linutronix.de>
---
 elbepack/archivedir.py | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/elbepack/archivedir.py b/elbepack/archivedir.py
index 785a8ac82..1bf4e5b42 100644
--- a/elbepack/archivedir.py
+++ b/elbepack/archivedir.py
@@ -3,6 +3,7 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
+import bz2
 import os
 import re
 import sys
@@ -13,8 +14,7 @@ try:
 except ImportError:
     from urlparse import urljoin,urlparse
 
-from base64 import standard_b64encode, standard_b64decode
-from bz2 import compress as bz2compress
+from base64 import encodestring, standard_b64decode
 from subprocess import CalledProcessError
 from tempfile import NamedTemporaryFile
 
@@ -27,20 +27,12 @@ class ArchivedirError(Exception):
     pass
 
 def enbase(fname, compress=True):
-    infile = open(fname, "rb")
-    s = infile.read()
-    if compress:
-        s = bz2compress(s)
-
-    enc = standard_b64encode(s).decode()
-    splited = ""
-    i = 0
-    l_enc = len(enc)
-    while i < l_enc:
-        splited += (enc[i:i + 60] + "\n")
-        i += 60
-
-    return splited
+    with open(fname, "rb") as infile:
+        s = infile.read()
+        if compress:
+            s = bz2.compress(s)
+
+        return encodestring(s)
 
 def collect(tararchive, path, keep):
     if keep:
-- 
2.20.1




More information about the elbe-devel mailing list