Mercurial > repos > guerler > springsuite
annotate planemo/lib/python3.7/site-packages/galaxy/util/checkers.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler | 
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 1 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 1 import gzip | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 2 import io | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 3 import re | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 4 import sys | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 5 import tarfile | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 6 import zipfile | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 7 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 8 from six import ( | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 9 BytesIO, | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 10 StringIO | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 11 ) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 12 from six.moves import filter | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 13 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 14 from galaxy import util | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 15 from galaxy.util.image_util import image_type | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 16 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
# Select the bz2 implementation (and patch gzip) according to the running
# interpreter version.
if sys.version_info >= (3, 3):
    import bz2
else:
    # workaround for https://bugs.python.org/issue12591
    gzip.GzipFile.read1 = gzip.GzipFile.read
    try:
        import bz2file as bz2
    except ImportError:
        # If bz2file is unavailable, just fallback to not having pbzip2 support.
        import bz2
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 26 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
# Maximum number of lines check_html() reads before giving up.
HTML_CHECK_LINES = 100
# Sample size (32 KiB) used when reading lines or chunks for content sniffing.
CHUNK_SIZE = 32 * 1024
# Case-insensitive patterns; a match on any of them marks a line as HTML.
HTML_REGEXPS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"<A\s+[^>]*HREF[^>]+>",
        r"<IFRAME[^>]*>",
        r"<FRAMESET[^>]*>",
        r"<META[\W][^>]*>",
        r"<SCRIPT[^>]*>",
    )
)
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 36 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 37 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
def check_html(name, file_path=True):
    """
    Returns True if the file/string contains HTML code.

    When ``file_path`` is true, ``name`` is opened as a UTF-8 text file;
    otherwise ``name`` itself is treated as the text to scan. At most
    HTML_CHECK_LINES lines are read, each limited to CHUNK_SIZE characters,
    and every line is tested against the HTML_REGEXPS patterns. Input that
    cannot be decoded is reported as not being HTML.
    """
    if file_path:
        stream = io.open(name, "r", encoding='utf-8')
    else:
        stream = StringIO(util.unicodify(name))

    found_html = False
    try:
        lines_read = 0
        while lines_read < HTML_CHECK_LINES and not found_html:
            text = stream.readline(CHUNK_SIZE)
            if not text:
                # End of input reached before the line budget was used up.
                break
            for pattern in HTML_REGEXPS:
                if pattern.search(text):
                    found_html = True
                    break
            lines_read += 1
    except UnicodeDecodeError:
        found_html = False
    finally:
        stream.close()
    return found_html
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 59 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 60 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
def check_binary(name, file_path=True):
    """
    Returns the verdict of util.is_binary for the first 1024 bytes of the input.

    When ``file_path`` is true, ``name`` is a path opened in binary mode;
    otherwise ``name`` is the data itself and is wrapped in a BytesIO.
    """
    stream = open(name, "rb") if file_path else BytesIO(name)
    try:
        head = stream.read(1024)
        return util.is_binary(head)
    finally:
        stream.close()
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 71 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 72 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
def check_gzip(file_path, check_content=True):
    """
    Returns a tuple of booleans representing ( is_gzipped, is_valid ).

    The file must start with the gzip magic number and its first bytes must be
    decompressible, otherwise (False, False) is returned. A compressed sff
    file is always accepted. When ``check_content`` is true, the first
    CHUNK_SIZE decompressed bytes are scanned and a compressed HTML file is
    reported as (True, False).
    """
    not_gzip = (False, False)
    valid_gzip = (True, True)

    # Make sure we have a gzipped file
    try:
        with open(file_path, "rb") as raw:
            if raw.read(2) != util.gzip_magic:
                return not_gzip
    except Exception:
        return not_gzip

    # We support some binary data types, so check if the compressed binary file is valid
    # If the file is Bam, it should already have been detected as such, so we'll just check
    # for sff format.
    try:
        with gzip.open(file_path, 'rb') as unzipped:
            is_sff = unzipped.read(4) == b'.sff'
    except Exception:
        return not_gzip

    if is_sff or not check_content:
        return valid_gzip

    with gzip.open(file_path, mode='rb') as unzipped:
        sample = unzipped.read(CHUNK_SIZE)
    # See if we have a compressed HTML file
    return (True, not check_html(sample, file_path=False))
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 103 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 104 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
def check_bz2(file_path, check_content=True):
    """
    Returns a tuple of booleans representing ( is_bz2, is_valid ).

    A file that cannot be read or does not start with the bz2 magic number
    yields (False, False). When ``check_content`` is true, the first
    CHUNK_SIZE decompressed bytes are scanned and a compressed HTML file is
    reported as (True, False).
    """
    try:
        with open(file_path, "rb") as raw:
            has_magic = raw.read(3) == util.bz2_magic
    except Exception:
        has_magic = False
    if not has_magic:
        return (False, False)

    if not check_content:
        return (True, True)

    with bz2.BZ2File(file_path, mode='rb') as unpacked:
        sample = unpacked.read(CHUNK_SIZE)
    # See if we have a compressed HTML file
    looks_like_html = check_html(sample, file_path=False)
    return (True, not looks_like_html)
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 123 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 124 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
def check_zip(file_path, check_content=True, files=1):
    """
    Returns a tuple of booleans representing ( is_zipped, is_valid ).

    ``file_path`` is tested with zipfile.is_zipfile; anything else yields
    (False, False). When ``check_content`` is true, the first CHUNK_SIZE bytes
    of the leading archive members are scanned and an archive containing HTML
    is reported as (True, False).

    ``files`` bounds how many members are scanned.
    NOTE(review): the counter from enumerate() is zero-based and is compared
    after a member has been scanned, so up to ``files + 1`` members are
    inspected. This is kept as-is because changing it would alter which
    archives are rejected.
    """
    if not zipfile.is_zipfile(file_path):
        return (False, False)

    if not check_content:
        return (True, True)

    members = iter_zip(file_path)
    try:
        for filect, (handle, _name) in enumerate(members):
            # Release each member handle as soon as its sample has been read.
            try:
                chunk = handle.read(CHUNK_SIZE)
            finally:
                handle.close()
            if chunk and check_html(chunk, file_path=False):
                return (True, False)
            if filect >= files:
                break
    finally:
        # Closing the generator lets iter_zip leave its ``with`` block, so the
        # archive is closed right away even when we stop iterating early.
        members.close()
    return (True, True)
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 141 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 142 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
def is_bz2(file_path):
    """
    Returns the is_bz2 flag reported by check_bz2, without inspecting the content.
    """
    return check_bz2(file_path, check_content=False)[0]
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 146 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 147 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
def is_gzip(file_path):
    """
    Returns the is_gzipped flag reported by check_gzip, skipping the HTML content scan.
    """
    return check_gzip(file_path, check_content=False)[0]
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 151 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 152 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
def is_zip(file_path):
    """
    Returns the is_zipped flag reported by check_zip, without inspecting the members.
    """
    return check_zip(file_path, check_content=False)[0]
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 156 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 157 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
def is_single_file_zip(file_path):
    """
    Returns True if the zip archive holds at most one file member.

    Directory entries (names ending in '/') are ignored, which is the same
    filter iter_zip applies. An archive with no file members also returns
    True, as it did before.

    The previous implementation compared a zero-based counter with ``> 1`` and
    therefore reported archives containing two files as single-file archives.
    Only the member names are needed, so no member is opened here.
    """
    with zipfile.ZipFile(file_path) as archive:
        file_members = [n for n in archive.namelist() if not n.endswith('/')]
    return len(file_members) <= 1
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 163 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 164 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
def is_tar(file_path):
    """
    Returns the verdict of tarfile.is_tarfile for the given path.
    """
    recognised = tarfile.is_tarfile(file_path)
    return recognised
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 167 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 168 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
def iter_zip(file_path):
    """
    Yields a (handle, name) tuple for every non-directory member of a zip archive.

    Names ending in '/' are skipped. Each handle comes from ZipFile.open and is
    not closed here. The archive itself is opened in a ``with`` block around
    the iteration.
    """
    with zipfile.ZipFile(file_path) as archive:
        for member_name in archive.namelist():
            if member_name.endswith('/'):
                continue
            yield (archive.open(member_name), member_name)
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 173 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 174 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
def check_image(file_path):
    """ Collapse the result of image_type into a plain True/False verdict """
    return bool(image_type(file_path))
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 180 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 181 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
# Public API of this module. The helpers is_single_file_zip, is_tar and
# iter_zip are defined here without a leading underscore but were missing
# from the export list, so wildcard imports could not see them.
__all__ = (
    'check_binary',
    'check_bz2',
    'check_gzip',
    'check_html',
    'check_image',
    'check_zip',
    'is_gzip',
    'is_bz2',
    'is_single_file_zip',
    'is_tar',
    'is_zip',
    'iter_zip',
)
