Mercurial > repos > cmonjeau > decompress_an_archive_and_merge
comparison extract_archive_and_merge.py @ 0:78d968479d52 draft default tip
Imported from capsule None
| author | cmonjeau |
|---|---|
| date | Tue, 07 Jul 2015 08:49:40 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:78d968479d52 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 | |
| 4 | |
| 5 Created by Cyril MONJEAUD | |
| 6 Cyril.Monjeaud@irisa.fr | |
| 7 Last modification: 11/19/2014 | |
| 8 | |
| 9 And with the help of Anthony Bretaudeau for some stuff with bz2. | |
| 10 | |
| 11 """ | |
| 12 | |
| 13 import argparse, os, sys, subprocess, tempfile, shutil, gzip, zipfile, tarfile, gzip, bz2, shutil | |
| 14 import glob | |
| 15 from galaxy import eggs | |
| 16 from galaxy import util | |
| 17 from galaxy.datatypes.checkers import * | |
| 18 | |
def stop_err(msg):
    """Write *msg* to stderr and abort the script with a failing exit status.

    The message is written followed by a newline.  The process then exits
    with status 1 so that callers inspecting the exit code (and not only
    stderr) can tell that the run failed.

    Raises:
        SystemExit: always, with ``code == 1``.
    """
    sys.stderr.write('%s\n' % msg)
    # A bare sys.exit() would report success (status 0) after an error.
    sys.exit(1)
| 22 | |
def main(archive, archivename, logfile, logid, workdir, merge, rm_header=0, concat=''):
    """Extract *archive* into ``decompress_files`` and optionally merge its files.

    Args:
        archive: path of the archive to unpack (zip, tar, gzip or bz2).
        archivename: original name of the archive; for gzip/bz2 input its
            basename without the last extension names the decompressed file.
        logfile: path of the log file to (over)write.
        logid: unused by this function; kept for the command-line interface.
        workdir: unused by this function; kept for the command-line interface.
        merge: the string ``"true"`` enables merging; anything else disables it.
        rm_header: number of leading lines to drop from every file when
            merging (int or numeric string; negative values are treated as 0).
        concat: path of the merged result file (only used when merging).

    When merging, every extracted file is appended to *concat* and the
    extraction directory is removed.  Otherwise the extracted files stay in
    ``decompress_files`` and are renamed: files without extension get
    ``.txt``, ``.fa`` becomes ``.fasta`` and ``.fq`` becomes ``.fastq``.

    Raises:
        OSError: if ``decompress_files`` already exists in the current directory.
        ValueError: if merging is enabled and *rm_header* is not an integer.
    """
    extract_dir = "decompress_files"
    os.mkdir(extract_dir)

    # The context manager guarantees the log is flushed and closed even if
    # extraction or merging fails half-way.
    with open(logfile, "w") as mylog:
        _extract_archive(archive, archivename, extract_dir)

        if merge == "true":
            mylog.write("Merge option is enabled with "+str(rm_header)+" lines to deleted\n\n")
            _merge_extracted_files(extract_dir, concat, rm_header, mylog)
            shutil.rmtree(extract_dir)
        else:
            mylog.write("Merge option is disabled\n\n")
            _rename_extracted_files(extract_dir, mylog)

        mylog.write("\nPlease refresh your history if all files are not present\n")


def _extract_archive(archive, archivename, extract_dir):
    """Unpack *archive* into *extract_dir*, trying zip, tar, gzip, then bz2."""
    is_gzipped, is_gzvalid = check_gzip(archive)
    is_bzipped, is_bzvalid = check_bz2(archive)

    # NOTE(review): extractall() trusts the member paths stored in the
    # archive; a crafted archive with absolute or "../" names can write
    # outside extract_dir.  Validate member names if archives are untrusted.
    if check_zip(archive):
        with zipfile.ZipFile(archive, 'r') as myarchive:
            myarchive.extractall(extract_dir)
    elif tarfile.is_tarfile(archive):
        with tarfile.TarFile.open(archive) as mytarfile:
            mytarfile.extractall(extract_dir)
    elif is_gzipped and is_gzvalid:
        _decompress_single_file(gzip.open(archive, 'rb'), archivename, extract_dir)
    elif is_bzipped and is_bzvalid:
        _decompress_single_file(bz2.BZ2File(archive, 'rb'), archivename, extract_dir)
    # Any other input is left alone: nothing is extracted.


def _decompress_single_file(compressed, archivename, extract_dir):
    """Stream the open *compressed* file object into *extract_dir* and close it."""
    target_name = os.path.splitext(os.path.basename(archivename))[0]
    try:
        with open(os.path.join(extract_dir, target_name), 'wb', 2**20) as output:
            # copyfileobj stops on an empty read whether the stream yields
            # str or bytes; comparing chunks against '' never terminates
            # when read() returns b''.
            shutil.copyfileobj(compressed, output, 2**20)
    finally:
        compressed.close()


def _merge_extracted_files(extract_dir, concat, rm_header, mylog):
    """Append every file under *extract_dir* to *concat*, dropping header lines."""
    # Convert once instead of once per line; a negative count means "drop nothing".
    lines_to_skip = max(int(rm_header), 0)
    with open(concat, "w") as myfinalfile:
        for myfile in listdirectory(extract_dir):
            mylog.write(os.path.basename(myfile)+" is extracted from the archive and is added into the result file\n")
            with open(myfile, "r") as myopenfile:
                for index, line in enumerate(myopenfile):
                    if index >= lines_to_skip:
                        myfinalfile.write(line)


def _rename_extracted_files(extract_dir, mylog):
    """Log every file under *extract_dir* and normalise its extension in place."""
    long_extensions = {'fa': '.fasta', 'fq': '.fastq'}
    for myfile in listdirectory(extract_dir):
        # Spaces are backslash-escaped in the log message only.
        myfileclean = myfile.replace(" ", "\\ ")
        mylog.write(os.path.basename(myfileclean)+" is extracted from the archive \n")

        source = os.path.abspath(myfile)
        root, ext = os.path.splitext(source)
        fileext = ext.replace(".", "")

        if fileext == '':
            # No extension: append ".txt" to the full name.
            shutil.move(source, source + ".txt")
        elif fileext in long_extensions:
            # Swap only the trailing extension; a str.replace() on the whole
            # path would also rewrite ".fa"/".fq" inside directory names or
            # earlier in the file name.
            shutil.move(source, root + long_extensions[fileext])
| 114 | |
| 115 | |
| 116 | |
def listdirectory(path):
    """Return the paths of all files found under *path*, recursively.

    Sub-directories are descended into and do not appear in the result
    themselves.  Entries whose name starts with a dot are skipped, exactly
    as a ``*`` glob pattern would skip them.  Entries are visited in sorted
    order so the result (and therefore the merge order) is deterministic.

    Directory names are used literally: characters such as ``[``, ``]``,
    ``*`` or ``?`` in an extracted directory name are not interpreted as
    glob patterns, so the files below such directories are not lost.

    Args:
        path: directory to scan.

    Returns:
        A list of file paths, each prefixed with *path*.  The list is empty
        when *path* is not an existing directory.
    """
    if not os.path.isdir(path):
        return []

    myfile = []
    for name in sorted(os.listdir(path)):
        if name.startswith('.'):
            continue
        entry = os.path.join(path, name)
        if os.path.isdir(entry):
            # Directory: collect its files recursively.
            myfile.extend(listdirectory(entry))
        else:
            myfile.append(entry)
    return myfile
| 129 | |
| 130 | |
# Script entry point: forward the command-line arguments, in order, to main().
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    main(*cli_arguments)
