Mercurial > repos > cmonjeau > decompress_an_archive_and_merge
comparison extract_archive_and_merge.py @ 0:78d968479d52 draft default tip
Imported from capsule None
author | cmonjeau |
---|---|
date | Tue, 07 Jul 2015 08:49:40 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:78d968479d52 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 | |
4 | |
5 Created by Cyril MONJEAUD | |
6 Cyril.Monjeaud@irisa.fr | |
7 Last modification: 11/19/2014 | |
8 | |
9 And with the help of Anthony Bretaudeau for some stuff with bz2. | |
10 | |
11 """ | |
12 | |
13 import argparse, os, sys, subprocess, tempfile, shutil, gzip, zipfile, tarfile, gzip, bz2, shutil | |
14 import glob | |
15 from galaxy import eggs | |
16 from galaxy import util | |
17 from galaxy.datatypes.checkers import * | |
18 | |
def stop_err(msg):
    """Write *msg* to standard error and terminate the process.

    The message is written followed by a newline, then SystemExit is raised
    with status 1 so that callers (and the shell) can tell the run failed.
    The previous bare ``sys.exit()`` reported success (status 0) even though
    this function is only meant for error paths.
    """
    sys.stderr.write('%s\n' % msg)
    sys.exit(1)
22 | |
def _reject_unsafe_tar_members(tar, dest):
    """Raise ValueError if a tar member name would resolve outside *dest*."""
    root = os.path.realpath(dest)
    for name in tar.getnames():
        target = os.path.realpath(os.path.join(root, name))
        if target != root and not target.startswith(root + os.sep):
            raise ValueError("Unsafe path in archive: %s" % name)


def _decompress_stream(stream, archivename, dest):
    """Copy an open decompressing stream to *dest*, named after *archivename* minus its last extension."""
    out_name = os.path.splitext(os.path.basename(archivename))[0]
    with open(os.path.join(dest, out_name), 'wb', 2**20) as out:
        # copyfileobj stops on any empty read, whether it is '' or b''.
        shutil.copyfileobj(stream, out, 2**20)


def _extract_archive(archive, archivename, dest):
    """Unpack *archive* into *dest*; return False when its format is not recognised."""
    # check_gzip / check_bz2 / check_zip are not defined in this file;
    # presumably they come from the star import of galaxy.datatypes.checkers.
    is_gzipped, is_gzvalid = check_gzip(archive)
    is_bzipped, is_bzvalid = check_bz2(archive)

    if check_zip(archive):
        with zipfile.ZipFile(archive, 'r') as zip_archive:
            zip_archive.extractall(dest)
    elif tarfile.is_tarfile(archive):
        # Tested before plain gzip/bz2 so compressed tarballs are unpacked
        # as tar archives rather than as a single compressed file.
        with tarfile.open(archive) as tar_archive:
            # NOTE: only member names are validated; symlink members that
            # point outside dest are not detected by this check.
            _reject_unsafe_tar_members(tar_archive, dest)
            tar_archive.extractall(dest)
    elif is_gzipped and is_gzvalid:
        with gzip.open(archive, 'rb') as stream:
            _decompress_stream(stream, archivename, dest)
    elif is_bzipped and is_bzvalid:
        with bz2.BZ2File(archive, 'rb') as stream:
            _decompress_stream(stream, archivename, dest)
    else:
        return False
    return True


def _merge_files(paths, concat, rm_header, mylog):
    """Concatenate *paths* into *concat*, dropping the first *rm_header* lines of each file."""
    # Convert once; a negative value skips nothing instead of suppressing
    # every line as the old per-line counter did.
    skip = int(rm_header)
    with open(concat, "w") as merged:
        for path in paths:
            mylog.write(os.path.basename(path)+" is extracted from the archive and is added into the result file\n")
            with open(path, "r") as part:
                for index, line in enumerate(part):
                    if index >= skip:
                        merged.write(line)


def _normalize_extension(path):
    """Rename *path* so that no extension becomes .txt, .fa becomes .fasta and .fq becomes .fastq."""
    stem, ext = os.path.splitext(path)
    ext = ext.replace(".", "")
    if ext == '':
        shutil.move(path, path + ".txt")
    elif ext == 'fa':
        # Only the final extension is swapped; a ".fa" elsewhere in the
        # path (directory or file stem) is left untouched.
        shutil.move(path, stem + ".fasta")
    elif ext == 'fq':
        shutil.move(path, stem + ".fastq")


def main(archive, archivename, logfile, logid, workdir, merge, rm_header=0, concat=''):
    """Extract an archive into ./decompress_files and optionally merge its files.

    Parameters:
        archive     -- path of the archive to read (zip, tar, gzip or bz2).
        archivename -- original name of the archive; for plain gzip/bz2
                       input the output file is this name's basename with
                       the last extension removed.
        logfile     -- path of the log file to (over)write.
        logid       -- accepted for the command-line interface, unused here.
        workdir     -- accepted for the command-line interface, unused here.
        merge       -- the string "true" enables merging; anything else
                       leaves the extracted files in place.
        rm_header   -- number of leading lines dropped from every file when
                       merging (int or numeric string).
        concat      -- path of the merged output file (merge mode only).

    In merge mode every extracted file is appended to *concat* and the
    ``decompress_files`` directory is removed afterwards.  Otherwise the
    extracted files stay in ``decompress_files`` and are renamed so that
    files without extension get ``.txt``, ``.fa`` becomes ``.fasta`` and
    ``.fq`` becomes ``.fastq``.

    Raises OSError if ``decompress_files`` already exists and ValueError if
    a tar member would be written outside the extraction directory.
    """
    dest = "decompress_files"

    # Extraction directory, created relative to the current directory.
    os.mkdir(dest)

    # The with-block guarantees the log is flushed and closed on errors too.
    with open(logfile, "w") as mylog:
        if not _extract_archive(archive, archivename, dest):
            mylog.write("The input is not a recognized zip, tar, gzip or bz2 archive\n\n")

        if merge == "true":
            mylog.write("Merge option is enabled with "+str(rm_header)+" lines to deleted\n\n")
            _merge_files(listdirectory(dest), concat, rm_header, mylog)
            shutil.rmtree(dest)
        else:
            mylog.write("Merge option is disabled\n\n")

            # Files are logged with spaces backslash-escaped, then renamed
            # in place inside the extraction directory.
            for path in listdirectory(dest):
                escaped = path.replace(" ", "\\ ")
                mylog.write(os.path.basename(escaped)+" is extracted from the archive \n")
                _normalize_extension(path)

        mylog.write("\nPlease refresh your history if all files are not present\n")
114 | |
115 | |
116 | |
def listdirectory(path):
    """Return the paths of all non-directory entries below *path*, recursively.

    Each returned path is *path* joined with the entry's relative location.
    Entries are visited in sorted name order so the result (and therefore
    the merge order) is deterministic.  Names starting with a dot are
    skipped, as they were by the former ``glob`` pattern ``*``.  The
    directory is read with ``os.listdir`` instead of ``glob`` so that
    directory names containing pattern characters such as ``[`` or ``*``
    are traversed correctly.  A *path* that is not a directory yields an
    empty list.
    """
    if not os.path.isdir(path):
        return []

    found = []
    for name in sorted(os.listdir(path)):
        if name.startswith('.'):
            continue
        entry = os.path.join(path, name)
        if os.path.isdir(entry):
            # Descend into sub-directories and keep their files in order.
            found.extend(listdirectory(entry))
        else:
            found.append(entry)
    return found
129 | |
130 | |
if __name__ == "__main__":
    # Script entry point: forward the command-line arguments positionally.
    main(*sys.argv[1:])