comparison extract_archive_and_merge.py @ 0:78d968479d52 draft default tip

Imported from capsule None
author cmonjeau
date Tue, 07 Jul 2015 08:49:40 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:78d968479d52
1 #!/usr/bin/env python
2 """
3
4
5 Created by Cyril MONJEAUD
6 Cyril.Monjeaud@irisa.fr
7 Last modification: 11/19/2014
8
9 And with the help of Anthony Bretaudeau for some stuff with bz2.
10
11 """
12
13 import argparse, os, sys, subprocess, tempfile, shutil, gzip, zipfile, tarfile, gzip, bz2, shutil
14 import glob
15 from galaxy import eggs
16 from galaxy import util
17 from galaxy.datatypes.checkers import *
18
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes *msg* followed by a newline to standard error, then exits with
    status 1 so that the calling process can tell the run failed (a bare
    ``sys.exit()`` would report success).

    :param msg: text of the error message.
    :raises SystemExit: always, with exit code 1.
    """
    sys.stderr.write('%s\n' % msg)
    sys.exit(1)
22
def _inflate_to_file(compressed, destination):
    """Stream the content of the file object *compressed* into *destination*."""
    with open(destination, 'wb', 2**20) as inflated:
        # copyfileobj stops on any empty read, so it works whether the
        # stream yields str (Python 2) or bytes (Python 3) chunks.
        shutil.copyfileobj(compressed, inflated, 2**20)


def _swap_extension(filepath, new_extension):
    """Rename *filepath* so that only its final extension becomes *new_extension*."""
    shutil.move(filepath, os.path.splitext(filepath)[0] + new_extension)


def main(archive, archivename, logfile, logid, workdir, merge, rm_header=0, concat=''):
    """Extract an archive into ``decompress_files`` and optionally merge its files.

    The archive is tried, in order, as zip, tar, gzip and bz2.  zip and tar
    archives are fully extracted; a gzip or bz2 stream is decompressed into a
    single file named after *archivename* without its last extension.  If no
    format matches, nothing is extracted.

    When *merge* equals ``"true"`` every extracted file is appended to
    *concat*, skipping the first *rm_header* lines of each one, and the
    extraction directory is removed afterwards.  Otherwise the extracted
    files stay in ``decompress_files``: files without extension get ``.txt``,
    ``.fa`` becomes ``.fasta`` and ``.fq`` becomes ``.fastq``.

    :param archive: path of the archive to read.
    :param archivename: display name of the archive, used to name the output
        of a plain gzip/bz2 stream.
    :param logfile: path of the log file to (over)write.
    :param logid: not used by this function.
    :param workdir: not used by this function.
    :param merge: the string ``"true"`` enables merging; any other value
        disables it.
    :param rm_header: number of leading lines dropped from each merged file
        (converted with ``int``).
    :param concat: path of the merged result file (merge mode only).
    :raises OSError: if ``decompress_files`` already exists.
    """
    extract_dir = "decompress_files"

    # create the extraction directory
    os.mkdir(extract_dir)

    # the context manager closes the log even when extraction fails
    with open(logfile, "w") as mylog:

        is_gzipped, is_gzvalid = check_gzip(archive)
        is_bzipped, is_bzvalid = check_bz2(archive)

        # target used when the archive is a single compressed stream
        single_target = os.path.join(
            extract_dir,
            os.path.splitext(os.path.basename(archivename))[0])

        # test if is a zip file
        if check_zip(archive):
            with zipfile.ZipFile(archive, 'r') as myarchive:
                myarchive.extractall(extract_dir)

        # test if is a tar file
        elif tarfile.is_tarfile(archive):
            # NOTE(review): extractall() trusts the member paths stored in
            # the archive; a crafted tar can write outside extract_dir.
            # Consider validating members if archives are untrusted.
            mytarfile = tarfile.open(archive)
            try:
                mytarfile.extractall(extract_dir)
            finally:
                mytarfile.close()

        # test if is a gzip file
        elif is_gzipped and is_gzvalid:
            mygzfile = gzip.open(archive, 'rb')
            try:
                _inflate_to_file(mygzfile, single_target)
            finally:
                mygzfile.close()

        # test if is a bz2 file
        elif is_bzipped and is_bzvalid:
            mybzfile = bz2.BZ2File(archive, 'rb')
            try:
                _inflate_to_file(mybzfile, single_target)
            finally:
                mybzfile.close()

        # test if merge is enabled
        if merge == "true":
            mylog.write("Merge option is enabled with "+str(rm_header)+" lines to deleted\n\n")
            with open(concat, "w") as myfinalfile:
                for myfile in listdirectory(extract_dir):
                    mylog.write(os.path.basename(myfile)+" is extracted from the archive and is added into the result file\n")
                    with open(myfile, "r") as myopenfile:
                        nblinesremove = 0
                        for line in myopenfile:
                            # skip lines until rm_header of them were dropped
                            if int(rm_header) != nblinesremove:
                                nblinesremove += 1
                            else:
                                # write the line into the final file
                                myfinalfile.write(line)

            shutil.rmtree(extract_dir)

        else:
            # merge is disabled
            mylog.write("Merge option is disabled\n\n")

            # normalise the extension of every extracted file (recursively)
            for myfile in listdirectory(extract_dir):
                myfileclean = myfile.replace(" ", "\\ ")

                mylog.write(os.path.basename(myfileclean)+" is extracted from the archive \n")

                fullpath = os.path.abspath(myfile)
                fileext = os.path.splitext(myfile)[1].replace(".", "")

                if fileext == '':
                    # no extension
                    shutil.move(fullpath, fullpath+".txt")
                elif fileext == 'fa':
                    # touch only the extension, never another ".fa" in the path
                    _swap_extension(fullpath, ".fasta")
                elif fileext == 'fq':
                    _swap_extension(fullpath, ".fastq")

        mylog.write("\nPlease refresh your history if all files are not present\n")
114
115
116
def listdirectory(path):
    """Return the paths of all non-directory entries below *path*, recursively.

    Directory contents are read with ``os.listdir`` rather than a glob
    pattern, so directory names containing glob metacharacters (``[``, ``*``,
    ``?``) are traversed literally instead of being misread as patterns.
    As with the ``*`` glob, entries whose name starts with a dot are ignored.
    Entries are visited in sorted order, which makes the result (and hence
    the order in which callers process files) deterministic.

    :param path: directory to scan.
    :returns: list of paths, each one built by joining *path* with the
        relative location of the entry; an empty list when *path* is not a
        readable directory.
    """
    try:
        names = sorted(os.listdir(path))
    except OSError:
        # missing, unreadable or non-directory path: nothing to list
        return []

    found = []
    for name in names:
        # hidden entries are skipped, matching the behaviour of a '*' glob
        if name.startswith('.'):
            continue
        entry = os.path.join(path, name)
        if os.path.isdir(entry):
            # descend into sub-directories
            found.extend(listdirectory(entry))
        else:
            found.append(entry)
    return found
129
130
if __name__ == "__main__":
    # Forward the command-line arguments (script name excluded) to main()
    # as positional parameters, in the order they were given.
    cli_arguments = sys.argv[1:]
    main(*cli_arguments)