0
|
1 #!/usr/bin/env python
|
|
2 """
|
|
3
|
|
4
|
|
5 Created by Cyril MONJEAUD
|
|
6 Cyril.Monjeaud@irisa.fr
|
|
7 Last modification: 11/19/2014
|
|
8
|
|
9 And with the help of Anthony Bretaudeau for some stuff with bz2.
|
|
10
|
|
11 """
|
|
12
|
|
13 import argparse, os, sys, subprocess, tempfile, shutil, gzip, zipfile, tarfile, gzip, bz2, shutil
|
|
14 import glob
|
|
15 from galaxy import eggs
|
|
16 from galaxy import util
|
|
17 from galaxy.datatypes.checkers import *
|
|
18
|
|
def stop_err(msg):
    """Write *msg* to standard error and abort the script.

    The message is followed by a newline.  The process terminates through
    ``sys.exit(1)`` so that the caller (shell, job runner) sees a failing
    exit status instead of the status 0 produced by a bare ``sys.exit()``.

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write('%s\n' % msg)
    # A non-zero status marks the run as failed even for callers that do
    # not inspect stderr.
    sys.exit(1)
|
|
22
|
|
def _reject_unsafe_tar_members(tar, destination):
    """Raise ValueError if a tar member name resolves outside *destination*.

    Only absolute paths and ``..`` components are caught here; links stored
    inside the archive are not inspected.
    """
    root = os.path.realpath(destination)
    for member in tar.getmembers():
        target = os.path.realpath(os.path.join(root, member.name))
        if target != root and not target.startswith(root + os.sep):
            raise ValueError("Unsafe path in tar archive: %s" % member.name)


def _decompress_single_file(opener, archive, target):
    """Stream a single compressed file (*opener* is gzip.open or bz2.BZ2File) to *target*."""
    with opener(archive, 'rb') as compressed:
        with open(target, 'wb') as plain:
            # copyfileobj stops on an empty read for both str and bytes, so
            # the loop terminates on Python 2 and Python 3 alike.
            shutil.copyfileobj(compressed, plain, 2**20)


def _extract_archive(archive, archivename, destination):
    """Unpack *archive* into *destination*.

    Formats are probed in this order: zip, tar, gzip, bz2.  When none of
    the probes matches, nothing is extracted and no error is raised.
    """
    # check_gzip / check_bz2 / check_zip are not defined in this file; they
    # are presumably provided by the star import of galaxy.datatypes.checkers.
    is_gzipped, is_gzvalid = check_gzip(archive)
    is_bzipped, is_bzvalid = check_bz2(archive)

    # A gzip/bz2 file holds one stream: name the output after the original
    # archive name with its last extension removed.
    single_target = os.path.join(
        destination, os.path.splitext(os.path.basename(archivename))[0])

    if check_zip(archive):
        with zipfile.ZipFile(archive, 'r') as myarchive:
            myarchive.extractall(destination)
    elif tarfile.is_tarfile(archive):
        with tarfile.open(archive) as mytarfile:
            _reject_unsafe_tar_members(mytarfile, destination)
            mytarfile.extractall(destination)
    elif is_gzipped and is_gzvalid:
        _decompress_single_file(gzip.open, archive, single_target)
    elif is_bzipped and is_bzvalid:
        _decompress_single_file(bz2.BZ2File, archive, single_target)


def _merge_files(paths, concat, rm_header, mylog):
    """Concatenate *paths* into *concat*, dropping the first *rm_header* lines of each file."""
    # Converted once instead of once per line; a negative count is treated
    # as "remove nothing".
    skip = max(int(rm_header), 0)
    with open(concat, "w") as merged:
        for path in paths:
            mylog.write(os.path.basename(path)+" is extracted from the archive and is added into the result file\n")
            with open(path, "r") as part:
                for index, line in enumerate(part):
                    if index >= skip:
                        merged.write(line)


def _normalise_extension(path):
    """Rename *path* in place: no extension -> .txt, .fa -> .fasta, .fq -> .fastq."""
    absolute = os.path.abspath(path)
    root, extension = os.path.splitext(absolute)
    fileext = extension.replace(".", "")
    if fileext == '':
        shutil.move(absolute, absolute + ".txt")
    elif fileext == 'fa':
        # Only the final suffix is swapped; directory names that happen to
        # contain ".fa" are left alone.
        shutil.move(absolute, root + ".fasta")
    elif fileext == 'fq':
        shutil.move(absolute, root + ".fastq")


def main(archive, archivename, logfile, logid, workdir, merge, rm_header=0, concat=''):
    """Unpack an archive and either merge its files or normalise their names.

    The archive is extracted into a freshly created ``decompress_files``
    directory relative to the current working directory.

    Args:
        archive: path of the archive file to unpack.
        archivename: original name of the archive; used to name the output
            of a plain gzip/bz2 file.
        logfile: path of the log file to (over)write.
        logid: accepted for the command-line interface; not used here.
        workdir: accepted for the command-line interface; not used here.
        merge: the string "true" enables merging; any other value disables it.
        rm_header: number of leading lines to drop from every file when
            merging (int or numeric string).
        concat: path of the merged output file (merge mode only).

    In merge mode every extracted file is appended to *concat* and the
    extraction directory is removed.  Otherwise the extracted files stay in
    ``decompress_files`` and are renamed so that they carry a ``.txt``,
    ``.fasta`` or ``.fastq`` extension where applicable.

    Raises:
        OSError: if ``decompress_files`` already exists or a file cannot be
            opened.
        ValueError: if *rm_header* is not an integer or a tar member would
            be extracted outside the extraction directory.
    """
    extract_dir = "decompress_files"
    os.mkdir(extract_dir)

    # The context manager guarantees the log is flushed and closed even
    # when extraction or merging fails.
    with open(logfile, "w") as mylog:
        _extract_archive(archive, archivename, extract_dir)

        if merge == "true":
            mylog.write("Merge option is enabled with "+str(rm_header)+" lines to deleted\n\n")
            _merge_files(listdirectory(extract_dir), concat, rm_header, mylog)
            shutil.rmtree(extract_dir)
        else:
            mylog.write("Merge option is disabled\n\n")
            for myfile in listdirectory(extract_dir):
                # Spaces are backslash-escaped in the log only; the file on
                # disk keeps its real name.
                shown_name = os.path.basename(myfile).replace(" ", "\\ ")
                mylog.write(shown_name+" is extracted from the archive \n")
                _normalise_extension(myfile)

        mylog.write("\nPlease refresh your history if all files are not present\n")
|
|
114
|
|
115
|
|
116
|
|
def listdirectory(path):
    """Return the paths of all files found under *path*, recursively.

    Entries are visited in sorted name order so the result is the same on
    every filesystem.  Names starting with a dot are skipped, as they were
    by the former ``glob('*')`` based lookup.  Directory names are taken
    literally, so names containing ``[``, ``?`` or ``*`` are handled.

    Args:
        path: directory to scan.

    Returns:
        A list of file paths, each prefixed with *path*.  The list is empty
        when *path* is not an existing directory.
    """
    if not os.path.isdir(path):
        return []

    found = []
    for name in sorted(os.listdir(path)):
        if name.startswith("."):
            continue
        entry = os.path.join(path, name)
        if os.path.isdir(entry):
            # Descend into sub-directories and collect their files too.
            found.extend(listdirectory(entry))
        else:
            found.append(entry)
    return found
|
|
129
|
|
130
|
|
if __name__ == "__main__":
    # Script entry point: hand every command-line argument to main()
    # positionally, in the order given.
    cli_args = sys.argv[1:]
    main(*cli_args)
|