# HG changeset patch
# User cmonjeau
# Date 1436273380 14400
# Node ID 78d968479d52bc85f1c02c195893075b83bac58d
Imported from capsule None
diff -r 000000000000 -r 78d968479d52 extract_archive_and_merge.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_archive_and_merge.py Tue Jul 07 08:49:40 2015 -0400
@@ -0,0 +1,131 @@
+#!/usr/bin/env python
+"""
+
+
+Created by Cyril MONJEAUD
+Cyril.Monjeaud@irisa.fr
+Last modification: 11/19/2014
+
+And with the help of Anthony Bretaudeau for some stuff with bz2.
+
+"""
+
+import argparse, os, sys, subprocess, tempfile, shutil, gzip, zipfile, tarfile, gzip, bz2, shutil
+import glob
+from galaxy import eggs
+from galaxy import util
+from galaxy.datatypes.checkers import *
+
+def stop_err( msg ):
+ sys.stderr.write( '%s\n' % msg )
+ sys.exit()
+
+def main(archive, archivename, logfile, logid, workdir, merge, rm_header=0, concat=''):
+
+ # create a temporary repository
+ #tmp_dir = tempfile.mkdtemp(dir=os.getcwd())
+ os.mkdir("decompress_files")
+
+ #open log file
+ mylog = open(logfile, "w");
+
+ is_gzipped, is_gzvalid = check_gzip( archive )
+ is_bzipped, is_bzvalid = check_bz2( archive )
+
+ # extract all files in a temp directory
+ # test if is a zip file
+ if check_zip( archive ):
+ with zipfile.ZipFile(archive, 'r') as myarchive:
+ myarchive.extractall("decompress_files")
+
+ # test if is a tar file
+ elif tarfile.is_tarfile( archive ):
+ mytarfile=tarfile.TarFile.open(archive)
+ mytarfile.extractall("decompress_files")
+ mytarfile.close()
+
+ # test if is a gzip file
+ elif is_gzipped and is_gzvalid :
+ mygzfile = gzip.open(archive, 'rb')
+
+ myungzippedfile = open ("decompress_files/"+os.path.splitext(os.path.basename(archivename))[0], 'wb', 2**20)
+ for i in iter(lambda: mygzfile.read(2**20), ''):
+ myungzippedfile.write(i)
+
+ myungzippedfile.close()
+ mygzfile.close()
+
+ elif is_bzipped and is_bzvalid:
+ mybzfile = bz2.BZ2File(archive, 'rb')
+
+ myunbzippedfile = open ("decompress_files/"+os.path.splitext(os.path.basename(archivename))[0], 'wb', 2**20)
+ for i in iter(lambda: mybzfile.read(2**20), ''):
+ myunbzippedfile.write(i)
+
+ myunbzippedfile.close()
+ mybzfile.close()
+
+
+ # test if merge is enable
+ if merge == "true":
+ mylog.write("Merge option is enabled with "+str(rm_header)+" lines to deleted\n\n")
+ myfinalfile = open(concat, "w");
+ for myfile in listdirectory("decompress_files"):
+ myopenfile = open(myfile, "r")
+ nblinesremove=0
+ mylog.write(os.path.basename(myfile)+" is extracted from the archive and is added into the result file\n")
+ for line in myopenfile:
+
+ #if not equal, don't write
+ if int(rm_header) != nblinesremove:
+ nblinesremove=nblinesremove+1
+ else:
+ # write the line into the final file
+ myfinalfile.write(line)
+
+ myfinalfile.close()
+
+ shutil.rmtree("decompress_files")
+
+ else:
+ # if merge is disable
+ mylog.write("Merge option is disabled\n\n")
+
+ # move all files (recursively) in the working dir
+ for myfile in listdirectory("decompress_files"):
+ myfileclean = myfile.replace(" ", "\ ")
+
+ mylog.write(os.path.basename(myfileclean)+" is extracted from the archive \n")
+
+ fileext = os.path.splitext(myfile)[1].replace(".", "")
+
+ # if no extension
+ if fileext == '':
+ shutil.move(os.path.abspath(myfile), os.path.abspath(myfile)+".txt")
+
+ if fileext == 'fa':
+ shutil.move(os.path.abspath(myfile), os.path.abspath(myfile).replace(".fa", ".fasta"))
+
+ if fileext == 'fq':
+ shutil.move(os.path.abspath(myfile), os.path.abspath(myfile).replace(".fq", ".fastq"))
+
+ mylog.write("\nPlease refresh your history if all files are not present\n")
+ mylog.close()
+
+
+
+# parse the directory and return files path (in a tab)
+def listdirectory(path):
+ myfile=[]
+ l = glob.glob(path+'/*')
+ for i in l:
+ # if directory
+ if os.path.isdir(i):
+ myfile.extend(listdirectory(i))
+ # else put the file in the tab
+ else:
+ myfile.append(i)
+ return myfile
+
+
+if __name__=="__main__": main(*sys.argv[1:])
diff -r 000000000000 -r 78d968479d52 extract_archive_and_merge.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_archive_and_merge.xml Tue Jul 07 08:49:40 2015 -0400
@@ -0,0 +1,60 @@
+
diff -r 000000000000 -r 78d968479d52 repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml Tue Jul 07 08:49:40 2015 -0400
@@ -0,0 +1,4 @@
+
+
+
+