changeset 0:78d968479d52 draft default tip

Imported from capsule None
author cmonjeau
date Tue, 07 Jul 2015 08:49:40 -0400
parents
children
files extract_archive_and_merge.py extract_archive_and_merge.xml repository_dependencies.xml
diffstat 3 files changed, 195 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_archive_and_merge.py	Tue Jul 07 08:49:40 2015 -0400
@@ -0,0 +1,131 @@
+#!/usr/bin/env python
+"""
+
+
+Created by Cyril MONJEAUD
+Cyril.Monjeaud@irisa.fr
+Last modification: 11/19/2014
+
+And with the help of Anthony Bretaudeau for some stuff with bz2.
+
+"""
+
+import argparse, os, sys, subprocess, tempfile, shutil, gzip, zipfile, tarfile, gzip, bz2, shutil
+import glob 
+from galaxy import eggs
+from galaxy import util
+from galaxy.datatypes.checkers import *
+
+def stop_err( msg ):  # print msg on stderr, then abort the script
+    sys.stderr.write( '%s\n' % msg )
+    sys.exit( 1 )  # non-zero status: a bare sys.exit() exits with 0, i.e. success
+
+def _safe_tar_members(tar, dest, log):
+    """Yield the members of tar that are safe to extract under dest."""
+    root = os.path.realpath(dest)
+    for member in tar.getmembers():
+        target = os.path.realpath(os.path.join(root, member.name))
+        inside = target == root or target.startswith(root + os.sep)
+        if inside and (member.isfile() or member.isdir()):
+            yield member
+        else:
+            # links, devices and paths leaving dest could touch files outside it
+            log.write(member.name+" is skipped (unsafe archive member)\n")
+
+
+def _decompress_stream(compressed, archivename):
+    """Copy an open gzip/bz2 stream into decompress_files and close it."""
+    # the output is named after archivename without its last extension
+    target = "decompress_files/"+os.path.splitext(os.path.basename(archivename))[0]
+    try:
+        with open(target, 'wb', 2**20) as out:
+            # copyfileobj stops on any empty read, str or bytes
+            shutil.copyfileobj(compressed, out, 2**20)
+    finally:
+        compressed.close()
+
+
+def main(archive, archivename, logfile, logid, workdir, merge, rm_header=0, concat=''):
+    """Extract archive into ./decompress_files, then merge or rename its files.
+
+    archive     -- path of the zip, tar, gzip or bzip2 file to extract
+    archivename -- display name of the archive, used to name the output
+                   of a plain gzip/bzip2 file
+    logfile     -- path of the log file to write
+    logid       -- unused, kept for the command line
+    workdir     -- unused, kept for the command line
+    merge       -- "true" to concatenate all extracted files into concat
+                   and delete decompress_files afterwards
+    rm_header   -- number of leading lines dropped from each merged file
+    concat      -- path of the merged file (merge mode only)
+    """
+    os.mkdir("decompress_files")
+
+    # the with block closes the log on every path, errors included
+    with open(logfile, "w") as mylog:
+        is_gzipped, is_gzvalid = check_gzip( archive )
+        is_bzipped, is_bzvalid = check_bz2( archive )
+
+        # extract all files in decompress_files
+        if check_zip( archive ):
+            with zipfile.ZipFile(archive, 'r') as myarchive:
+                myarchive.extractall("decompress_files")
+        elif tarfile.is_tarfile( archive ):
+            mytarfile = tarfile.open(archive)
+            try:
+                safe = _safe_tar_members(mytarfile, "decompress_files", mylog)
+                mytarfile.extractall("decompress_files", members=safe)
+            finally:
+                mytarfile.close()
+        elif is_gzipped and is_gzvalid:
+            _decompress_stream(gzip.open(archive, 'rb'), archivename)
+        elif is_bzipped and is_bzvalid:
+            _decompress_stream(bz2.BZ2File(archive, 'rb'), archivename)
+
+        if merge == "true":
+            mylog.write("Merge option is enabled with "+str(rm_header)+" lines to deleted\n\n")
+            nbheaderlines = int(rm_header)
+            with open(concat, "w") as myfinalfile:
+                for myfile in listdirectory("decompress_files"):
+                    mylog.write(os.path.basename(myfile)+" is extracted from the archive and is added into the result file\n")
+                    # each input is closed as soon as it has been copied
+                    with open(myfile, "r") as myopenfile:
+                        for linenumber, line in enumerate(myopenfile):
+                            # skip the first nbheaderlines lines of every file
+                            if linenumber >= nbheaderlines:
+                                myfinalfile.write(line)
+            shutil.rmtree("decompress_files")
+        else:
+            mylog.write("Merge option is disabled\n\n")
+            longext = {'.fa': '.fasta', '.fq': '.fastq'}
+            # rename the extracted files (recursively) by extension
+            for myfile in listdirectory("decompress_files"):
+                myfileclean = myfile.replace(" ", "\ ")
+                mylog.write(os.path.basename(myfileclean)+" is extracted from the archive \n")
+                source = os.path.abspath(myfile)
+                root, fileext = os.path.splitext(source)
+                if fileext in ('', '.'):
+                    # no extension: give the file a .txt one
+                    shutil.move(source, source+".txt")
+                elif fileext in longext:
+                    # replace only the trailing extension, never a ".fa"/".fq" met earlier in the path
+                    shutil.move(source, root+longext[fileext])
+            mylog.write("\nPlease refresh your history if all files are not present\n")
+
+
+
+# parse the directory and return files path (in a tab)
+def listdirectory(path):
+    """Return the paths of the non-hidden files found recursively under path."""
+    myfile = []
+    # names are taken literally: glob would treat "[", "?" or "*" in path as a pattern
+    names = os.listdir(path) if os.path.isdir(path) else []
+    for i in [path+'/'+name for name in names if not name.startswith('.')]:
+        if os.path.isdir(i):
+            myfile.extend(listdirectory(i))
+        else:
+            myfile.append(i)
+    return myfile
+
+
+if __name__=="__main__": main(*sys.argv[1:])  # command-line arguments map onto main()'s parameters in order
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_archive_and_merge.xml	Tue Jul 07 08:49:40 2015 -0400
@@ -0,0 +1,60 @@
+<tool id="extract_archive_merge" name="Decompress an archive" version="1.0.0" force_history_refresh="True" >
+  <description>in zip, gz, tar.gz, fastq.gz, fastq.bz2 or tar.bz2 format</description>
+  <command interpreter="python">
+	
+extract_archive_and_merge.py $input_archive "$input_archive.display_name" $log $log.id $__new_file_path__ $options_merge.merge
+#if str( $options_merge.merge ) == 'true'
+${options_merge.rm_header}
+$concat
+#end if
+
+  </command>
+ <inputs>
+    <param name="input_archive" type="data" format="zip,tar.gz,tar.bz2,fastq.gz,fastq.bz2" label="Archive name" help="Accepts zip, tar.gz and tar.bz2" />
+    <conditional name="options_merge">
+    <param name="merge" type="select" format="text" label="Merges all files into one">
+	<option value="false">No</option>
+	<option value="true">Yes</option>
+    </param>
+    <when value="false">
+    </when>
+    <when value="true">
+	<param name="rm_header" type="integer" value="0" label="Header lines to delete" help="number of lines to delete at the beginning of each file"/>
+    </when>
+    </conditional>
+ </inputs>
+ <outputs>
+    <data format="txt" name="log" label="decompress_an_archive.log" />
+    <data format="txt" name="additional" label="additional file with ${tool.name}" hidden="true">
+          <discover_datasets pattern="__designation_and_ext__" directory="decompress_files" visible="true" />
+    </data>  
+
+    <data format="txt" name="concat" label="merge_file">
+       <filter>(options_merge['merge'] == 'true')</filter>
+    </data>
+ </outputs>
+ <help>
+**Tool documentation**
+
+This tool simply decompresses an archive file (zip, gz, tar.gz, fastq.gz, fastq.bz2 or tar.bz2) and merges all files into only one. 
+If the merge option is enabled, you can delete as many header lines as you need.
+
+WARNING : the filename should not have special characters (space, brackets, ...), please rename it!!!
+
+--------
+
+**Created and integrated by:**
+
+Cyril Monjeaud 
+
+GenOuest Bio-informatics Core Facility
+
+UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France)
+
+support@genouest.org
+
+If you use this tool in Galaxy, please cite :
+
+`Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. &lt;https://www.e-biogenouest.org/resources/128&gt;`_
+ </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml	Tue Jul 07 08:49:40 2015 -0400
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<repositories description="This tool requires archive datatypes definitions (e.g. zip, tar.gz, etc. format).">
+  <repository changeset_revision="9e4c4b66b4a0" name="archive_datatypes" owner="cmonjeau" toolshed="https://toolshed.g2.bx.psu.edu" />
+</repositories>