changeset 0:a6beb4d4c417

Imported from capsule None
author cmonjeau
date Fri, 05 Jun 2015 11:41:26 -0400
parents
children 0a09dd575d91
files commet.py commet.xml commet_datatype.py datatypes_conf.xml prepare_commet.py prepare_commet.xml tool_dependencies.xml
diffstat 7 files changed, 379 insertions(+), 0 deletions(-) [+]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commet.py	Fri Jun 05 11:41:26 2015 -0400
@@ -0,0 +1,129 @@
+#!/usr/bin/env python
+import sys, tempfile, subprocess, glob
+import os, re, shutil, optparse
+import zipfile, tarfile, gzip
+from os.path import basename
+
+"""
+WARNING:
+
+commet.py needs the COMMET binaries (including Commet.py) in your $PATH.
+
+COMMET is available after compiling the sources:
+
+http://github.com/pierrepeterlongo/commet
+
+or through the galaxy_commet package in the GenOuest toolshed (coming soon).
+
+NOTE:
+
+If you get a bash error, add the line "#!/usr/bin/env python" at the top of
+the Commet.py file.
+
+"""
+
+def __main__():
+
+    # parse the arguments passed by the Galaxy wrapper (commet.xml)
+    parser = optparse.OptionParser()
+    parser.add_option("--input", dest="input")
+    parser.add_option("-k", dest="kmer")
+    parser.add_option("-t", dest="minsharedkmer")
+    parser.add_option("-l", dest="minlengthread")
+    parser.add_option("-n", dest="maxn")
+    parser.add_option("-e", dest="minshannonindex")
+    parser.add_option("-m", dest="maxreads")
+
+    parser.add_option("--output")
+    parser.add_option("--output_vectors")
+    parser.add_option("--output_dendro")
+    parser.add_option("--output_logs")
+    parser.add_option("--output_matrix")
+    parser.add_option("--output_heatmap1")
+    parser.add_option("--output_heatmap2")
+    parser.add_option("--output_heatmap3")
+
+    (options, args) = parser.parse_args()
+
+
+    # copy the R plotting scripts shipped with the COMMET package into the
+    # working directory, where Commet.py expects to find them
+    shutil.copy(os.environ['RSCRIPTS'] + "/heatmap.r", os.getcwd())
+    shutil.copy(os.environ['RSCRIPTS'] + "/dendro.R", os.getcwd())
+
+    # drop the first line of the Galaxy dataset (the "//commet input file//"
+    # header added by the datatype) so that Commet.py only sees the read sets
+    commet_file = open(options.input, "r")
+    commet_file_clean = open("commet_clean_file", "w")
+    commet_file.readline()
+    for line in commet_file:
+        commet_file_clean.write(line)
+
+    # close files
+    commet_file.close()
+    commet_file_clean.close()
+
+    # build the Commet.py command line
+    cmd_line = ["Commet.py", "commet_clean_file",
+                "-b", os.environ['BINARIES'],
+                "-k", options.kmer,
+                "-t", options.minsharedkmer,
+                "-l", options.minlengthread,
+                "-e", options.minshannonindex]
+
+    # -n and -m are only passed when the "advanced" filter options
+    # are selected in the Galaxy form (see commet.xml)
+    if options.maxn:
+        cmd_line.extend(["-n", options.maxn, "-m", options.maxreads])
+
+    # run Commet.py and capture its standard output and error streams
+    p = subprocess.Popen(cmd_line,
+                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+    stdoutput, stderror = p.communicate()
+
+    # write the command line and both captured streams into the log output
+    logfile = open(options.output, "w")
+    logfile.write("[COMMAND LINE] " + ' '.join(cmd_line) + "\n\n")
+    logfile.write(str(stdoutput))
+    logfile.write(str(stderror))
+    logfile.close()
+
+    # Commet.py writes all of its results into ./output_commet/
+    tmp_output_dir = os.getcwd() + "/output_commet/"
+    os.chdir(tmp_output_dir)
+
+    # create one temporary zip archive per result type
+    mybvzipfile = zipfile.ZipFile(tmp_output_dir + 'bv.zip.temp', 'w')
+    mylogzipfile = zipfile.ZipFile(tmp_output_dir + 'log.zip.temp', 'w')
+    mymatrixzipfile = zipfile.ZipFile(tmp_output_dir + 'matrix.zip.temp', 'w')
+
+    # sort the result files into the matching archive
+    list_files = glob.glob(tmp_output_dir + '*')
+    for i in list_files:
+        if re.search(r"\.bv$", i):
+            mybvzipfile.write(os.path.basename(i))
+        if re.search(r"\.log$", i):
+            mylogzipfile.write(os.path.basename(i))
+        if re.search(r"\.csv$", i):
+            mymatrixzipfile.write(os.path.basename(i))
+
+    # close the zip files
+    mybvzipfile.close()
+    mylogzipfile.close()
+    mymatrixzipfile.close()
+
+    # move the archives to the Galaxy output datasets
+    shutil.move(tmp_output_dir + 'bv.zip.temp', options.output_vectors)
+    shutil.move(tmp_output_dir + 'log.zip.temp', options.output_logs)
+    shutil.move(tmp_output_dir + 'matrix.zip.temp', options.output_matrix)
+
+    # move the dendrogram and heatmaps to the Galaxy output datasets
+    shutil.move(tmp_output_dir + 'dendrogram_normalized.png', options.output_dendro)
+    shutil.move(tmp_output_dir + 'heatmap_normalized.png', options.output_heatmap1)
+    shutil.move(tmp_output_dir + 'heatmap_percentage.png', options.output_heatmap2)
+    shutil.move(tmp_output_dir + 'heatmap_plain.png', options.output_heatmap3)
+
+if __name__ == "__main__":
+    __main__()
+
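Note on the execution environment: commet.py resolves Commet.py through $PATH and reads the RSCRIPTS and BINARIES variables set by the package_commet dependency. A minimal stand-alone sketch of that pre-flight check (not part of the wrapper; the helper name is hypothetical):

#!/usr/bin/env python
# Sketch only: verify the assumptions commet.py makes about its environment.
import os
from distutils.spawn import find_executable


def check_commet_environment():
    problems = []
    # Commet.py must be reachable through $PATH (see the warning in commet.py).
    if find_executable("Commet.py") is None:
        problems.append("Commet.py was not found in $PATH")
    # The wrapper reads these variables, normally set by the tool dependency.
    for var in ("RSCRIPTS", "BINARIES"):
        if var not in os.environ:
            problems.append("environment variable %s is not set" % var)
    return problems


if __name__ == "__main__":
    issues = check_commet_environment()
    if issues:
        raise SystemExit("\n".join(issues))
    print("COMMET environment looks usable")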
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commet.xml	Fri Jun 05 11:41:26 2015 -0400
@@ -0,0 +1,103 @@
+<tool id="commet" name="commet" version="24.7.14">
+  <description>COmpare Multiple METagenomes</description>
+  <requirements>
+    <requirement type="package" version="24.7.14">commet</requirement>
+  </requirements>
+<command interpreter="python">
+commet.py
+--input $input
+-k $kmer
+-t $minsharedkmer
+-l $minlengthread
+-e $minshannonindex
+#if str( $options_advanced.options_advanced_selector ) == "advanced"
+-m $options_advanced.maxreads
+-n $options_advanced.maxn
+#end if
+--output $output
+--output_vectors $output_vectors
+--output_dendro $output_dendro
+--output_logs $output_logs
+--output_matrix $output_matrix
+--output_heatmap1 $output_heatmap1
+--output_heatmap2 $output_heatmap2
+--output_heatmap3 $output_heatmap3
+</command>
+
+  <inputs>
+	<!-- Input data files -->
+	<param name="input" type="data" format="commet" label="Read sets" help="Input read sets, one set per line in the form 'set_name: read_file; read_file...'. Generate this file with the Prepare commet tool." />
+	<param name="kmer" type="integer" label="K-mer size" value="33" help="Length of the k-mers used for the comparison." />
+	<param name="minsharedkmer" type="integer" label="Minimal shared k-mers" value="2" help="Minimal number of shared k-mers." />
+	<param name="minlengthread" type="integer" label="Minimal read length" value="0" help="Minimal length a read must have to be kept." />
+	<param name="minshannonindex" type="float" label="Minimal Shannon index" value="0" help="Minimal Shannon index a read must have to be kept. Float in [0,2.32]." />
+	<conditional name="options_advanced">
+		<param name="options_advanced_selector" type="select" label="Read filter options" help="By default, all reads are kept and the number of Ns per read is not limited">
+			<option value="default" selected="true">default</option>
+			<option value="advanced">advanced</option>
+		</param>
+		<when value="advanced">
+			<param name="maxreads" type="integer" value="600" label="Maximum number of selected reads per set" help="If a set is composed of 3 read files and this option is set to 600, the first 200 reads of each read file are used." />
+			<param name="maxn" type="integer" value="5" label="Maximal number of Ns per read" help="Maximal number of Ns a read may contain to be kept." />
+		</when>
+	</conditional>
+  </inputs>
+
+  <outputs>
+      <data format="txt" name="output" label="${tool.name} on ${on_string}: commet.log" />
+      <data format="zip" name="output_vectors" label="${tool.name} on ${on_string}: vector.zip" />
+      <data format="zip" name="output_logs" label="${tool.name} on ${on_string}: logs.zip" />
+      <data format="png" name="output_dendro" label="${tool.name} on ${on_string}: dendrogram.png" />
+      <data format="zip" name="output_matrix" label="${tool.name} on ${on_string}: matrix.zip" />
+      <data format="png" name="output_heatmap1" label="${tool.name} on ${on_string}: heatmap_normalized.png" />
+      <data format="png" name="output_heatmap2" label="${tool.name} on ${on_string}: heatmap_percentage.png" />
+      <data format="png" name="output_heatmap3" label="${tool.name} on ${on_string}: heatmap_plain.png" />
+  </outputs>
+  <help>
+
+**Description**
+
+COMMET ("COmpare Multiple METagenomes") provides a global similarity overview between all the datasets of a large metagenomic project.
+
+Directly from non-assembled reads, all-against-all comparisons are performed through an efficient indexing strategy. The results are stored as bit vectors, a compressed representation of read subsets, which can be further combined with logical operations. Finally, COMMET clusters the metagenomic datasets and visualizes the result as a dendrogram and heatmaps.
+
+
+-------
+
+**Web site**
+
+http://colibread.inria.fr/commet/
+
+-------
+
+**Integrated by**
+
+Yvan Le Bras and Cyril Monjeaud 
+
+GenOuest Bio-informatics Core Facility
+
+UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France)
+
+support@genouest.org
+
+If you use this tool in Galaxy, please cite:
+
+`Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France. JOBIM 2013. &lt;https://www.e-biogenouest.org/resources/128&gt;`_
+
+
+  </help>
+<citations>
+<citation type="doi">10.1186/1471-2105-13-S19-S10</citation>
+<citation type="bibtex">@INPROCEEDINGS{JOBIM2013,
+    author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.},
+    title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France},
+    booktitle = {JOBIM 2013 Proceedings},
+    year = {2013},
+    url = {https://www.e-biogenouest.org/resources/128},
+    pages = {97-106}
+    }
+</citation>
+</citations>
+
+</tool>
+
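The tool help above describes COMMET's results as bit vectors that can be combined with logical operations. The toy Python lines below only illustrate that idea with plain integers; the real .bv files packed into vector.zip are produced and combined by the upstream COMMET binaries, not by this wrapper.

# Illustration of the bit-vector idea from the tool help (toy data, not
# COMMET's actual .bv encoding): bit i is 1 when read i of set A is shared
# with the other set.
a_vs_b = 0b101101  # hypothetical vector: reads of A found in set B
a_vs_c = 0b100111  # hypothetical vector: reads of A found in set C

shared_with_both = a_vs_b & a_vs_c   # reads of A present in B AND C
shared_with_any = a_vs_b | a_vs_c    # reads of A present in B OR C

print("shared with both: " + bin(shared_with_both))
print("shared with any:  " + bin(shared_with_any))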
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commet_datatype.py	Fri Jun 05 11:41:26 2015 -0400
@@ -0,0 +1,42 @@
+
+"""
+Commet input file datatype
+"""
+
+
+import logging, os, os.path, sys, time, tempfile, shutil, string, glob, re
+
+from galaxy.datatypes.sniff import *
+from galaxy.datatypes import data
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes.xml import GenericXml
+
+log = logging.getLogger(__name__)
+
+
+class Commet( data.Text ):
+    """
+    COMMET read sets input file, as produced by the "Prepare commet" tool.
+    """
+    file_ext = "commet"
+
+    def sniff( self, filename ):
+        """
+        Return True when the file starts with the '//commet input file//'
+        header written by prepare_commet.py.
+        """
+        with open( filename ) as handle:
+            first_line = handle.readline()
+            if first_line.startswith('//commet input file//'):
+                return True
+        return False
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        """Set the peek and blurb text"""
+        if not dataset.dataset.purged:
+            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
+            dataset.blurb = 'Commet input data'
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
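Outside Galaxy, the sniff rule above boils down to a one-line header test. A small framework-free sketch (hypothetical helper, handy for quick checks, since the Commet class itself needs the Galaxy datatype machinery to be importable):

# Stand-alone equivalent of Commet.sniff(): a dataset is recognised as
# "commet" when its first line is the header written by prepare_commet.py.
def looks_like_commet_input(path):
    with open(path) as handle:
        return handle.readline().startswith('//commet input file//')


if __name__ == "__main__":
    import sys
    for name in sys.argv[1:]:
        print("%s: %s" % (name, looks_like_commet_input(name)))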
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml	Fri Jun 05 11:41:26 2015 -0400
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<datatypes>
+    <datatype_files>
+        <datatype_file name="commet_datatype.py"/>
+    </datatype_files>
+    <registration>
+        <datatype extension="commet" type="galaxy.datatypes.commet_datatype:Commet" display_in_upload="true"/>
+    </registration>
+    <sniffers>
+        <sniffer type="galaxy.datatypes.commet_datatype:Commet"/>
+    </sniffers>
+</datatypes>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prepare_commet.py	Fri Jun 05 11:41:26 2015 -0400
@@ -0,0 +1,26 @@
+import sys, os, re
+
+"""
+
+Created by Cyril Monjeaud
+Cyril.Monjeaud@irisa.fr
+
+"""
+
+def __main__():
+
+    # open the output file
+    read_set = open(sys.argv[1], 'w')
+    read_set.write("//commet input file//\n")
+
+    # the remaining arguments come in pairs: comma-separated read files, then the set name
+    i = 2
+    while i < len(sys.argv):
+        read_set.write(sys.argv[i+1] + ":")
+        read_set.write(sys.argv[i].replace(",", ";") + "\n")
+        i = i + 2
+
+    # close the output file
+    read_set.close()
+
+if __name__ == "__main__": __main__()
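For each repeat defined in prepare_commet.xml, Galaxy passes the selected datasets as a single comma-separated argument followed by the quoted set name, and prepare_commet.py rewrites each pair as one "set_name:read_file;read_file" line under the //commet input file// header. A sketch of the resulting file, using hypothetical paths:

# Hypothetical reproduction of a prepare_commet.py run:
#   python prepare_commet.py read_sets.commet \
#       /data/A_1.fastq,/data/A_2.fastq "liver" /data/B.fastq "gut"
# would write a file equivalent to:
example = (
    "//commet input file//\n"
    "liver:/data/A_1.fastq;/data/A_2.fastq\n"
    "gut:/data/B.fastq\n"
)

with open("read_sets.commet", "w") as handle:
    handle.write(example)

The header line is what the commet datatype sniffer looks for, and commet.py strips it again before handing the remaining lines to Commet.py.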
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prepare_commet.xml	Fri Jun 05 11:41:26 2015 -0400
@@ -0,0 +1,61 @@
+<tool id="preparecommet" name="Prepare commet" version="1.0">
+  <description>build the read sets input file for commet</description>
+<command interpreter="python">
+prepare_commet.py
+${read_sets}
+#for $set in $sets
+    ${set.reads}
+    "${set.setname}"
+#end for
+
+</command>
+
+  <inputs>
+	<!-- Input data files -->
+	<repeat name="sets" title="Read sets" min="1">
+	    <param name="reads" type="data" multiple="true" format="fasta,fasta.gz,fastq,fastq.gz" label="Dataset"/>
+	    <param name="setname" type="text" label="Set name" value="set_name"/>
+	</repeat>
+  </inputs>
+
+  <outputs>
+      <data format="commet" name="read_sets" label="read_sets : ${tool.name} on ${on_string}" />
+  </outputs>
+  <help>
+
+**Description**
+
+Prepare the read sets configuration file for the commet tool.
+
+-------
+
+**Created and integrated by**
+
+Yvan Le Bras and Cyril Monjeaud 
+
+GenOuest Bio-informatics Core Facility
+
+UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France)
+
+support@genouest.org
+
+If you use this tool in Galaxy, please cite:
+
+`Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France. JOBIM 2013. &lt;https://www.e-biogenouest.org/resources/128&gt;`_
+
+  </help>
+
+<citations>
+<citation type="bibtex">@INPROCEEDINGS{JOBIM2013,
+    author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.},
+    title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France},
+    booktitle = {JOBIM 2013 Proceedings},
+    year = {2013},
+    url = {https://www.e-biogenouest.org/resources/128},
+    pages = {97-106}
+    }
+</citation>
+</citations>
+
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Fri Jun 05 11:41:26 2015 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="commet" version="24.7.14">
+    <repository changeset_revision="73a820a5e919" name="package_commet" owner="cmonjeau" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
+  </package>
+</tool_dependency>