def _strip_first_line(source_path, target_path):
    """Copy *source_path* to *target_path*, leaving out its first line."""
    with open(source_path, "r") as source, open(target_path, "w") as target:
        source.readline()
        for line in source:
            target.write(line)


def _zip_by_suffix(directory, suffix, archive_path):
    """Zip the entries of *directory* whose name ends with *suffix*.

    Members are stored under their base name, so the archive layout does not
    depend on the current working directory.
    """
    with zipfile.ZipFile(archive_path, 'w') as archive:
        for path in sorted(glob.glob(os.path.join(directory, '*' + suffix))):
            archive.write(path, os.path.basename(path))


def __main__():
    """Run Commet.py on a read-set file and hand its results to the caller.

    Steps:

    1. Copy ``heatmap.r`` and ``dendro.R`` from ``$RSCRIPTS`` into the
       current directory.
    2. Copy ``--input`` to ``commet_clean_file`` without its first line.
    3. Run ``Commet.py commet_clean_file -b $BINARIES -k .. -t .. -l .. -e ..``
       (plus ``-n`` / ``-m`` when given) and write the command line, stdout
       and stderr to ``--output``.
    4. Zip the ``.bv``, ``.log`` and ``.csv`` files found in
       ``output_commet/`` and move the archives and the four PNG images to
       the ``--output_*`` destinations.

    Raises:
        KeyError: if ``RSCRIPTS`` or ``BINARIES`` is not in the environment.
        SystemExit: if a required option is missing (via ``parser.error``)
            or if ``Commet.py`` exits with a non-zero status.
    """
    # arguments recuperation
    parser = optparse.OptionParser()
    parser.add_option("--input", dest="input")
    parser.add_option("-k", dest="kmer")
    parser.add_option("-t", dest="minsharedkmer")
    parser.add_option("-l", dest="minlengthread")
    parser.add_option("-n", dest="maxn")
    parser.add_option("-e", dest="minshannonindex")
    parser.add_option("-m", dest="maxreads")

    parser.add_option("--output")
    parser.add_option("--output_vectors")
    parser.add_option("--output_dendro")
    parser.add_option("--output_logs")
    parser.add_option("--output_matrix")
    parser.add_option("--output_heatmap1")
    parser.add_option("--output_heatmap2")
    parser.add_option("--output_heatmap3")

    (options, args) = parser.parse_args()

    # Every option below is used unconditionally further down; report a
    # missing one here instead of failing later with an obscure TypeError.
    required = (
        ("--input", options.input),
        ("-k", options.kmer),
        ("-t", options.minsharedkmer),
        ("-l", options.minlengthread),
        ("-e", options.minshannonindex),
        ("--output", options.output),
        ("--output_vectors", options.output_vectors),
        ("--output_dendro", options.output_dendro),
        ("--output_logs", options.output_logs),
        ("--output_matrix", options.output_matrix),
        ("--output_heatmap1", options.output_heatmap1),
        ("--output_heatmap2", options.output_heatmap2),
        ("--output_heatmap3", options.output_heatmap3),
    )
    missing = [flag for flag, value in required if value is None]
    if missing:
        parser.error("missing required option(s): " + ", ".join(missing))

    # copy R scripts into the current dir
    work_dir = os.getcwd()
    for script in ("heatmap.r", "dendro.R"):
        shutil.copy(os.path.join(os.environ['RSCRIPTS'], script), work_dir)

    # remove the first line of the input file
    _strip_first_line(options.input, "commet_clean_file")

    # edit the command line
    cmd_line = [
        "Commet.py", "commet_clean_file",
        "-b", os.environ['BINARIES'],
        "-k", options.kmer,
        "-t", options.minsharedkmer,
        "-l", options.minlengthread,
        "-e", options.minshannonindex,
    ]

    # optional filters: each one is forwarded on its own, so a lone -m is
    # no longer dropped and a lone -n no longer injects None
    if options.maxn:
        cmd_line.extend(["-n", options.maxn])
    if options.maxreads:
        cmd_line.extend(["-m", options.maxreads])

    # execute job; universal_newlines gives text output on Python 2 and 3
    p = subprocess.Popen(cmd_line,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         universal_newlines=True)
    stdoutput, stderror = p.communicate()

    # log file
    with open(options.output, "w") as logfile:
        logfile.write("[COMMAND LINE]"+' '.join(cmd_line)+"\n\n")
        logfile.write(stdoutput)
        logfile.write(stderror)

    # stop here when the job failed: the log above already holds the details
    if p.returncode != 0:
        sys.stderr.write("Commet.py exited with status %d\n" % p.returncode)
        sys.exit(p.returncode if p.returncode > 0 else 1)

    tmp_output_dir = os.path.join(work_dir, "output_commet")
    # kept from the original script: relative --output_* paths are resolved
    # from inside the Commet output directory
    os.chdir(tmp_output_dir)

    # build each archive under a temporary name, then return it
    archives = (
        (".bv", "bv.zip.temp", options.output_vectors),
        (".log", "log.zip.temp", options.output_logs),
        (".csv", "matrix.zip.temp", options.output_matrix),
    )
    for suffix, temp_name, destination in archives:
        temp_path = os.path.join(tmp_output_dir, temp_name)
        _zip_by_suffix(tmp_output_dir, suffix, temp_path)
        shutil.move(temp_path, destination)

    # image outputs
    images = (
        ("dendrogram_normalized.png", options.output_dendro),
        ("heatmap_normalized.png", options.output_heatmap1),
        ("heatmap_percentage.png", options.output_heatmap2),
        ("heatmap_plain.png", options.output_heatmap3),
    )
    for image_name, destination in images:
        shutil.move(os.path.join(tmp_output_dir, image_name), destination)


if __name__ == "__main__":
    __main__()
Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. <https://www.e-biogenouest.org/resources/128>`_ + + + + +10.1186/1471-2105-13-S19-S10 +@INPROCEEDINGS{JOBIM2013, + author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.}, + title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France}, + booktitle = {JOBIM 2013 Proceedings}, + year = {2013}, + url = {https://www.e-biogenouest.org/resources/128}, + pages = {97-106} + } + + + + + diff -r 000000000000 -r a6beb4d4c417 commet_datatype.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/commet_datatype.py Fri Jun 05 11:41:26 2015 -0400 @@ -0,0 +1,42 @@ + +""" +Commet input file datatype +""" + + +import logging, os, os.path, sys, time, tempfile, shutil, string, glob, re + +from galaxy.datatypes.sniff import * +from galaxy.datatypes import data +from galaxy.datatypes.metadata import MetadataElement +from galaxy.datatypes.xml import GenericXml + +log = logging.getLogger(__name__) + + +class Commet( data.Text ): + """ + Commet input file: text whose first line starts with '//commet input file//'. + """ + file_ext = "commet" + + def sniff( self, filename ): + """ + Returns True when the first line starts with '//commet input file//', False otherwise. 
def __main__():
    """Write a Commet read-set configuration file.

    Command line: ``prepare_commet.py OUTPUT [READS SETNAME]...``

    ``sys.argv[1]`` is the file to create. Each following pair of arguments
    describes one set: first the read files (comma-separated), then the set
    name. The file starts with the ``//commet input file//`` marker line,
    followed by one ``SETNAME:READS`` line per set in which the commas of
    READS are replaced by semicolons.

    Raises:
        SystemExit: with a usage message when the output path is missing or
            a READS argument has no matching SETNAME. The check runs before
            the output file is opened, so no partial file is left behind.
    """
    set_args = sys.argv[2:]
    if len(sys.argv) < 2 or len(set_args) % 2:
        sys.exit("usage: prepare_commet.py OUTPUT [READS SETNAME]...")

    # open the output file; the with-block closes it even if a write fails
    with open(sys.argv[1], 'w') as read_set:
        read_set.write("//commet input file//\n")

        # write the files path: arguments come as (reads, set name) pairs
        for reads, set_name in zip(set_args[0::2], set_args[1::2]):
            read_set.write(set_name + ":" + reads.replace(",", ";") + "\n")


if __name__ == "__main__":
    __main__()
Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. <https://www.e-biogenouest.org/resources/128>`_ + + + + +@INPROCEEDINGS{JOBIM2013, + author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.}, + title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France}, + booktitle = {JOBIM 2013 Proceedings}, + year = {2013}, + url = {https://www.e-biogenouest.org/resources/128}, + pages = {97-106} + } + + + + + diff -r 000000000000 -r a6beb4d4c417 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Fri Jun 05 11:41:26 2015 -0400 @@ -0,0 +1,6 @@ + + + + + +