changeset 0:f905f982ee3d

Imported from capsule None
author cmonjeau
date Fri, 05 Jun 2015 11:40:49 -0400
parents
children f7458a23cebe
files GSV.py datatypes_conf.xml mapsembler2.py mapsembler2.xml tool_dependencies.xml
diffstat 5 files changed, 288 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GSV.py	Fri Jun 05 11:40:49 2015 -0400
@@ -0,0 +1,66 @@
+
+"""
+GSV visualisation datatypes
+"""
+
+
+import logging, os, os.path, sys, time, tempfile, shutil, string, glob, re
+
+from galaxy.datatypes.sniff import *
+from galaxy.datatypes import data
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes.xml import GenericXml
+
+log = logging.getLogger(__name__)
+
+class GenericMapJson( data.Text ):
+    """Base text datatype for JSON-based formats, using the "mapjson" extension."""
+    file_ext = "mapjson"
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        """Set the peek and blurb text, or placeholder text if the dataset was purged."""
+        if not dataset.dataset.purged:
+            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
+            dataset.blurb = 'Mapjson data'
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
+    def sniff( self, filename ):
+        """
+        Reads the first line of the file. NOTE(review): nothing is returned, so
+        the result is always None and the outcomes shown below look stale.
+        >>> fname = get_test_fname( 'megablast_xml_parser_test1.blastxml' )
+        >>> GenericMapJson().sniff( fname )
+        True
+        >>> fname = get_test_fname( 'interval.interval' )
+        >>> GenericMapJson().sniff( fname )
+        False
+        """
+        # TODO - Use a context manager on Python 2.5+ to close handle
+        handle = open(filename)
+        line = handle.readline()
+        handle.close()
+
+
+class Gjson( GenericMapJson ):
+    """
+    Datatype for files with the "gjson" extension (a GenericMapJson subclass).
+    """
+    file_ext = "gjson"
+
+    def sniff( self, filename ):
+        """
+        Always returns False, so this datatype must be assigned manually.
+        """
+        return False
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        """Set the peek and blurb text, or placeholder text if the dataset was purged."""
+        if not dataset.dataset.purged:
+            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
+            dataset.blurb = 'GJSON data'
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml	Fri Jun 05 11:40:49 2015 -0400
@@ -0,0 +1,14 @@
+<?xml version="1.0"?>
+<datatypes>
+    <datatype_files>
+        <datatype_file name="GSV.py"/>
+    </datatype_files>
+    <registration>
+    <datatype extension="gjson" type="galaxy.datatypes.GSV:Gjson" mimetype="application/json" display_in_upload="true"/>
+    <datatype extension="mapjson" type="galaxy.datatypes.GSV:GenericMapJson" mimetype="application/json" display_in_upload="true"/>
+    </registration>
+  <sniffers>
+    <sniffer type="galaxy.datatypes.GSV:Gjson"/>
+    <sniffer type="galaxy.datatypes.GSV:GenericMapJson"/>
+  </sniffers>
+</datatypes>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mapsembler2.py	Fri Jun 05 11:40:49 2015 -0400
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+import sys, subprocess, glob
+import os, re, shutil, optparse
+from os.path import basename
+
+"""
+WARNING :
+
+Mapsembler2.py needs mapsembler2_exe binaries in your $PATH
+
+Mapsembler2 is available after compiling sources :
+
+http://www.irisa.fr/symbiose/people/ppeterlongo/mapsembler2_2.2.3.zip
+
+or with the galaxy_mapsembler2 package in the GenOuest toolshed
+
+
+"""
+
+def __main__():
+        """Run run_mapsembler2_pipeline.sh from the parsed options and gather its outputs."""
+	# parse the command-line arguments
+        parser = optparse.OptionParser()
+        parser.add_option("-s", dest="input_starters")
+        parser.add_option("-r", dest="input_files")
+        parser.add_option("-t", dest="output_extension")
+        parser.add_option("-k", dest="kmer")
+        parser.add_option("-c", dest="coverage")
+        parser.add_option("-d", dest="substitutions")
+        parser.add_option("-g", dest="genome_size")
+        parser.add_option("-f", dest="process_search")
+        parser.add_option("-x", dest="max_length")
+        parser.add_option("-y", dest="max_depth")
+        parser.add_option("--output")
+        parser.add_option("-i", dest="index_files")
+
+        (options, args) = parser.parse_args()
+
+	# link the path held in the TOOLS environment variable as ./tools
+	os.symlink(os.environ['TOOLS'], os.getcwd()+'/tools')
+
+	# build the mapsembler2 pipeline command line
+	cmd_line=[]
+	cmd_line.append("run_mapsembler2_pipeline.sh")
+
+	# starter sequences file
+	cmd_line.extend(["-s", options.input_starters])
+
+	# input read files
+	cmd_line.append("-r")
+
+	# the comma-separated paths are re-joined with spaces into ONE argument
+	cmd_line.append(' '.join(options.input_files.split(",")))
+
+	# add parameters into the command line
+	cmd_line.extend(["-t", options.output_extension, "-k", options.kmer, "-c", options.coverage, "-d", options.substitutions, "-g", options.genome_size, "-f", options.process_search, "-x", options.max_length, "-y", options.max_depth])
+	
+	# open the output log file
+        log = open(options.output, "w")
+	log.write("[COMMAND LINE] "+' '.join(cmd_line))
+
+	process=subprocess.Popen(cmd_line,
+                   stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+        stdoutput, stderror = process.communicate()
+	
+	# append the captured stdout and stderr to the log
+	log.write(stdoutput)
+	log.write(stderror)
+
+	# close log file
+	log.close()
+	
+	# move result files (res_*) into the job_outputs dir
+	os.mkdir("job_outputs")
+	result_files = glob.glob("res_*")
+	for file in result_files:
+		shutil.move(file, "job_outputs/")
+
+
+	# move index files (index_*) as well when -i is "true"
+	if options.index_files == "true":
+	        index_files = glob.glob("index_*")
+		for index in index_files:
+			shutil.move(index, "job_outputs/")
+
+	# rename the *.json results in job_outputs to *.gjson
+        json_files = glob.glob("job_outputs/*.json")
+        for json in json_files:
+		shutil.move(json, json.replace(".json", ".gjson"))
+
+
+if __name__ == "__main__": __main__()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mapsembler2.xml	Fri Jun 05 11:40:49 2015 -0400
@@ -0,0 +1,108 @@
+<tool id="mapsembler2" name="Mapsembler2" version="2.2.3">
+  <description>is a targeted assembly software.</description>
+  <requirements>
+    <requirement type="package" version="2.2.3">mapsembler2</requirement>
+  </requirements>
+<command interpreter="python">
+mapsembler2.py
+-s $input_starters
+-r $data_files
+-t $output_extension
+-k $kmer
+-c $coverage
+-d $substitutions
+-g $genome_size
+-f $process_search
+-x $max_length
+-y $max_depth
+--output $output
+-i $index_files
+</command>
+
+  <inputs>
+	<!-- Input data files -->
+	<param name="input_starters" type="data" format="fasta" label="Starters" help="set of input sequences" /> 	
+	<param name="data_files" type="data" multiple="true" format="fasta,fastq" label="Read file" help="Data loaded in the script" />
+	<param name="output_extension" type="select" label="Select your output extension type">
+		<option value="1">a strict sequence</option>
+		<option value="2">a consensus sequence</option>
+		<option value="3">a strict graph</option>
+		<option value="4">a consensus graph</option>
+	</param>
+	<param name="kmer" type="integer" label="Size of kmers" value="31" help="Set the length of used kmers. Must fit the compiled value. Only uneven number" />
+	<param name="coverage" type="integer" label="Minimal coverage" value="5" help="set the minimal coverage: Used by Phaser (don't use kmers with lower coverage) "/>
+        <param name="substitutions" type="integer" label="Number of authorized substitutions" value="1" help="set the number of authorized substitutions used while mapping reads on finding SNPs"/>
+	<param name="genome_size" type="integer" label="Estimated genome size" value="10000000" help="Used only to control memory usage. e.g.3 billion (3000000000) uses 4Gb of RAM." />
+        <param name="process_search" type="select" label="Process of search" help="Set the process of search in the graph" >
+		<option value="1">Breadth</option>
+                <option value="2">Depth</option>
+ 	</param>
+        <param name="max_length" type="integer" label="Max length of nodes" value="40" help="set the maximal length of nodes"/>
+        <param name="max_depth" type="integer" label="Max depth of nodes" value="10000" help="set the maximal depth of the graph"/>
+	<param name="index_files" type="boolean" checked="false" default="false" label="Include index output files" />
+  </inputs>
+
+  <outputs>
+      <data format="txt" name="output" label="${tool.name} on ${on_string}: out.txt" >
+		<discover_datasets pattern="__designation_and_ext__" directory="job_outputs" visible="true" />
+      </data>
+
+  </outputs>
+  <help>
+
+**Description**
+
+Mapsembler2 is a targeted assembly software. It takes as input a set of NGS raw reads (fasta or fastq, gzipped or not) and a set of input sequences (starters). It first determines if each starter is read-coherent, i.e. whether reads confirm the presence of each starter in the original sequence. Then for each read-coherent starter, Mapsembler2 outputs its sequence neighborhood as a linear sequence or as a graph, depending on the user choice.
+Mapsembler2 may be used for (not limited to):
+
+· Validate an assembled sequence (input as starter), e.g. from a de Bruijn graph assembly where read-coherence was not enforced.
+
+· Check if a gene (input as starter) has a homolog in a set of reads.
+
+· Checks if a known enzyme is present in a metagenomic NGS read set.
+
+· Enrich unmappable reads by extending them, possibly making them mappable.
+
+· Checks what happens at the extremities of a contig.
+
+· Remove contaminants or symbiont reads from a read set
+
+-------
+
+**Web site**
+
+http://colibread.inria.fr/mapsembler2/
+
+-------
+
+**Integrated by**
+
+Cyril Monjeaud 
+
+GenOuest Bio-informatics Core Facility
+
+UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France)
+
+support@genouest.org
+
+If you use this tool in Galaxy, please cite :
+
+`Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. &lt;https://www.e-biogenouest.org/resources/128&gt;`_
+
+  </help>
+<citations>
+<citation type="doi">10.1186/1471-2105-13-48</citation>
+<citation type="bibtex">@INPROCEEDINGS{JOBIM2013,
+    author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.},
+    title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France},
+    booktitle = {JOBIM 2013 Proceedings},
+    year = {2013},
+    url = {https://www.e-biogenouest.org/resources/128},
+    pages = {97-106}
+    }
+</citation>
+
+</citations>
+
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Fri Jun 05 11:40:49 2015 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="mapsembler2" version="2.2.3">
+      <repository changeset_revision="486979ed9bc7" name="package_mapsembler2" owner="cmonjeau" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>