Mercurial > repos > cmonjeau > mapsembler2
changeset 0:f905f982ee3d
Imported from capsule None
author | cmonjeau |
---|---|
date | Fri, 05 Jun 2015 11:40:49 -0400 |
parents | |
children | f7458a23cebe |
files | GSV.py datatypes_conf.xml mapsembler2.py mapsembler2.xml tool_dependencies.xml |
diffstat | 5 files changed, 288 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GSV.py Fri Jun 05 11:40:49 2015 -0400 @@ -0,0 +1,66 @@
"""
GSV visualisation datatypes
"""

import logging
import os
import os.path
import sys
import time
import tempfile
import shutil
import string
import glob
import re

from galaxy.datatypes.sniff import *
from galaxy.datatypes import data
from galaxy.datatypes.metadata import MetadataElement
from galaxy.datatypes.xml import GenericXml

log = logging.getLogger(__name__)

# Number of leading bytes inspected by GenericMapJson.sniff(); keeps the
# check cheap even for very large or single-line files.
_SNIFF_BYTES = 1024


class GenericMapJson( data.Text ):
    """Base format class for any JSON file."""
    file_ext = "mapjson"

    # Short description stored in ``dataset.blurb`` by set_peek();
    # subclasses override this instead of duplicating set_peek().
    _blurb = 'Mapjson data'

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set the peek and blurb text of ``dataset``.

        For a purged dataset both fields are set to fixed placeholder
        messages; otherwise the peek comes from ``data.get_file_peek`` and
        the blurb from the class-level ``_blurb``.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = self._blurb
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """
        Determine whether the file looks like JSON.

        Only the first ``_SNIFF_BYTES`` bytes are read. The file is reported
        as JSON when its first non-whitespace character opens a JSON object
        (``{``) or array (``[``); the content is not parsed any further.
        Returns a bool.

        >>> fname = get_test_fname( 'interval.interval' )
        >>> GenericMapJson().sniff( fname )
        False
        """
        # Binary mode avoids decode errors on non-text files; the context
        # manager closes the handle even if the read fails.
        with open( filename, 'rb' ) as handle:
            head = handle.read( _SNIFF_BYTES ).lstrip()
        return head[:1] in ( b'{', b'[' )


class Gjson( GenericMapJson ):
    """
    Graph JSON ("gjson") datatype.

    It is never auto-detected (see sniff()); set_peek() is inherited from
    GenericMapJson and only the blurb text differs.
    """
    file_ext = "gjson"

    _blurb = 'GJSON data'

    def sniff( self, filename ):
        """
        Returns false and the user must manually set.
        """
        return False
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes_conf.xml Fri Jun 05 11:40:49 2015 -0400 @@ -0,0 +1,14 @@ +<?xml version="1.0"?> +<datatypes> + <datatype_files> + <datatype_file name="GSV.py"/> + </datatype_files> + <registration> + <datatype extension="gjson" type="galaxy.datatypes.GSV:Gjson" mimetype="application/json" display_in_upload="true"/> + <datatype extension="mapjson" type="galaxy.datatypes.GSV:GenericMapJson" mimetype="application/json" display_in_upload="true"/> + </registration> + <sniffers> + <sniffer type="galaxy.datatypes.GSV:Gjson"/> + <sniffer type="galaxy.datatypes.GSV:GenericMapJson"/> + </sniffers> +</datatypes>
#!/usr/bin/env python
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mapsembler2.py Fri Jun 05 11:40:49 2015 -0400 @@ -0,0 +1,94 @@
"""
Command-line wrapper that runs run_mapsembler2_pipeline.sh and gathers its
result files into the ``job_outputs`` directory.

WARNING :

mapsembler2.py needs mapsembler2_exe binaries in your $PATH

Mapsembler2 is available after compiling sources :

http://www.irisa.fr/symbiose/people/ppeterlongo/mapsembler2_2.2.3.zip

or with the galaxy_mapsembler2 package in the GenOuest toolshed
"""
import sys, subprocess, glob
import os, re, shutil, optparse
from os.path import basename

# Directory (relative to the working directory) where results are gathered.
OUTPUT_DIR = "job_outputs"

# Option destinations that must be supplied on the command line; without
# them the pipeline command cannot be built or the log cannot be written.
_REQUIRED_OPTIONS = (
    "input_starters", "input_files", "output_extension", "kmer", "coverage",
    "substitutions", "genome_size", "process_search", "max_length",
    "max_depth", "output",
)


def _parse_options(argv=None):
    """Parse the wrapper's options; exit with a usage error if one is missing."""
    parser = optparse.OptionParser()
    parser.add_option("-s", dest="input_starters")
    parser.add_option("-r", dest="input_files")
    parser.add_option("-t", dest="output_extension")
    parser.add_option("-k", dest="kmer")
    parser.add_option("-c", dest="coverage")
    parser.add_option("-d", dest="substitutions")
    parser.add_option("-g", dest="genome_size")
    parser.add_option("-f", dest="process_search")
    parser.add_option("-x", dest="max_length")
    parser.add_option("-y", dest="max_depth")
    parser.add_option("--output")
    parser.add_option("-i", dest="index_files")

    options, _unused_args = parser.parse_args(argv)

    missing = [name for name in _REQUIRED_OPTIONS
               if getattr(options, name) is None]
    if missing:
        # parser.error() prints the usage message and exits with status 2.
        parser.error("missing required option(s): " + ", ".join(missing))
    return options


def _build_command(options):
    """Return the argument list used to launch run_mapsembler2_pipeline.sh."""
    # The read files arrive comma-separated and are handed to the pipeline
    # as ONE space-separated argument following "-r".
    read_files = " ".join(options.input_files.split(","))
    return [
        "run_mapsembler2_pipeline.sh",
        "-s", options.input_starters,
        "-r", read_files,
        "-t", options.output_extension,
        "-k", options.kmer,
        "-c", options.coverage,
        "-d", options.substitutions,
        "-g", options.genome_size,
        "-f", options.process_search,
        "-x", options.max_length,
        "-y", options.max_depth,
    ]


def _run_pipeline(cmd_line, log_path):
    """Run the pipeline, write its stdout/stderr to log_path, return its exit status."""
    # The context manager closes the log even if the pipeline cannot be
    # started (e.g. the script is not in $PATH).
    with open(log_path, "w") as log:
        log.write("[COMMAND LINE] " + " ".join(cmd_line) + "\n")

        # universal_newlines=True makes communicate() return text, so the
        # writes below also work on Python 3 (bytes cannot be written to a
        # text-mode file there).
        process = subprocess.Popen(cmd_line,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   universal_newlines=True)
        stdoutput, stderror = process.communicate()

        log.write(stdoutput)
        log.write(stderror)
    return process.returncode


def _collect_outputs(include_index):
    """Move result (and optionally index) files into OUTPUT_DIR; rename *.json to *.gjson."""
    if not os.path.isdir(OUTPUT_DIR):
        os.mkdir(OUTPUT_DIR)

    patterns = ["res_*"]
    if include_index:
        patterns.append("index_*")
    for pattern in patterns:
        for path in glob.glob(pattern):
            shutil.move(path, OUTPUT_DIR)

    for json_path in glob.glob(os.path.join(OUTPUT_DIR, "*.json")):
        # Replace only the extension, never a ".json" occurring earlier in
        # the file name.
        shutil.move(json_path, os.path.splitext(json_path)[0] + ".gjson")


def __main__():
    """
    Entry point: parse the options, run the Mapsembler2 pipeline and gather
    its outputs.

    Steps, in order:
      1. symlink the directory named by $TOOLS as ./tools;
      2. run run_mapsembler2_pipeline.sh, logging the command line, stdout
         and stderr to the --output file;
      3. move res_* (and index_* when -i is "true") into job_outputs/ and
         rename the *.json results to *.gjson.

    Exits with the pipeline's status when it is non-zero, so that a failed
    run is not reported as a success.
    """
    options = _parse_options()

    tools_dir = os.environ.get("TOOLS")
    if not tools_dir:
        sys.exit("mapsembler2.py: the TOOLS environment variable is not set")
    os.symlink(tools_dir, os.path.join(os.getcwd(), "tools"))

    returncode = _run_pipeline(_build_command(options), options.output)

    # Outputs are collected even after a failure so that partial results
    # remain available next to the log.
    _collect_outputs(options.index_files == "true")

    if returncode:
        sys.exit(returncode)


if __name__ == "__main__": __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mapsembler2.xml Fri Jun 05 11:40:49 2015 -0400 @@ -0,0 +1,108 @@ +<tool id="mapsembler2" name="Mapsembler2" version="2.2.3"> + <description>is a targeted assembly software.</description> + <requirements> + <requirement type="package" version="2.2.3">mapsembler2</requirement> + </requirements> +<command interpreter="python"> +mapsembler2.py +-s $input_starters +-r $data_files +-t $output_extension +-k $kmer +-c $coverage +-d $substitutions +-g $genome_size +-f $process_search +-x $max_length +-y $max_depth +--output $output +-i $index_files +</command> + + <inputs> + <!-- Input data files --> + <param name="input_starters" type="data" format="fasta" label="Starters" help="set of input sequences" /> + <param name="data_files" type="data" multiple="true" format="fasta,fastq" label="Read file" help="Data loaded in the script" /> + <param name="output_extension" type="select" label="Select your output extension type"> + <option value="1">a strict sequence</option> + <option value="2">a consensus sequence</option> + <option value="3">a strict graph</option> + <option value="4">a consensus graph</option> + </param> + <param name="kmer" type="integer" label="Size of kmers" value="31" help="Set the length of used kmers. Must fit the compiled value. Only uneven number" /> + <param name="coverage" type="integer" label="Minimal coverage" value="5" help="set the minimal coverage: Used by Phaser (don't use kmers with lower coverage) "/> + <param name="substitutions" type="integer" label="Number of authorized substitutions" value="1" help="set the number of authorized substitutions used while mapping reads on finding SNPs"/> + <param name="genome_size" type="integer" label="Estimated genome size" value="10000000" help="Used only to control memory usage. e.g.3 billion (3000000000) uses 4Gb of RAM." 
/> + <param name="process_search" type="select" label="Process of search" help="Set the process of search in the graph" > + <option value="1">Breadth</option> + <option value="2">Depth</option> + </param> + <param name="max_length" type="integer" label="Max length of nodes" value="40" help="set the maximal length of nodes"/> + <param name="max_depth" type="integer" label="Max depth of nodes" value="10000" help="set the maximal depth of the graph"/> + <param name="index_files" type="boolean" checked="false" default="false" label="Include index output files" /> + </inputs> + + <outputs> + <data format="txt" name="output" label="${tool.name} on ${on_string}: out.txt" > + <discover_datasets pattern="__designation_and_ext__" directory="job_outputs" visible="true" /> + </data> + + </outputs> + <help> + +**Description** + +Mapsembler2 is a targeted assembly software. It takes as input a set of NGS raw reads (fasta or fastq, gzipped or not) and a set of input sequences (starters). It first determines if each starter is read-coherent, i.e. whether reads confirm the presence of each starter in the original sequence. Then for each read-coherent starter, Mapsembler2 outputs its sequence neighborhood as a linear sequence or as a graph, depending on the user's choice. +Mapsembler2 may be used to (not limited to): + +· Validate an assembled sequence (input as starter), e.g. from a de Bruijn graph assembly where read-coherence was not enforced. + +· Check if a gene (input as starter) has a homolog in a set of reads. + +· Check if a known enzyme is present in a metagenomic NGS read set. + +· Enrich unmappable reads by extending them, possibly making them mappable. + +· Check what happens at the extremities of a contig. 
+ +· Remove contaminants or symbiont reads from a read set + +------- + +**Web site** + +http://colibread.inria.fr/mapsembler2/ + +------- + +**Integrated by** + +Cyril Monjeaud + +GenOuest Bio-informatics Core Facility + +UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France) + +support@genouest.org + +If you use this tool in Galaxy, please cite : + +`Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. <https://www.e-biogenouest.org/resources/128>`_ + + </help> +<citations> +<citation type="doi">10.1186/1471-2105-13-48</citation> +<citation type="bibtex">@INPROCEEDINGS{JOBIM2013, + author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.}, + title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France}, + booktitle = {JOBIM 2013 Proceedings}, + year = {2013}, + url = {https://www.e-biogenouest.org/resources/128}, + pages = {97-106} + } +</citation> + +</citations> + +</tool> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Fri Jun 05 11:40:49 2015 -0400 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="mapsembler2" version="2.2.3"> + <repository changeset_revision="486979ed9bc7" name="package_mapsembler2" owner="cmonjeau" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>