class GenericMapJson(data.Text):
    """Base format class for any JSON file."""
    file_ext = "mapjson"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text of *dataset*.

        When the underlying dataset has not been purged, the peek is read
        from ``dataset.file_name`` via ``data.get_file_peek``; otherwise
        fixed placeholder strings are stored instead.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            dataset.blurb = 'Mapjson data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff(self, filename):
        """Return True when *filename* looks like a JSON document.

        This is a cheap structural check, not a full parse: the first
        non-whitespace byte of the file must open a JSON object (``{``) or
        a JSON array (``[``). Empty files and files whose first 4096 bytes
        are all whitespace are reported as not JSON.

        NOTE(review): the previous implementation read one line, discarded
        it and implicitly returned None for every file, so this datatype was
        never auto-detected. Confirm the sniffer order in the datatypes
        configuration before relying on auto-detection.
        """
        # Binary mode plus a bounded read: no decoding errors on non-text
        # uploads and no unbounded readline() on files without newlines.
        with open(filename, "rb") as handle:
            head = handle.read(4096)
        return head.lstrip()[:1] in (b"{", b"[")
def _build_command(options):
    """Return the argv list that launches the Mapsembler2 pipeline script."""
    return [
        "run_mapsembler2_pipeline.sh",
        "-s", options.input_starters,
        # The wrapper receives the read files comma-separated; the pipeline
        # gets them as ONE space-separated argument.
        "-r", " ".join(options.input_files.split(",")),
        "-t", options.output_extension,
        "-k", options.kmer,
        "-c", options.coverage,
        "-d", options.substitutions,
        "-g", options.genome_size,
        "-f", options.process_search,
        "-x", options.max_length,
        "-y", options.max_depth,
    ]


def _collect_outputs(keep_index, out_dir="job_outputs"):
    """Move pipeline results into *out_dir* and rename ``*.json`` to ``*.gjson``."""
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    patterns = ["res_*"]
    if keep_index:
        patterns.append("index_*")
    for pattern in patterns:
        for path in glob.glob(pattern):
            shutil.move(path, "job_outputs/")

    for json_path in glob.glob("job_outputs/*.json"):
        # Swap only the trailing extension; str.replace() would also rewrite
        # any ".json" occurring earlier in the file name.
        root, _ext = os.path.splitext(json_path)
        shutil.move(json_path, root + ".gjson")


def __main__():
    """Run the Mapsembler2 pipeline and gather its outputs.

    Steps, in order:

    1. Parse the command-line options (``-s -r -t -k -c -d -g -f -x -y
       --output -i``).
    2. Symlink the directory named by the ``TOOLS`` environment variable to
       ``./tools``.
    3. Run ``run_mapsembler2_pipeline.sh`` and write the command line, then
       its stdout, then its stderr to the ``--output`` log file.
    4. Move ``res_*`` files (and ``index_*`` files when ``-i true``) into
       ``job_outputs/`` and rename the ``*.json`` files there to ``*.gjson``.

    Raises:
        SystemExit: via ``parser.error`` when a required option is missing.
        KeyError: when the ``TOOLS`` environment variable is not set.
        OSError: when the symlink cannot be created or the pipeline script
            cannot be started.
    """
    parser = optparse.OptionParser()
    parser.add_option("-s", dest="input_starters")
    parser.add_option("-r", dest="input_files")
    parser.add_option("-t", dest="output_extension")
    parser.add_option("-k", dest="kmer")
    parser.add_option("-c", dest="coverage")
    parser.add_option("-d", dest="substitutions")
    parser.add_option("-g", dest="genome_size")
    parser.add_option("-f", dest="process_search")
    parser.add_option("-x", dest="max_length")
    parser.add_option("-y", dest="max_depth")
    parser.add_option("--output")
    parser.add_option("-i", dest="index_files")

    (options, args) = parser.parse_args()

    # Every option except -i is used unconditionally below; report the
    # missing ones up front instead of failing later with a traceback.
    required = (
        "input_starters", "input_files", "output_extension", "kmer",
        "coverage", "substitutions", "genome_size", "process_search",
        "max_length", "max_depth", "output",
    )
    missing = [name for name in required if getattr(options, name) is None]
    if missing:
        parser.error("missing required option(s): " + ", ".join(missing))

    # import tools
    os.symlink(os.environ['TOOLS'], os.getcwd() + '/tools')

    cmd_line = _build_command(options)

    header = "[COMMAND LINE] " + ' '.join(cmd_line)
    if not isinstance(header, bytes):
        # Python 3: the child's output arrives as bytes, so the log is
        # written in binary mode and the header is encoded to match.
        header = header.encode("utf-8", "replace")

    # The context manager closes the log even if the pipeline cannot start.
    with open(options.output, "wb") as log:
        log.write(header)
        process = subprocess.Popen(cmd_line,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        stdoutput, stderror = process.communicate()
        log.write(stdoutput)
        log.write(stderror)

    _collect_outputs(options.index_files == "true")


if __name__ == "__main__":
    __main__()
It takes as input a set of NGS raw reads (fasta or fastq, gzipped or not) and a set of input sequences (starters). It first determines if each starter is read-coherent, i.e. whether reads confirm the presence of each starter in the original sequence. Then for each read-coherent starter, Mapsembler2 outputs its sequence neighborhood as a linear sequence or as a graph, depending on the user's choice. +Mapsembler2 may be used to (non-exhaustive list): + +· Validate an assembled sequence (input as starter), e.g. from a de Bruijn graph assembly where read-coherence was not enforced. + +· Check if a gene (input as starter) has a homolog in a set of reads. + +· Check if a known enzyme is present in a metagenomic NGS read set. + +· Enrich unmappable reads by extending them, possibly making them mappable. + +· Check what happens at the extremities of a contig. + +· Remove contaminants or symbiont reads from a read set. + +------- + +**Web site** + +http://colibread.inria.fr/mapsembler2/ + +------- + +**Integrated by** + +Cyril Monjeaud + +GenOuest Bio-informatics Core Facility + +UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France) + +support@genouest.org + +If you use this tool in Galaxy, please cite: + +`Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. <https://www.e-biogenouest.org/resources/128>`_ + + + +10.1186/1471-2105-13-48 +@INPROCEEDINGS{JOBIM2013, + author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. 
and Collin, O.}, + title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France}, + booktitle = {JOBIM 2013 Proceedings}, + year = {2013}, + url = {https://www.e-biogenouest.org/resources/128}, + pages = {97-106} + } + + + + + + diff -r 000000000000 -r f905f982ee3d tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Fri Jun 05 11:40:49 2015 -0400 @@ -0,0 +1,6 @@ + + + + + +