view convert.xml @ 18:2742ad4d1608 draft

rebase on package_python_3_4_lean
author wolma
date Tue, 26 Apr 2016 11:21:43 -0400
parents 93db2f9bca12
children c46406466625
line wrap: on
line source

<tool id="convert" name="Convert" version="0.1.7.2">
  <!-- Short description displayed next to the tool name ("Convert"). -->
  <description>between different sequence data formats</description>
  <!-- Import macro definitions from toolshed_macros.xml, a separate file not shown here. -->
  <macros>
    <import>toolshed_macros.xml</import>
  </macros>
  <!-- Expands the macro named "requirements"; presumably it declares the tool's
       package dependencies in toolshed_macros.xml (TODO confirm against that file). -->
  <expand macro="requirements"/>
  <!-- Command Galaxy runs to obtain the version string of the wrapped mimodd program. -->
  <version_command>mimodd version -q</version_command>
  <!-- Cheetah template for the mimodd convert command line.
       Wrapped in CDATA so that shell operators need no XML escaping. -->
  <command><![CDATA[
  ## Split mode: results are written as one file per read group. It is active
  ## when the user ticked "split on read groups" or when the output format is
  ## (gzipped) fastq. This is the same rule the <filter> expressions of the two
  ## declared outputs use, so it is computed once here and reused below.
  #set $split_output = $str($mode.split_on_rgs) != "" or $str($mode.oformat) in ("fastq", "gz")

  #if $split_output:
    ## printf (not echo) so that the \n sequences are turned into line breaks
    ## regardless of which shell executes the job script.
    printf "Your input data is now getting processed by MiModD. The output will be split into several files based on the read groups found in the input.\nThis history item will remain in the busy state until the job is finished.\nAfter the job is showing as finished, Galaxy will start adding the results files to your history one by one.\n\nThis may take a while to complete! \n\nYou should refresh your history to see if new files have arrived.\n\nThis message is for your information only and can be deleted from the history once the job has finished.\n" > "$output_split_on_read_groups" &&

    ## Directory scanned by the discover_datasets element of the outputs section.
    mkdir -p converted_data &&
  #end if

  mimodd convert

  ## One input file per repeat entry, or a pair of files for paired-end formats.
  #for $i in $mode.input_list
    "${i.file1}"
    #if $str($mode.iformat) in ("fastq_pe", "gz_pe"):
      "${i.file2}"
    #end if
  #end for

  ## The sam/bam input cases define header as a hidden parameter with value "None".
  #if $str($mode.header) != "None":
    --header "${mode.header}"
  #end if

  #if $split_output:
    --ofile converted_data/read_group
  #else
    --ofile "$outputname"
  #end if
  --iformat ${mode.iformat}
  --oformat ${mode.oformat}
  ${mode.split_on_rgs}
  ]]></command>
  
  <inputs>
    <!-- The selected input format decides which of the <when> cases below is
         shown. Every case defines the same four parameter names (oformat,
         input_list, header, split_on_rgs) so that the command template can
         address them uniformly as $mode.<name>. -->
    <conditional name="mode">
      <param name="iformat" type="select" label="input file format" help="Your choice will update the interface to display further choices appropriate for your type of input data.">
        <option value="fastq">fastq: single-end (one file)</option>
        <option value="fastq_pe">fastq: paired-end (two files)</option>
        <option value="gz">gzip compressed fastq: single-end (one file)</option>
        <option value="gz_pe">gzip compressed fastq: paired-end (two files)</option>
        <option value="sam">sam</option>
        <option value="bam">bam</option>
      </param>

      <!-- Single-end fastq: one file per repeat entry, header file selectable, no splitting. -->
      <when value="fastq">
        <param name="oformat" type="select" label="output file format">
          <option value="sam">sam</option>
          <option value="bam">bam</option>
        </param>
        <repeat name="input_list" title="fastq input dataset" min="1" default="1">
          <param name="file1" type="data" format="fastq" label="inputfile"/>
        </repeat>
        <param name="header" type="data" format="sam" label="Use Header File" help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."/>
        <param name="split_on_rgs" type="hidden" value=""/>
      </when>

      <!-- Paired-end fastq: two files per repeat entry. -->
      <when value="fastq_pe">
        <param name="oformat" type="select" label="output file format">
          <option value="sam">sam</option>
          <option value="bam">bam</option>
        </param>
        <repeat name="input_list" title="fastq input datasets" min="1" default="1">
          <param name="file1" type="data" format="fastq" label="inputfile with the first set of reads of paired-end data"/>
          <param name="file2" type="data" format="fastq" label="inputfile with the second set of reads of paired-end data"/>
        </repeat>
        <param name="header" type="data" format="sam" label="Use Header File" help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."/>
        <param name="split_on_rgs" type="hidden" value=""/>
      </when>

      <!-- Single-end gzipped fastq: input datasets are accepted with the generic "data" format. -->
      <when value="gz">
        <param name="oformat" type="select" label="output file format">
          <option value="sam">sam</option>
          <option value="bam">bam</option>
        </param>
        <repeat name="input_list" title="fastq.gz input dataset" min="1" default="1">
          <param name="file1" type="data" format="data" label="inputfile"/>
        </repeat>
        <param name="header" type="data" format="sam" label="Use Header File" help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."/>
        <param name="split_on_rgs" type="hidden" value=""/>
      </when>

      <!-- Paired-end gzipped fastq: two generic "data" datasets per repeat entry. -->
      <when value="gz_pe">
        <param name="oformat" type="select" label="output file format">
          <option value="sam">sam</option>
          <option value="bam">bam</option>
        </param>
        <repeat name="input_list" title="fastq.gz input datasets" min="1" default="1">
          <param name="file1" type="data" format="data" label="inputfile with the first set of reads of paired-end data"/>
          <param name="file2" type="data" format="data" label="inputfile with the second set of reads of paired-end data"/>
        </repeat>
        <param name="header" type="data" format="sam" label="Use Header File" help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."/>
        <param name="split_on_rgs" type="hidden" value=""/>
      </when>

      <!-- SAM input: exactly one dataset, no header file (hidden "None"), optional split by read group. -->
      <when value="sam">
        <param name="oformat" type="select" label="output file format">
          <option value="bam">bam</option>
          <option value="sam">sam</option>
          <option value="fastq">fastq</option>
          <option value="gz">gzipped fastq</option>
        </param>
        <repeat name="input_list" title="sam input dataset" min="1" max="1" default="1">
          <param name="file1" type="data" format="sam" label="inputfile"/>
        </repeat>
        <param name="header" type="hidden" value="None"/>
        <param name="split_on_rgs" type="boolean" checked="false" truevalue="--split-on-rgs" falsevalue="" label="Split output based on read group IDs" help="If the input file contains reads from different read groups, write them to separate output files; implied automatically for conversions to fastq and gzipped fastq format"/>
      </when>

      <!-- BAM input: exactly one dataset, no header file (hidden "None"), optional split by read group. -->
      <when value="bam">
        <param name="oformat" type="select" label="output file format">
          <option value="sam">sam</option>
          <option value="bam">bam</option>
          <option value="fastq">fastq</option>
          <option value="gz">gzipped fastq</option>
        </param>
        <repeat name="input_list" title="bam input dataset" min="1" max="1" default="1">
          <param name="file1" type="data" format="bam" label="inputfile"/>
        </repeat>
        <param name="header" type="hidden" value="None"/>
        <param name="split_on_rgs" type="boolean" checked="false" truevalue="--split-on-rgs" falsevalue="" label="Split output based on read group IDs" help="If the input file contains reads from different read groups, write them to separate output files; implied automatically for conversions to fastq and gzipped fastq format"/>
      </when>
    </conditional>
  </inputs>
  
  <outputs>
    <!-- Exactly one of the two outputs below is produced per job: their filter
         expressions are logical complements of each other. -->

    <!-- Single results file. Its format is bam unless sam was selected as the
         output format. Dropped when the output is split by read group or when
         converting to (gzipped) fastq. -->
    <data name="outputname" format="bam" label="Converted reads from MiModD ${tool.name} on ${on_string}">
      <change_format>
        <when input="mode.oformat" value="sam" format="sam"/>
      </change_format>
      <filter>
        (not mode['split_on_rgs'] and mode['oformat'] not in ("fastq", "gz"))
      </filter>
    </data>

    <!-- Informational text dataset written by the command in split mode. The
         actual results are collected from the converted_data directory via
         discover_datasets. -->
    <data name="output_split_on_read_groups" format="txt" label="MiModD ${tool.name} run on ${on_string}">
      <filter>
        (mode['split_on_rgs'] or mode['oformat'] in ("fastq", "gz"))
      </filter>
      <discover_datasets pattern="__designation_and_ext__" directory="converted_data" visible="true"/>
    </data>
  </outputs>

<help>
.. class:: infomark

   **What it does**

The tool converts between different file formats used for storing next-generation sequencing data.

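As input file types it can handle uncompressed or gzipped fastq, SAM or BAM format. Input in fastq format can be converted to SAM or BAM format; input in SAM or BAM format can be converted to SAM, BAM, fastq or gzipped fastq format.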

**Notes:**

1) In its standard configuration Galaxy will decompress any .gz files during their upload, so the option to convert gzipped fastq input is useful only with customized Galaxy instances or by using linked files as explained in our `recipe for using gzipped fastq files in Galaxy`_ from the `MiModD user guide`_.

2) The tool can convert fastq files representing data from paired-end sequencing runs to appropriate SAM/BAM format provided that the mate information is split over two fastq files in corresponding order.

   **TIP:** If your paired-end data is arranged differently, you may look into the *fastq splitter* and *fastq de-interlacer* tools for Galaxy from the `Fastq Manipulation category`_ of the Galaxy Tool Shed to see if they can convert your files to the expected format.
   
3) Merging partial fastq (or gzipped fastq) files into a single SAM/BAM file is supported both for single-end and paired-end data. Simply add additional input datasets and select the appropriate files (pairs of files in case of paired-end data).

   Concatenation of SAM/BAM files during conversion is currently not supported.

4) For input in fastq format a SAM header file providing run metadata **has to be specified**. The information in this file will be used as the header data of the new SAM/BAM file. You can use the *NGS Run Annotation* tool to generate a new header file for your data.

   For input in SAM/BAM format the tool will simply copy the existing header data to the new file. To modify the header of an existing SAM/BAM file, use the *Reheader BAM file* tool instead.

.. _Fastq Manipulation category: https://toolshed.g2.bx.psu.edu/repository/browse_repositories_in_category?id=310ff67d4caf6531
.. _recipe for using gzipped fastq files in Galaxy: http://mimodd.readthedocs.org/en/latest/recipes.html#use-gzipped-fastq-files-in-galaxy
.. _MiModD user guide: http://mimodd.readthedocs.org/en/latest

</help>
</tool>