view convert.xml @ 0:e2b5f503e7b8 draft

Imported from capsule None
author wolma
date Sat, 13 Dec 2014 17:20:11 -0500
parents
children
line wrap: on
line source

<tool id="convert" name="Convert">
  <!-- Short description of the tool. -->
  <description>between different sequence data formats</description>
  <!-- The tool depends on the mimodd package, version 0.1.5. -->
  <requirements>
    <requirement type="package" version="0.1.5">mimodd</requirement>
  </requirements>
  <!-- Command used to obtain the version string of the wrapped program. -->
  <version_command>mimodd version -q</version_command>
  <!-- Builds the "mimodd convert" call.
       Every entry of the input_list repeat contributes file1, plus file2 when
       the selected input format is one of the paired-end ones.
       The header option is emitted unless the header parameter is the literal
       string "None" (the hidden value set by the sam and bam cases).
       Dataset paths are single-quoted so the shell passes each one as exactly
       one argument. -->
  <command><![CDATA[
    mimodd convert
    #for $i in $mode.input_list
      '${i.file1}'
      #if $str($mode.iformat) in ("fastq_pe", "gz_pe")
        '${i.file2}'
      #end if
    #end for
    #if $str($mode.header) != "None"
      --header '${mode.header}'
    #end if
    --ofile '${outputname}'
    --iformat ${mode.iformat}
    --oformat ${mode.oformat}
  ]]></command>
  
  <inputs>
    <!-- The selected input format decides which of the cases below is shown. -->
    <conditional name="mode">
      <param name="iformat" type="select" label="input file format" help="Your choice will update the interface to display further choices appropriate for your type of input data.">
        <option value="fastq">fastq: single-end (one file)</option>
        <option value="fastq_pe">fastq: paired-end (two files)</option>
        <option value="gz">gzip compressed fastq: single-end (one file)</option>
        <option value="gz_pe">gzip compressed fastq: paired-end (two files)</option>
        <option value="sam">sam</option>
        <option value="bam">bam</option>
      </param>

      <!-- fastq, single-end: one file per repeat entry, plus a SAM header dataset -->
      <when value="fastq">
        <param name="oformat" type="select" label="output file format">
          <option value="sam">sam</option>
          <option value="bam">bam</option>
        </param>
        <repeat name="input_list" title="fastq input dataset" default="1" min="1">
          <param name="file1" type="data" format="fastq" label="inputfile"/>
        </repeat>
        <param name="header" type="data" format="sam" label="Use Header File" help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."/>
      </when>

      <!-- fastq, paired-end: two files per repeat entry, plus a SAM header dataset -->
      <when value="fastq_pe">
        <param name="oformat" type="select" label="output file format">
          <option value="sam">sam</option>
          <option value="bam">bam</option>
        </param>
        <repeat name="input_list" title="fastq input datasets" default="1" min="1">
          <param name="file1" type="data" format="fastq" label="inputfile with the first set of reads of paired-end data"/>
          <param name="file2" type="data" format="fastq" label="inputfile with the second set of reads of paired-end data"/>
        </repeat>
        <param name="header" type="data" format="sam" label="Use Header File" help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."/>
      </when>

      <!-- gzipped fastq, single-end: inputs are declared with the generic "data" format -->
      <when value="gz">
        <param name="oformat" type="select" label="output file format">
          <option value="sam">sam</option>
          <option value="bam">bam</option>
        </param>
        <repeat name="input_list" title="fastq.gz input dataset" default="1" min="1">
          <param name="file1" type="data" format="data" label="inputfile"/>
        </repeat>
        <param name="header" type="data" format="sam" label="Use Header File" help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."/>
      </when>

      <!-- gzipped fastq, paired-end: inputs are declared with the generic "data" format -->
      <when value="gz_pe">
        <param name="oformat" type="select" label="output file format">
          <option value="sam">sam</option>
          <option value="bam">bam</option>
        </param>
        <repeat name="input_list" title="fastq.gz input datasets" default="1" min="1">
          <param name="file1" type="data" format="data" label="inputfile with the first set of reads of paired-end data"/>
          <param name="file2" type="data" format="data" label="inputfile with the second set of reads of paired-end data"/>
        </repeat>
        <param name="header" type="data" format="sam" label="Use Header File" help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."/>
      </when>

      <!-- sam input: exactly one dataset, bam output only; header is the fixed string "None" -->
      <when value="sam">
        <param name="oformat" type="select" label="output file format">
          <option value="bam">bam</option>
        </param>
        <repeat name="input_list" title="sam input dataset" default="1" min="1" max="1">
          <param name="file1" type="data" format="sam" label="inputfile"/>
        </repeat>
        <param name="header" type="hidden" value="None"/>
      </when>

      <!-- bam input: exactly one dataset, sam output only; header is the fixed string "None" -->
      <when value="bam">
        <param name="oformat" type="select" label="output file format">
          <option value="sam">sam</option>
        </param>
        <repeat name="input_list" title="bam input dataset" default="1" min="1" max="1">
          <param name="file1" type="data" format="bam" label="inputfile"/>
        </repeat>
        <param name="header" type="hidden" value="None"/>
      </when>
    </conditional>
  </inputs>
  
  <outputs>
    <!-- Single result dataset: declared as bam, switched to sam when the
         selected output format (mode.oformat) is "sam". -->
    <data name="outputname" format="bam" label="Converted reads from MiModD ${tool.name} on ${on_string}">
      <change_format>
        <when input="mode.oformat" value="sam" format="sam"/>
      </change_format>
    </data>
  </outputs>

<help>
.. class:: infomark

   **What it does**

The tool converts between different file formats used for storing next-generation sequencing data.

As input file types it can handle uncompressed or gzipped fastq, SAM or BAM format, which it can convert to SAM or BAM format.

**Notes:**

1) In its standard configuration Galaxy will decompress any .gz files during their upload, so the option to convert gzipped fastq input is useful only with customized Galaxy instances or by using linked files as explained in our `recipe for using gzipped fastq files in Galaxy`_ from the `MiModD user guide`_.

2) The tool can convert fastq files representing data from paired-end sequencing runs to appropriate SAM/BAM format provided that the mate information is split over two fastq files in corresponding order.

   **TIP:** If your paired-end data is arranged differently, you may look into the *fastq splitter* and *fastq de-interlacer* tools for Galaxy from the `Fastq Manipulation category`_ of the Galaxy Tool Shed to see if they can convert your files to the expected format.
   
3) Merging partial fastq (or gzipped fastq) files into a single SAM/BAM file is supported both for single-end and paired-end data. Simply add additional input datasets and select the appropriate files (pairs of files in case of paired-end data).

   Concatenation of SAM/BAM files during conversion is currently not supported.

4) For input in fastq format a SAM header file providing run metadata **has to be specified**. The information in this file will be used as the header data of the new SAM/BAM file. You can use the *NGS Run Annotation* tool to generate a new header file for your data.

   For input in SAM/BAM format the tool will simply copy the existing header data to the new file. To modify the header of an existing SAM/BAM file, use the *Reheader BAM file* tool instead.

.. _Fastq Manipulation category: https://toolshed.g2.bx.psu.edu/repository/browse_repositories_in_category?id=310ff67d4caf6531
.. _recipe for using gzipped fastq files in Galaxy: http://mimodd.readthedocs.org/en/latest/recipes.html#use-gzipped-fastq-files-in-galaxy
.. _MiModD user guide: http://mimodd.readthedocs.org/en/latest

</help>
</tool>