view convert.xml @ 24:3accdbe6503b draft default tip

Deleted selected files
author wolma
date Thu, 21 Jul 2016 03:56:19 -0400
parents 5db0545b9004
children
line wrap: on
line source

<tool id="convert" name="Convert" version="0.1.7.3">
  <!-- Short description of the tool's purpose. -->
  <description>between different sequence data formats</description>
  <!-- Shared macro definitions are imported from an external file. -->
  <macros>
    <import>toolshed_macros.xml</import>
  </macros>
  <!-- The "requirements" macro is not defined in this file; presumably it comes from toolshed_macros.xml (verify there). -->
  <expand macro="requirements" />
  <!-- Command line used to query the version of the MiModD package. -->
  <version_command>python3 -m MiModD version -q</version_command>
  <!--
    Command template (Cheetah) that assembles the "MiModD convert" call.

    Two modes are distinguished:
      * split mode (read-group splitting requested, or output format is
        fastq / gzipped fastq): an informational text is written to the
        output_split_on_read_groups dataset and the converted files are
        written with the prefix converted_data/read_group;
      * single-file mode: the result is written to the outputname dataset.

    The template is wrapped in CDATA so that shell operators can be written
    literally instead of as XML entities.
  -->
  <command><![CDATA[
  ## Split mode: tell the user what is going on and prepare the directory
  ## that receives the per-read-group files.
  ## printf is used instead of echo because the escape sequences in the
  ## message are only interpreted by some echo implementations (bash's
  ## builtin echo prints them literally unless called with -e).
  ## The steps are chained with "&&" so that the conversion is not started
  ## when the setup fails.
  #if $str($mode.split_on_rgs) or $str($mode.oformat) in ("fastq", "gz")
    printf "Your input data is now getting processed by MiModD. The output will be split into several files based on the read groups found in the input.\nThis history item will remain in the busy state until the job is finished.\nAfter the job is showing as finished, Galaxy will start adding the results files to your history one by one.\n\nThis may take a while to complete! \n\nYou should refresh your history to see if new files have arrived.\n\nThis message is for your information only and can be deleted from the history once the job has finished.\n" > "${output_split_on_read_groups}" &&
    mkdir converted_data &&
  #end if

  python3 -m MiModD convert

  ## Positional arguments: every entry of the input list contributes its
  ## first file and, for the paired-end formats, also its second file.
  #for $i in $mode.input_list
    "${i.file1}"
    #if $str($mode.iformat) in ("fastq_pe", "gz_pe")
      "${i.file2}"
    #end if
  #end for

  ## The sam and bam branches define "header" as a hidden parameter with
  ## the value "None"; only the fastq branches provide a header dataset.
  #if $str($mode.header) != "None"
    --header "${mode.header}"
  #end if

  ## When outputname renders as "None" (presumably because its filter
  ## removed it in split mode - confirm against the Galaxy version in use),
  ## the results go to the directory scanned by discover_datasets.
  #if $str($outputname) == "None"
    --ofile converted_data/read_group
  #else
    --ofile "${outputname}"
  #end if

  --iformat ${mode.iformat}
  --oformat ${mode.oformat}
  ## Renders as "--split-on-rgs" or as an empty string.
  ${mode.split_on_rgs}
  ]]></command>
  
  <inputs>
    <!--
      The selected input format decides which output formats, which input
      dataset slots and which additional options are offered.
      Every branch defines the same parameter names (oformat, input_list,
      header, split_on_rgs) so that the command template can refer to them
      regardless of the branch that is active.
    -->
    <conditional name="mode">
      <param name="iformat" type="select" label="input file format" help="Your choice will update the interface to display further choices appropriate for your type of input data.">
        <option value="fastq">fastq: single-end (one file)</option>
        <option value="fastq_pe">fastq: paired-end (two files)</option>
        <option value="gz">gzip compressed fastq: single-end (one file)</option>
        <option value="gz_pe">gzip compressed fastq: paired-end (two files)</option>
        <option value="sam">sam</option>
        <option value="bam">bam</option>
      </param>

      <!-- Uncompressed fastq, single-end: one file per repeat entry. -->
      <when value="fastq">
        <param name="oformat" type="select" label="output file format">
          <option value="sam">sam</option>
          <option value="bam">bam</option>
        </param>
        <repeat name="input_list" title="fastq input dataset" min="1" default="1">
          <param name="file1" type="data" format="fastq" label="inputfile"/>
        </repeat>
        <param name="header" type="data" format="sam" label="Use Header File" help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."/>
        <!-- No read-group splitting for fastq input: hidden, empty value. -->
        <param name="split_on_rgs" type="hidden" value=""/>
      </when>

      <!-- Uncompressed fastq, paired-end: two files per repeat entry. -->
      <when value="fastq_pe">
        <param name="oformat" type="select" label="output file format">
          <option value="sam">sam</option>
          <option value="bam">bam</option>
        </param>
        <repeat name="input_list" title="fastq input datasets" min="1" default="1">
          <param name="file1" type="data" format="fastq" label="inputfile with the first set of reads of paired-end data"/>
          <param name="file2" type="data" format="fastq" label="inputfile with the second set of reads of paired-end data"/>
        </repeat>
        <param name="header" type="data" format="sam" label="Use Header File" help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."/>
        <param name="split_on_rgs" type="hidden" value=""/>
      </when>

      <!-- Gzipped fastq, single-end: datasets are accepted with the generic "data" format. -->
      <when value="gz">
        <param name="oformat" type="select" label="output file format">
          <option value="sam">sam</option>
          <option value="bam">bam</option>
        </param>
        <repeat name="input_list" title="fastq.gz input dataset" min="1" default="1">
          <param name="file1" type="data" format="data" label="inputfile"/>
        </repeat>
        <param name="header" type="data" format="sam" label="Use Header File" help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."/>
        <param name="split_on_rgs" type="hidden" value=""/>
      </when>

      <!-- Gzipped fastq, paired-end: two generic "data" datasets per repeat entry. -->
      <when value="gz_pe">
        <param name="oformat" type="select" label="output file format">
          <option value="sam">sam</option>
          <option value="bam">bam</option>
        </param>
        <repeat name="input_list" title="fastq.gz input datasets" min="1" default="1">
          <param name="file1" type="data" format="data" label="inputfile with the first set of reads of paired-end data"/>
          <param name="file2" type="data" format="data" label="inputfile with the second set of reads of paired-end data"/>
        </repeat>
        <param name="header" type="data" format="sam" label="Use Header File" help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."/>
        <param name="split_on_rgs" type="hidden" value=""/>
      </when>

      <!--
        SAM input: exactly one dataset (min = max = 1). "bam" is listed
        first among the output formats. The header parameter is a hidden
        "None" placeholder, which is the value the command template tests
        for before adding a header argument.
      -->
      <when value="sam">
        <param name="oformat" type="select" label="output file format">
          <option value="bam">bam</option>
          <option value="sam">sam</option>
          <option value="fastq">fastq</option>
          <option value="gz">gzipped fastq</option>
        </param>
        <repeat name="input_list" title="sam input dataset" min="1" max="1" default="1">
          <param name="file1" type="data" format="sam" label="inputfile"/>
        </repeat>
        <param name="header" type="hidden" value="None"/>
        <param name="split_on_rgs" type="boolean" checked="false" truevalue="--split-on-rgs" falsevalue="" label="Split output based on read group IDs" help="If the input file contains reads from different read groups, write them to separate output files; implied automatically for conversions to fastq and gzipped fastq format"/>
      </when>

      <!-- BAM input: same layout as the SAM branch, with "sam" listed first among the output formats. -->
      <when value="bam">
        <param name="oformat" type="select" label="output file format">
          <option value="sam">sam</option>
          <option value="bam">bam</option>
          <option value="fastq">fastq</option>
          <option value="gz">gzipped fastq</option>
        </param>
        <repeat name="input_list" title="bam input dataset" min="1" max="1" default="1">
          <param name="file1" type="data" format="bam" label="inputfile"/>
        </repeat>
        <param name="header" type="hidden" value="None"/>
        <param name="split_on_rgs" type="boolean" checked="false" truevalue="--split-on-rgs" falsevalue="" label="Split output based on read group IDs" help="If the input file contains reads from different read groups, write them to separate output files; implied automatically for conversions to fastq and gzipped fastq format"/>
      </when>
    </conditional>
  </inputs>
  
  <outputs>
    <!--
      Single-file result. Only present when no read-group splitting was
      requested and the output format is neither fastq nor gzipped fastq.
      The datatype defaults to bam and is switched to sam when the sam
      output format was selected.
    -->
    <data name="outputname" format="bam" label="Converted reads from MiModD ${tool.name} on ${on_string}">
      <change_format>
        <when input="mode.oformat" value="sam" format="sam"/>
      </change_format>
      <filter>not mode['split_on_rgs'] and mode['oformat'] not in ("fastq", "gz")</filter>
    </data>

    <!--
      Split-mode result; its filter is the exact complement of the one
      above. The dataset itself holds the informational text written by
      the command, and the converted files are collected from the
      converted_data directory.
    -->
    <data name="output_split_on_read_groups" format="txt" label="MiModD ${tool.name} run on ${on_string}">
      <filter>mode['split_on_rgs'] or mode['oformat'] in ("fastq", "gz")</filter>
      <discover_datasets directory="converted_data" pattern="__designation_and_ext__" visible="true"/>
    </data>
  </outputs>

<help>
.. class:: infomark

   **What it does**

The tool converts between different file formats used for storing next-generation sequencing data.

As input file types it can handle uncompressed or gzipped fastq, SAM or BAM format, which it can convert to SAM or BAM format. Input in SAM or BAM format can, in addition, be converted to uncompressed or gzipped fastq format.

**Notes:**

1) In its standard configuration Galaxy will decompress any .gz files during their upload, so the option to convert gzipped fastq input is useful only with customized Galaxy instances or by using linked files as explained in our `recipe for using gzipped fastq files in Galaxy`_ from the `MiModD user guide`_.

2) The tool can convert fastq files representing data from paired-end sequencing runs to appropriate SAM/BAM format provided that the mate information is split over two fastq files in corresponding order.

   **TIP:** If your paired-end data is arranged differently, you may look into the *fastq splitter* and *fastq de-interlacer* tools for Galaxy from the `Fastq Manipulation category`_ of the Galaxy Tool Shed to see if they can convert your files to the expected format.
   
3) Merging partial fastq (or gzipped fastq) files into a single SAM/BAM file is supported both for single-end and paired-end data. Simply add additional input datasets and select the appropriate files (pairs of files in case of paired-end data).

   Concatenation of SAM/BAM files during conversion is currently not supported.

4) For input in fastq format a SAM header file providing run metadata **has to be specified**. The information in this file will be used as the header data of the new SAM/BAM file. You can use the *NGS Run Annotation* tool to generate a new header file for your data.

   For input in SAM/BAM format the tool will simply copy the existing header data to the new file. To modify the header of an existing SAM/BAM file, use the *Reheader BAM file* tool instead.

.. _Fastq Manipulation category: https://toolshed.g2.bx.psu.edu/repository/browse_repositories_in_category?id=310ff67d4caf6531
.. _recipe for using gzipped fastq files in Galaxy: http://mimodd.readthedocs.org/en/latest/recipes.html#use-gzipped-fastq-files-in-galaxy
.. _MiModD user guide: http://mimodd.readthedocs.org/en/latest

</help>
</tool>