<!--
view print_reads.xml @ 0:e768f4851646 draft

Imported from capsule None
author devteam
date Tue, 01 Apr 2014 10:48:38 -0400
parents
children a8b94864fe35
line wrap: on
line source
-->

<tool id="gatk_print_reads" name="Print Reads" version="0.0.1">
  <description>from BAM files</description>
  <!-- Packages that must be resolvable on the execution host. -->
  <requirements>
    <requirement type="package" version="1.4">gatk</requirement>
    <requirement type="package" version="0.1.18">samtools</requirement>
  </requirements>
  <!-- Macros expanded and included below are imported from this file. -->
  <macros>
    <import>gatk_macros.xml</import>
  </macros>
  <!--
    Command template (Cheetah), run through gatk_wrapper.py with the python interpreter.
    * Each BAM in reference_source.input_bams is handed to the wrapper with -d as an
      "-I" input; when the dataset carries bam_index metadata, the index is passed in a
      second -d entry under the same gatk_input_N name.
    * The quoted -p argument is the java command line for the PrintReads walker. -R is
      added there only when the reference selector is not "history".
    * platform and read_group are emitted only when they evaluate true in the template;
      every entry of the two sample repeats contributes one option.
    * standard_gatk_options is not defined in this file (presumably it comes from
      gatk_macros.xml; confirm there).
  -->
  <command interpreter="python">gatk_wrapper.py
   --max_jvm_heap_fraction "1"
   --stdout "${output_log}"
   #for $i, $input_bam in enumerate( $reference_source.input_bams ):
       -d "-I" "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "gatk_input_${i}"
       #if str( $input_bam.input_bam.metadata.bam_index ) != "None":
           -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index
       #end if
   #end for
   -p 'java 
    -jar "\$JAVA_JAR_PATH/GenomeAnalysisTK.jar"
    -T "PrintReads"
    ##--num_threads 4 ##hard coded, for now
    --out "${output_bam}"
    -et "NO_ET" ##ET no phone home
    #if $reference_source.reference_source_selector != "history":
        -R "${reference_source.ref_file.fields.path}"
    #end if
    --number "${number}"
    #if $platform:
        --platform "${platform}"
    #end if
    #if $read_group:
        --readGroup "${read_group}"
    #end if
    #for $sample_file in $sample_file_repeat:
        --sample_file "${sample_file.input_sample_file}"
    #end for
    #for $sample_name in $sample_name_repeat:
        --sample_name "${sample_name.sample_name}"
    #end for
   '
   
    #include source=$standard_gatk_options#
    
  </command>
  <!--
    User-facing parameters.
    The "reference_source" conditional declares the same "input_bams" repeat and a
    "ref_file" parameter in both branches; the branches differ in the kind of ref_file
    (data-table select vs. history FASTA dataset) and in the validators on the BAM input.
    The remaining parameters map one-to-one onto the PrintReads options named in their
    help attributes.
  -->
  <inputs>
    <conditional name="reference_source">
      <expand macro="reference_source_selector_param" />
      <!-- Built-in reference: BAM dbkey must be set and present in gatk_picard_indexes. -->
      <when value="cached">
        <repeat name="input_bams" title="BAM file" min="1" help="-I,--input_file &amp;lt;input_file&amp;gt;">
            <param name="input_bam" type="data" format="bam" label="BAM file">
              <validator type="unspecified_build" />
              <validator type="dataset_metadata_in_data_table" table_name="gatk_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
            </param>
        </repeat>
        <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
          <options from_data_table="gatk_picard_indexes">
            <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> does not yet work in a repeat...-->
          </options>
          <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
        </param>
      </when>
      <!-- Reference taken from the history: no dbkey validation on the BAM inputs. -->
      <when value="history"> <!-- FIX ME!!!! -->
        <repeat name="input_bams" title="BAM file" min="1" help="-I,--input_file &amp;lt;input_file&amp;gt;">
            <param name="input_bam" type="data" format="bam" label="BAM file" />
        </repeat>
        <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
      </when>
    </conditional>

    <param name="number" type="integer" value="-1" label="Print the first n reads from the file, discarding the rest" help="-n,--number &amp;lt;number&amp;gt;" />
    <param name="platform" type="text" value="" label="Exclude all reads with this platform from the output" help="-platform,--platform &amp;lt;platform&amp;gt;" />
    <param name="read_group" type="text" value="" label="Exclude all reads with this read group from the output" help="-readGroup,--readGroup &amp;lt;readGroup&amp;gt;" />
    <repeat name="sample_file_repeat" title="File containing a list of samples to include" help="-sf,--sample_file &amp;lt;sample_file&amp;gt;">
        <!-- "txt" is the plain-text extension, matching the one used for the output_log dataset. -->
        <param name="input_sample_file" type="data" format="txt" label="Sample file" />
    </repeat>
    <repeat name="sample_name_repeat" title="Sample name to be included in the analysis" help="-sn,--sample_name &amp;lt;sample_name&amp;gt;">
        <param name="sample_name" type="text" label="Sample name" />
    </repeat>

    <expand macro="gatk_param_type_conditional" />

  </inputs>
  <!--
    Datasets produced by a run: output_bam receives the reads written by PrintReads,
    output_log receives the wrapper's captured stdout.
  -->
  <outputs>
    <data format="bam" name="output_bam" label="${tool.name} on ${on_string} (BAM)" />
    <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
  </outputs>
  <!--
    Functional test: reference supplied from the history (phiX), a single BAM input,
    number/platform/read_group left at their default values and no sample repeats.
    Both outputs are checked with compare="contains".
  -->
  <tests>
    <test>
      <param name="reference_source_selector" value="history" />
      <param name="ref_file" value="phiX.fasta" ftype="fasta" />
      <param name="input_bam"
             value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam"
             ftype="bam" />
      <param name="number" value="-1" />
      <param name="platform" value="" />
      <param name="read_group" value="" />
      <param name="sample_file_repeat" value="0" />
      <param name="sample_name_repeat" value="0" />
      <param name="gatk_param_type_selector" value="basic" />
      <output name="output_bam"
              file="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam"
              ftype="bam"
              compare="contains" />
      <output name="output_log"
              file="gatk/gatk_print_reads/gatk_print_reads_out_1.log.contains"
              compare="contains" />
    </test>
  </tests>
  <help>
**What it does**

PrintReads can dynamically merge the contents of multiple input BAM files, resulting in merged output sorted in coordinate order.

For more information on the GATK Print Reads Walker, see this `tool specific page &lt;http://www.broadinstitute.org/gsa/gatkdocs/release/org_broadinstitute_sting_gatk_walkers_PrintReadsWalker.html&gt;`_.

To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3&gt;`_.

If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions&gt;`_.

------

**Inputs**

GenomeAnalysisTK: PrintReads accepts one or more BAM or SAM input files.


**Outputs**

The output is in BAM format.


Go `here &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK&gt;`_ for details on GATK file formats.

-------

**Settings**::

 number       int          -1  Print the first n reads from the file, discarding the rest
 platform     String       NA  Exclude all reads with this platform from the output
 readGroup    String       NA  Exclude all reads with this read group from the output
 sample_file  Set[File]    []  File containing a list of samples (one per line). Can be specified multiple times.
 sample_name  Set[String]  []  Sample name to be included in the analysis. Can be specified multiple times.

@CITATION_SECTION@
  </help>
</tool>