view picard_FastqToSam.xml @ 6:4ff1e04010e6 draft

Uploaded
author devteam
date Thu, 15 Jan 2015 15:37:04 -0500
parents 3d4f1fa26f0e
children 3a3234d7a2e8
line wrap: on
line source

<tool name="FastqToSam" id="picard_FastqToSam" version="1.126.0">
  <description>convert Fastq data into unaligned BAM</description>
  <requirements>
    <requirement type="package" version="1.126.0">picard</requirement>
  </requirements>
  
  <macros>
    <import>picard_macros.xml</import>
  </macros>
  
  <!--
    Cheetah template for the Picard FastqToSam invocation.
    * FASTQ / FASTQ2 come from the "input_type" conditional: one dataset ("se"), two
      datasets ("pe"), or otherwise the forward/reverse members of a paired collection.
    * LIBRARY_NAME through RUN_DATE are optional and are only emitted when the
      parameter's string value is non-empty.
    * SORT_ORDER, QUIET and VERBOSITY are hard-coded rather than user-selectable.
    * @java_options@ is presumably a token supplied by picard_macros.xml, and
      validation_stringency presumably comes from the "VS" macro expanded in the inputs
      section; neither definition is visible in this file (TODO confirm).
    NOTE(review): SORT_ORDER=coordinate is applied to unaligned reads; confirm this is intended.
  -->
  <command>
    @java_options@
    
    java -jar \$JAVA_JAR_PATH/picard.jar
    FastqToSam
    
    #if str( $input_type.input_type_selector ) == "se":
      FASTQ="${input_type.fastq}"
    #elif str( $input_type.input_type_selector ) == "pe":
      FASTQ="${input_type.fastq}"
      FASTQ2="${input_type.fastq2}"
    #else
      FASTQ="${input_type.fastq.forward}"
      FASTQ2="${input_type.fastq.reverse}"
    #end if
    
    QUALITY_FORMAT="${quality_format}"
    OUTPUT="${outFile}"
    READ_GROUP_NAME="${read_group_name}"
    SAMPLE_NAME="${sample_name}"
    
    #if str( $library_name ):
      LIBRARY_NAME="${library_name}"
    #end if
    
    #if str( $platform_unit ):
      PLATFORM_UNIT="${platform_unit}"
    #end if
      
    #if str( $platform ):
      PLATFORM="${platform}"
    #end if
      
    #if str( $sequencing_center ):
      SEQUENCING_CENTER="${sequencing_center}"
    #end if
      
    #if str( $predicted_insert_size ):
      PREDICTED_INSERT_SIZE="${predicted_insert_size}"
    #end if
      
    #if str( $comment ):
      COMMENT="${comment}"
    #end if
      
    #if str( $description ):
      DESCRIPTION="${description}"
    #end if
      
    #if str( $run_date ):
      RUN_DATE="${run_date}"
    #end if
    
    MIN_Q="${min_q}"
    MAX_Q="${max_q}"
    STRIP_UNPAIRED_MATE_NUMBER="${strip_unpairied_mate_number}"
    ALLOW_AND_IGNORE_EMPTY_LINES="${allow_and_ignore_empty_lines}"
    
    SORT_ORDER=coordinate
    VALIDATION_STRINGENCY="${validation_stringency}"
    QUIET=true
    VERBOSITY=ERROR
  
  </command>
  <!--
    User-facing parameters. Every name declared here is referenced from the command
    template and from the tests section, so names must stay stable. That includes the
    historical misspelling "strip_unpairied_mate_number", which is kept on purpose.
    Each help string starts with the Picard option the parameter is passed to.
  -->
  <inputs>
    <!-- Selects how the reads are supplied; each branch defines the dataset parameter(s) it needs. -->
    <conditional name="input_type">
      <param name="input_type_selector" type="select" label="What is your input data" help="Select between single end, paired end, and collections. See help below for full explanation of dataset types">
        <option value="se">Single end (single dataset)</option>
        <option value="pe">Paired end (two datasets)</option>
        <option value="pc">Paired collection</option>
      </param>
      <when value="se">
        <param name="fastq" type="data" format="fastq" label="Input fastq file for single end data" help="FASTQ"/>
      </when>
      <when value="pe">
        <param name="fastq" type="data" format="fastq" label="Input fastq file for the first read in paired end data" help="FASTQ"/>
        <param name="fastq2" type="data" format="fastq" label="Input fastq file for the second read of paired end data" help="FASTQ2"/>
      </when>
      <when value="pc">
        <param name="fastq" type="data_collection" collection_type="paired" label="FASTQ paired dataset collection" help="FASTQ and FASTQ2; A collection of two datasets with forward and reverse reads. See help below on explanation of dataset collections"/>
      </when>
    </conditional>

    <param name="quality_format" type="select" label="Select quality encoding scheme" help="QUALITY_FORMAT">
      <option value="Standard" selected="True">Sanger (+33)</option>
      <option value="Illumina">Illumina (+64)</option>
      <option value="Solexa">Solexa (+66)</option>
    </param>

    <!-- Read group fields: name and sample have defaults, the rest are optional and
         are only passed to Picard when filled in. -->
    <param name="read_group_name" type="text" size="20" value="A" label="Read group name" help="READ_GROUP_NAME"/>
    <param name="sample_name" type="text" size="20" value="sample-a" label="Sample name" help="SAMPLE_NAME"/>
    <param name="library_name" type="text" size="20" optional="True" label="The library name" help="LIBRARY_NAME; Optional"/>
    <param name="platform_unit" type="text" size="20" optional="True"  label="The platform unit (often run_barcode.lane)" help="PLATFORM_UNIT; Optional"/>
    <param name="platform" type="text" size="20" optional="True"  label="The platform type (e.g. illumina, 454)" help="PLATFORM; Optional"/>
    <param name="sequencing_center" type="text" size="20" optional="True"  label="The sequencing center from which the data originated" help="SEQUENCING_CENTER; Optional"/>

    <param name="predicted_insert_size" type="integer" min="0" max="100000" optional="True" label="Predicted median insert size, to insert into the read group header" help="PREDICTED_INSERT_SIZE; Optional"/>
    <param name="comment" type="text" size="20" optional="True" label="Comment to include in the output dataset's header" help="COMMENT; Optional"/>
    <param name="description" type="text" size="20" optional="True" label="Optional description information" help="DESCRIPTION; Optional"/>
    <param name="run_date" optional="True" type="text" label="Run date" help="RUN_DATE; Optional; Format=YYYY-MM-DD (eg 1997-07-16)"/>

    <!-- Quality bounds and FASTQ parsing switches. -->
    <param name="min_q" type="integer" value="0" min="0" max="100" label="Minimum quality allowed in the input fastq" help="MIN_Q; An exception will be thrown if a quality is less than this value; default=0"/>
    <param name="max_q" type="integer" value="93" min="0" max="100" label="Maximum quality allowed in the input fastq" help="MAX_Q; An exception will be thrown if a quality is greater than this value; default=93"/>
    <param name="strip_unpairied_mate_number" type="boolean" truevalue="true" falsevalue="false" label="If true and this is an unpaired fastq any occurrence of '/1' will be removed from the end of a read name" help="STRIP_UNPAIRED_MATE_NUMBER; default=false"/>
    <param name="allow_and_ignore_empty_lines" type="boolean" truevalue="true" falsevalue="false" label="Allow (and ignore) empty lines" help="ALLOW_AND_IGNORE_EMPTY_LINES; default=false"/>

    <!-- Presumably expands to the validation_stringency parameter used by the command
         template; the macro is defined outside this file (TODO confirm). -->
    <expand macro="VS" />

  </inputs>
  
  <outputs>
    <!-- Single BAM dataset; the command template writes to it through OUTPUT="${outFile}". -->
    <data name="outFile" format="bam" label="${tool.name} on ${on_string}: reads as unaligned BAM"/>
  </outputs>
  
  <tests>
    <!-- Paired-end run with every optional read group field filled in. The produced BAM
         is compared with the stored reference file using lines_diff="4". -->
    <test>
      <!-- Input selection -->
      <param name="input_type_selector" value="pe"/>
      <param name="fastq" value="picard_FastqToSam_read1.fq" ftype="fastq"/>
      <param name="fastq2" value="picard_FastqToSam_read2.fq" ftype="fastq"/>
      <!-- Encoding and read group metadata -->
      <param name="quality_format" value="Standard"/>
      <param name="read_group_name" value="A"/>
      <param name="sample_name" value="sample-a"/>
      <param name="library_name" value="A"/>
      <param name="platform_unit" value="A"/>
      <param name="platform" value="Illumina"/>
      <param name="sequencing_center" value="A"/>
      <param name="predicted_insert_size" value="300"/>
      <param name="comment" value="A"/>
      <param name="description" value="A"/>
      <param name="run_date" value="2014-10-10"/>
      <!-- Quality bounds, parsing switches and validation -->
      <param name="min_q" value="0"/>
      <param name="max_q" value="93"/>
      <param name="strip_unpairied_mate_number" value="False"/>
      <param name="allow_and_ignore_empty_lines" value="False"/>
      <param name="validation_stringency" value="LENIENT"/>
      <output name="outFile" file="picard_FastqToSam_test1.bam" ftype="bam" lines_diff="4"/>
    </test>
  </tests>
  
  <stdio>
    <!-- Any exit status of 1 or above is reported as a fatal error. -->
    <exit_code range="1:" level="fatal"/>
  </stdio>
  
  <help>

.. class:: infomark

**Purpose**

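Converts FASTQ data into an unaligned BAM dataset. Read sequences and qualities are taken from the input FASTQ file (or from both files for paired-end data) and written to the output together with the read group information supplied in the form.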

@dataset_collections@

@RG@

@description@

  FASTQ=File
  F1=File                       Input fastq file for single end data, or first read in paired end 
                                data.  Required.
                                
  FASTQ2=File
  F2=File                       Input fastq file for the second read of paired end data (if used).  

  QUALITY_FORMAT=FastqQualityFormat
  V=FastqQualityFormat          A value describing how the quality values are encoded in the fastq.  Either Solexa for 
                                pre-pipeline 1.3 style scores (solexa scaling + 66), Illumina for pipeline 1.3 and above 
                                (phred scaling + 64) or Standard for phred scaled scores with a character shift of 33.  
                                If this value is not specified, the quality format will be detected automatically.  
                                Default value: null. Possible values: {Solexa, Illumina, Standard} 

  READ_GROUP_NAME=String
  RG=String                     Read group name  Default value: A. 
  
  SAMPLE_NAME=String
  SM=String                     Sample name to insert into the read group header  Required. 
  
  LIBRARY_NAME=String
  LB=String                     The library name to place into the LB attribute in the read group header.
  
  PLATFORM_UNIT=String
  PU=String                     The platform unit (often run_barcode.lane) to insert into the read group header.
  
  PLATFORM=String
  PL=String                     The platform type (e.g. illumina, solid) to insert into the read group header.
  
  SEQUENCING_CENTER=String
  CN=String                     The sequencing center from which the data originated.
  
  PREDICTED_INSERT_SIZE=Integer
  PI=Integer                    Predicted median insert size, to insert into the read group header.
  
  COMMENT=String
  CO=String                     Comment to include in the merged output file's header. 
  
  DESCRIPTION=String
  DS=String                     Inserted into the read group header. 
  
  RUN_DATE=Iso8601Date
  DT=Iso8601Date                Date the run was produced, to insert into the read group header. 
  
  MIN_Q=Integer                 Minimum quality allowed in the input fastq.  An exception will be thrown if a quality is 
                                less than this value.  Default value: 0.
                                
  MAX_Q=Integer                 Maximum quality allowed in the input fastq.  An exception will be thrown if a quality is 
                                greater than this value.  Default value: 93.
  
  STRIP_UNPAIRED_MATE_NUMBER=Boolean
                                If true and this is an unpaired fastq any occurrence of '/1' will be removed from the end 
                                of a read name.  Default value: false.  Possible values: {true, false} 
  
  ALLOW_AND_IGNORE_EMPTY_LINES=Boolean
                                Allow (and ignore) empty lines  Default value: false. Possible values: {true, false} 
  

@more_info@

  </help>
</tool>