Mercurial > repos > devteam > picard

diff picard_MergeBamAlignment.xml @ 5:3d4f1fa26f0e draft
Uploaded
author: devteam
date: Tue, 16 Dec 2014 19:03:21 -0500
children: 3a3234d7a2e8
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/picard_MergeBamAlignment.xml	Tue Dec 16 19:03:21 2014 -0500
@@ -0,0 +1,329 @@
+<tool name="MergeBamAlignment" id="picard_MergeBamAlignment" version="1.126.0">
+  <description>merge alignment data with additional info stored in an unmapped BAM dataset</description>
+  <requirements><requirement type="package" version="1.126.0">picard</requirement></requirements>
+  
+  <macros>
+    <import>picard_macros.xml</import>
+  </macros>
+  
+  <command>
+    @java_options@
+    #set $picard_dict = "localref.dict"
+    #set $ref_fasta = "localref.fa"     ## This is done because picards "likes" .fa extension
+    
+    ln -s "${reference_source.ref_file}" "${ref_fasta}" &amp;&amp;
+       
+    #if str( $reference_source.reference_source_selector ) == "history":
+        
+      java -jar \$JAVA_JAR_PATH/picard.jar CreateSequenceDictionary REFERENCE="${ref_fasta}" OUTPUT="${picard_dict}"
+      QUIET=true
+      VERBOSITY=ERROR
+      
+      &amp;&amp;
+      
+    #else:
+    
+      #set $ref_fasta = str( $reference_source.ref_file.fields.path )
+    
+    #end if
+    
+    java -jar \$JAVA_JAR_PATH/picard.jar
+    MergeBamAlignment
+      UNMAPPED_BAM="${unmapped_bam}"
+      
+      PAIRED_RUN=true ##This argument is ignored and will be removed. Required. Possible values: {true, false}
+      
+      #if str( $aligned_or_read1_and_read2.aligned_or_read1_and_read2_selector ) == "paired_one_file":
+        #for $dataset in $aligned_or_read1_and_read2.aligned_bams:
+          ALIGNED_BAM="${dataset.aligned_bam}"
+        #end for
+      #elif str( $aligned_or_read1_and_read2.aligned_or_read1_and_read2_selector ) == "paired_two_files":
+        #for $dataset in $aligned_or_read1_and_read2.read1_aligned_bams:
+          READ1_ALIGNED_BAM="${dataset.read1_aligned_bam}"
+        #end for
+        #for $dataset in $aligned_or_read1_and_read2.read2_aligned_bams:
+          READ2_ALIGNED_BAM="${dataset.read1_aligned_bam}"
+        #end for
+      #else
+        #for $dataset in $aligned_or_read1_and_read2.read1_aligned_bams:
+          READ1_ALIGNED_BAM="${dataset.read1_aligned_bam}"
+        #end for
+      #end if
+     
+      OUTPUT="${outFile}"
+      REFERENCE_SEQUENCE="${ref_fasta}"
+      
+      CLIP_ADAPTERS="${clip_adapters}"
+      IS_BISULFITE_SEQUENCE="${is_bisulfite_sequence}"
+      ALIGNED_READS_ONLY="${aligned_reads_only}"
+      MAX_INSERTIONS_OR_DELETIONS="${max_insertions_or_deletions}"
+      
+      #for $attribute in $attributes_to_retain:
+        ATTRIBUTES_TO_RETAIN="${$attribute.attribute}"
+      #end for
+      
+      #for $attribute in $attributes_to_remove:
+        ATTRIBUTES_TO_REMOVE="${$attribute.attribute}"
+      #end for
+    
+      READ1_TRIM="${read1_trim}"
+      READ2_TRIM="${read2_trim}"
+      
+      #if str( $orientations ) != "None":
+        #for $orientation in str( $orientations ).split(','):   ## See trello card https://trello.com/c/9nW02Zhd
+          EXPECTED_ORIENTATIONS="${orientation}"
+        #end for
+      #end if
+      
+      ALIGNER_PROPER_PAIR_FLAGS="${aligner_proper_pair_flags}"      
+      PRIMARY_ALIGNMENT_STRATEGY="${primary_alignment_strategy}"
+      CLIP_OVERLAPPING_READS="${clip_overlapping_reads}"
+      INCLUDE_SECONDARY_ALIGNMENTS="${include_secondary_alignments}"
+      ADD_MATE_CIGAR="${add_mate_cigar}"
+      
+      VALIDATION_STRINGENCY="${validation_stringency}"
+
+      SORT_ORDER=coordinate
+      QUIET=true
+      VERBOSITY=ERROR
+      
+  </command>
+  
+  <inputs>
+    
+     <conditional name="reference_source">
+      <param name="reference_source_selector" type="select" label="Load reference genome from">
+        <option value="cached">Local cache</option>
+        <option value="history">History</option>
+      </param>
+      <when value="cached">
+        <param name="ref_file" type="select" label="Use dictionary from the list" help="Select genome from the list">
+          <options from_data_table="picard_indexes">
+            <filter type="sort_by" column="2" />
+            <validator type="no_options" message="No indexes are available" />
+          </options>
+          <validator type="no_options" message="A built-in dictionary is not available for the build associated with the selected input file"/>
+        </param>
+      </when>
+      <when value="history"> 
+        <param name="ref_file" type="data" format="fasta" label="Use the following dataset to create dictionary" help="You can upload a FASTA sequence to the history from which Picard will automatically generate dictionary using CreateSequenceDictionary command" />
+      </when>
+    </conditional>
+    
+    <param format="sam,bam" name="unmapped_bam" type="data" label="Selected unaligned SAM or BAM with original reads" help="UNMAPPED_BAM; This dataset must be sorted in queryname order (use picard_SortSam to do this)" />
+    <conditional name="aligned_or_read1_and_read2">
+      <param name="aligned_or_read1_and_read2_selector" type="select" label="What type of aligned data do you have?">
+        <option value="paired_one_file">Paired data in a single BAM file (ALIGNED_BAM)</option>
+        <option value="paired_two_files">Paired data in separate files (READ1_ALIGNED_BAM and READ2_ALIGNED_BAM)</option>
+        <option value="single_file">Singe end data (READ1_ALIGNED_BAM)</option>
+      </param>
+      <when value="paired_one_file">
+        <repeat name="aligned_bams" title="Aligned SAM or BAM dataset" min="1" help="You can provide multiple datasets">
+          <param name="aligned_bam" type="data" format="sam,bam" label="SAM or BAM dataset(s) with alignment data" help="ALIGNED_BAM"/>
+        </repeat>
+      </when>
+      <when value="paired_two_files">
+        <repeat name="read1_aligned_bams" title="Aligned SAM or BAM dataset for Read 1" min="1" help="You can provide multiple datasets">
+          <param name="read1_aligned_bam" type="data" format="sam,bam" label="SAM or BAM dataset(s) with alignment data for Read1" help="READ1_ALIGNED_BAM"/>
+        </repeat>
+        <repeat name="read2_aligned_bams" title="Aligned SAM or BAM dataset for Read 2" min="1" help="You can provide multiple datasets">
+          <param name="read2_aligned_bam" type="data" format="sam,bam" label="SAM or BAM dataset(s) with alignment data for Read2" help="READ2_ALIGNED_BAM"/>
+        </repeat>
+      </when>
+      <when value="single_file">
+        <repeat name="read1_aligned_bams" title="Aligned SAM or BAM dataset for Single Reads" min="1" help="You can provide multiple datasets">
+          <param name="read1_aligned_bam" type="data" format="sam,bam" label="SAM or BAM dataset(s) with alignment data" help="READ1_ALIGNED_BAM"/>
+        </repeat>
+      </when>
+    </conditional>
+    
+    <param name="clip_adapters" type="boolean" checked="true" label="Whether to clip adapters where identified" help="CLIP_ADAPTERS; default=True"/>
+    <param name="is_bisulfite_sequence" type="boolean" label="Whether the data is from bisulfite sequencing (used when caculating the NM tag)" help="IS_BISULFITE_SEQUENCE; default=False"/>
+    <param name="aligned_reads_only" type="boolean" label="Whether to output only aligned reads" help="ALIGNED_READS_ONLY; default=False"/>
+    <param name="max_insertions_or_deletions" type="integer" value="1" label="The maximum number of insertions or deletions permitted for an alignment to be included" help="MAX_INSERTIONS_OR_DELETIONS; Alignments with more than this many insertions or deletions will be ignored. Set to -1 to allow any number of insertions or deletions. default=1"/>
+ 
+    <repeat name="attributes_to_retain" title="Retain the following alignment attribute" min="0" help="You can provide multiple attributes">
+      <param name="attribute" type="text" size="4" label="Reserved alignment attributes (tags starting with X, Y, or Z) that should be brought over from the alignment data when merging" help="ATTRIBUTES_TO_RETAIN; example: XA"/>
+    </repeat>
+    
+    <repeat name="attributes_to_remove" title="Remove the following alignment attribute" min="0" help="You can provide multiple attributes">
+      <param name="attribute" type="text" size="4" label="Attributes from the alignment record that should be removed when merging." help="ATTRIBUTES_TO_REMOVE; This overrides ATTRIBUTES_TO_RETAIN if they share common tags"/>
+    </repeat>
+    
+    <param name="read1_trim" type="integer" value="0" label="The number of bases trimmed from the beginning of read 1 prior to alignment" help="READ1_TRIM; default=0"/>
+    <param name="read2_trim" type="integer" value="0" label="The number of bases trimmed from the beginning of read 2 prior to alignment" help="READ2_TRIM; default=0"/>
+    
+    <param name="orientations" type="select" multiple="True" display="checkboxes" label="The expected orientation of proper read pairs" help="EXPECTED_ORIENTATIONS; multiple orinetations can be selected">
+      <option value="FR">Forward/Reverse (FR)</option>
+      <option value="RF">Reverse/Forward (RF)</option>
+      <option value="TANDEM">Tandem</option>
+    </param>
+ 
+    
+    <param name="aligner_proper_pair_flags" type="boolean" label="Use the aligner's idea of what a proper pair is rather than computing in this program" help="ALIGNER_PROPER_PAIR_FLAGS; default=False"/>
+    
+    <param name="primary_alignment_strategy" type="select" label="Strategy for selecting primary alignment when the aligner has provided more than one alignment for a pair or fragments" help="PRIMARY_ALIGNMENT_STRATEGY; see help below for more info; default=BestMapq">
+      <option value="BestMapq" selected="True">BestMapq</option>
+      <option value="EarliestFragment">EarliestFragment</option>
+      <option value="BestEndMapq">BestEndMapq</option>
+      <option value="MostDistant">MostDistant</option>
+    </param>
+    
+    <param name="clip_overlapping_reads" type="boolean" checked="True" label="For paired reads, soft clip the 3' end of each read if necessary so that it does not extend past the 5' end of its mate" help="CLIP_OVERLAPPING_READS; default=True"/>
+    <param name="include_secondary_alignments" type="boolean" checked="True" label="If false, do not write secondary alignments to output" help="INCLUDE_SECONDARY_ALIGNMENTS; default=True"/>
+    <param name="add_mate_cigar" type="boolean" checked="True" label="Adds the mate CIGAR tag (MC) if true, does not if false" help="ADD_MATE_CIGAR; default=True"/>
+    <expand macro="VS" />
+  </inputs>
+  <outputs>
+    <data name="outFile" format="bam" label="${tool.name} on ${on_string}: BAM with merged alignments"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="reference_source_selector" value="history" />
+      <param name="ref_file" value="picard_MergeBamAlignment_ref.fa" ftype="fasta" />
+      <param name="unmapped_bam" value="picard_MergeBamAlignment_unaligned.bam" ftype="bam"/>
+      <param name="aligned_or_read1_and_read2_selector" value="paired_one_file"/>
+      <param name="aligned_bam" value="picard_MergeBamAlignment_aligned.bam" ftype="bam"/>
+      <param name="clip_adapters" value="True"/>
+      <param name="is_bisulfite_sequence" value="False"/>
+      <param name="aligned_reads_only" value="False"/>
+      <param name="max_insertions_or_deletions" value="1"/>
+      <param name="read1_trim" value="0"/>
+      <param name="read2_trim" value="0"/>
+      <param name="orientation" value="FR"/>
+      <param name="aligner_proper_pair_flags" value="False"/>
+      <param name="primary_alignment_strategy" value="BestMapq"/>
+      <param name="clip_overlapping_reads" value="True"/>
+      <param name="include_secondary_alignments" value="True"/>
+      <param name="add_mate_cigar" value="True"/>
+      <output name="outFile" file="picard_MergeBamAlignment_test1.bam" ftype="bam" lines_diff="2"/>
+    </test>
+  </tests>
+  
+  <stdio>
+    <exit_code range="1:"  level="fatal"/>
+  </stdio>
+  
+  <help>
+
+.. class:: infomark
+
+**Purpose**
+
+Merges alignment data from a SAM or BAM dataset with additional data stored in an unmapped BAM dataset and produces a third SAM or BAM dataset of aligned and unaligned reads.
+
+@dataset_collections@
+
+@description@
+
+  UNMAPPED_BAM=File
+  UNMAPPED=File                 Original SAM or BAM file of unmapped reads, which must be in queryname order.  Required. 
+  
+  ALIGNED_BAM=File
+  ALIGNED=File                  SAM or BAM file(s) with alignment data.  This option may be specified 0 or more times.  
+                                Cannot be used in conjuction with option(s) READ1_ALIGNED_BAM (R1_ALIGNED) 
+                                READ2_ALIGNED_BAM (R2_ALIGNED)
+  
+  READ1_ALIGNED_BAM=File
+  R1_ALIGNED=File               SAM or BAM file(s) with alignment data from the first read of a pair.  This option may be 
+                                specified 0 or more times.  Cannot be used in conjuction with option(s) ALIGNED_BAM 
+                                (ALIGNED)
+  
+  READ2_ALIGNED_BAM=File
+  R2_ALIGNED=File               SAM or BAM file(s) with alignment data from the second read of a pair.  This option may 
+                                be specified 0 or more times.  Cannot be used in conjuction with option(s) ALIGNED_BAM 
+                                (ALIGNED)
+  
+  PAIRED_RUN=Boolean
+  PE=Boolean                    This argument is ignored and will be removed.  Required. Possible values: {true, false} 
+  
+  JUMP_SIZE=Integer
+  JUMP=Integer                  The expected jump size (required if this is a jumping library). Deprecated. Use 
+                                EXPECTED_ORIENTATIONS instead  Default value: null.  Cannot be used in conjuction with 
+                                option(s) EXPECTED_ORIENTATIONS (ORIENTATIONS)
+  
+  CLIP_ADAPTERS=Boolean         Whether to clip adapters where identified.  Default value: true. Possible values: {true, false} 
+  
+  IS_BISULFITE_SEQUENCE=Boolean Whether the lane is bisulfite sequence (used when caculating the NM tag).  Default value: 
+                                false. Possible values: {true, false} 
+  
+  ALIGNED_READS_ONLY=Boolean    Whether to output only aligned reads. Default value: false. Possible values: {true, false} 
+  
+  MAX_INSERTIONS_OR_DELETIONS=Integer
+  MAX_GAPS=Integer              The maximum number of insertions or deletions permitted for an alignment to be included. 
+                                Alignments with more than this many insertions or deletions will be ignored. Set to -1 to 
+                                allow any number of insertions or deletions.  Default value: 1.
+  
+  ATTRIBUTES_TO_RETAIN=String   Reserved alignment attributes (tags starting with X, Y, or Z) that should be brought over 
+                                from the alignment data when merging.  This option may be specified 0 or more times. 
+  
+  ATTRIBUTES_TO_REMOVE=String   Attributes from the alignment record that should be removed when merging.  This overrides 
+                                ATTRIBUTES_TO_RETAIN if they share common tags.  This option may be specified 0 or more 
+                                times. 
+  
+  READ1_TRIM=Integer
+  R1_TRIM=Integer               The number of bases trimmed from the beginning of read 1 prior to alignment  Default 
+                                value: 0. 
+  
+  READ2_TRIM=Integer
+  R2_TRIM=Integer               The number of bases trimmed from the beginning of read 2 prior to alignment  Default 
+                                value: 0. 
+  
+  EXPECTED_ORIENTATIONS=PairOrientation
+  ORIENTATIONS=PairOrientation  The expected orientation of proper read pairs. Replaces JUMP_SIZE  Possible values: {FR, 
+                                RF, TANDEM} This option may be specified 0 or more times.  Cannot be used in conjuction 
+                                with option(s) JUMP_SIZE (JUMP)
+  
+  ALIGNER_PROPER_PAIR_FLAGS=Boolean
+                                Use the aligner's idea of what a proper pair is rather than computing in this program.  
+                                Default value: false. Possible values: {true, false} 
+  
+  SORT_ORDER=SortOrder
+  SO=SortOrder                  The order in which the merged reads should be output.  Default value: coordinate.
+                                Possible values: {unsorted, queryname, coordinate} 
+  
+  PRIMARY_ALIGNMENT_STRATEGY=PrimaryAlignmentStrategy
+                                Strategy for selecting primary alignment when the aligner has provided more than one 
+                                alignment for a pair or fragment, and none are marked as primary, more than one is marked 
+                                as primary, or the primary alignment is filtered out for some reason. BestMapq expects 
+                                that multiple alignments will be correlated with HI tag, and prefers the pair of 
+                                alignments with the largest MAPQ, in the absence of a primary selected by the aligner. 
+                                EarliestFragment prefers the alignment which maps the earliest base in the read. Note 
+                                that EarliestFragment may not be used for paired reads. BestEndMapq is appropriate for 
+                                cases in which the aligner is not pair-aware, and does not output the HI tag. It simply 
+                                picks the alignment for each end with the highest MAPQ, and makes those alignments 
+                                primary, regardless of whether the two alignments make sense together.MostDistant is also 
+                                for a non-pair-aware aligner, and picks the alignment pair with the largest insert size. 
+                                If all alignments would be chimeric, it picks the alignments for each end with the best 
+                                MAPQ.  For all algorithms, ties are resolved arbitrarily.  Default value: BestMapq.
+                                Possible values: {BestMapq, EarliestFragment, BestEndMapq, MostDistant} 
+  
+  CLIP_OVERLAPPING_READS=BooleanFor paired reads, soft clip the 3' end of each read if necessary so that it does not 
+                                extend past the 5' end of its mate.  Default value: true. Possible values: {true, false} 
+  
+  INCLUDE_SECONDARY_ALIGNMENTS=Boolean
+                                If false, do not write secondary alignments to output.  Default value: true.
+                                Possible values: {true, false} 
+  
+  ADD_MATE_CIGAR=Boolean
+  MC=Boolean                    Adds the mate CIGAR tag (MC) if true, does not if false. Possible values: {true, false} 
+
+
+
+
+@more_info@
+  </help>
+</tool>
+
+
+
+
+
+
+
+
+
+
+
+
+
author	devteam
date	Tue, 16 Dec 2014 19:03:21 -0500
parents
children	3a3234d7a2e8