diff gsnap.xml @ 8:a89fec682254

gmap/gsnap updated to version 2011-11-30
author Jim Johnson <jj@umn.edu>
date Thu, 08 Dec 2011 11:00:46 -0600
parents 561503a442f0
children
line wrap: on
line diff
--- a/gsnap.xml	Tue Nov 08 13:26:41 2011 -0600
+++ b/gsnap.xml	Thu Dec 08 11:00:46 2011 -0600
@@ -1,12 +1,7 @@
-<tool id="gsnap" name="GSNAP" version="2.0.0">
+<tool id="gsnap" name="GSNAP" version="2.0.1">
   <description>Genomic Short-read Nucleotide Alignment Program</description>
   <requirements>
       <requirement type="binary">gsnap</requirement>
-      <!-- proposed tag for added datatype dependencies -->
-      <requirement type="datatype">gmapdb</requirement>
-      <requirement type="datatype">gmapsnpindex</requirement>
-      <requirement type="datatype">splicesites.iit</requirement>
-      <requirement type="datatype">introns.iit</requirement>
   </requirements>
   <version_string>gsnap --version</version_string>
   <command>
@@ -25,10 +20,16 @@
     #if $refGenomeSource.use_splicing.src == 'gmapdb':
       #if $refGenomeSource.use_splicing.splicemap != None and len($refGenomeSource.use_splicing.splicemap.__str__) > 0:
         -s $refGenomeSource.use_splicing.splicemap.value
+        #if $computation.options == "advanced" and $computation.trim_mismatch_score.__str__ == '0':
+          $refGenomeSource.use_splicing.ambig_splice_noclip
+        #end if
       #end if
     #elif $refGenomeSource.use_splicing.src == 'history':
       #if $refGenomeSource.use_splicing.splicemap != None and len($refGenomeSource.use_splicing.splicemap.__str__) > 0:
         -S $os.path.dirname($refGenomeSource.use_splicing.splicemap) -s $os.path.basename($refGenomeSource.use_splicing.splicemap)
+        #if $computation.options == "advanced" and $computation.trim_mismatch_score.__str__ == '0':
+          $refGenomeSource.use_splicing.ambig_splice_noclip
+        #end if
       #end if
     #end if
     #if $refGenomeSource.use_snps.src == 'gmapdb':
@@ -43,9 +44,11 @@
     #if $refGenomeSource.mode.__str__ != '':
       --mode=$refGenomeSource.mode
     #end if
+    #* ## No longer in options as of version 2011-11-30
     #if $mapq_unique_score.__str__ != '':
       --mapq-unique-score=$mapq_unique_score
     #end if
+    *#
     #if $computation.options == "advanced":
       #if $computation.max_mismatches.__str__ != '':
         --max-mismatches=$computation.max_mismatches
@@ -82,6 +85,9 @@
       #if $computation.trim_mismatch_score.__str__ != '':
         --trim-mismatch-score=$computation.trim_mismatch_score
       #end if
+      #if $computation.trim_indel_score.__str__ != '':
+        --trim-indel-score=$computation.trim_indel_score
+      #end if
       ## TODO - do we need these options (Is it tally XOR runlength?):
       ## --tallydir=  --use-tally=tally
       ## --runlengthdir  --use-runlength=runlength
@@ -206,6 +212,12 @@
         #if $seq.paired.pairmax_rna.__str__ != '':
           --pairmax-rna=$seq.paired.pairmax_rna
         #end if
+        #if $seq.paired.pairexpect.__str__ != '':
+          --pairexpect=$seq.paired.pairexpect
+        #end if
+        #if $seq.paired.pairdev.__str__ != '':
+          --pairdev=$seq.paired.pairdev
+        #end if
         $seq.fastq $seq.paired.fastq
       #else
         $seq.fastq
@@ -245,7 +257,11 @@
               <option value="FF">fwd-fwd, same strand</option>
             </param>
             <param name="pairmax_dna"  type="integer" value="" optional="true" label="Max total genomic length for DNA-Seq paired reads, or other reads without splicing (default 1000)." help="Used if no splice file is provided and novelsplicing is off."/>
-            <param name="pairmax_rna"  type="integer" value="" optional="true" label="Max total genomic length for RNA-Seq paired reads, or other reads that could have a splice (default 200000)." help="Used novelspliceing is specified or a splice file is provided.  Should probably match the value for localsplicedist."/>
+            <param name="pairmax_rna"  type="integer" value="" optional="true" label="Max total genomic length for RNA-Seq paired reads, or other reads that could have a splice (default 200000)." help="Used when novel splicing is specified or a splice file is provided.  Should probably match the value for localsplicedist."/>
+            <param name="pairexpect"  type="integer" value="" optional="true" label="Expected paired-end length" 
+                   help="Used for calling splices in medial part of paired-end reads (default 200)"/>
+            <param name="pairdev"  type="integer" value="" optional="true" label="Allowable deviation from expected paired-end length" 
+                   help="Used for calling splices in medial part of paired-end reads (default 25)"/>
           </when>
         </conditional>
         <param name="barcode_length" type="integer" value="" optional="true"  label="Amount of barcode to remove from start of read (default 0)" />
@@ -281,9 +297,11 @@
       </when>
       
     </conditional>
+    <!-- No longer in options as of version 2011-11-30
     <param name="mapq_unique_score"  type="integer" value="" optional="true" label="MAPQ score threshold" 
                 help="For multiple results, consider as a unique result if only one of the results has a MAPQ score equal or greater than this
                       (if not selected, then reports all multiple results, up to npaths)" />
+    -->
 
     <!-- GMAPDB for alignment -->
     <conditional name="refGenomeSource">
@@ -405,6 +423,9 @@
           <when value="history">
             <param name="splicemap" type="data" format="splicesites.iit,introns.iit" metadata_name="dbkey" label="Select a splicesite map" 
               help="built with GMAP IIT"/>
+            <param name="ambig_splice_noclip"  type="boolean" checked="false" truevalue="--ambig-splice-noclip" falsevalue="" label="Do not clip at ambiguous splice sites"
+              help="For ambiguous known splicing at ends of the read, do not clip at the splice site, but extend instead into the intron.  
+                    This flag makes sense only if you are trying to eliminate all soft clipping with --trim-mismatch-score=0"/>
           </when>
           <when value="gmapdb">
             <param name="splicemap" type="select"  data_ref="gmapdb" label="Use map for splicing involving known sites or known introns" help="">
@@ -412,6 +433,9 @@
                 <filter type="data_meta" ref="gmapdb" key="maps" multiple="True"/>
               </options>
             </param>
+            <param name="ambig_splice_noclip"  type="boolean" checked="false" truevalue="--ambig-splice-noclip" falsevalue="" label="Do not clip at ambiguous splice sites"
+              help="For ambiguous known splicing at ends of the read, do not clip at the splice site, but extend instead into the intron.  
+                    This flag makes sense only if you are trying to eliminate all soft clipping with --trim-mismatch-score=0"/>
           </when>
         </conditional>
 
@@ -457,8 +481,12 @@
          </param>
          <param name="query_unk_mismatch" type="boolean" checked="false" truevalue="--query-unk-mismatch=1" falsevalue="" label="Count unknown (N) characters in the query as a mismatch"/>
          <param name="genome_unk_mismatch" type="boolean" checked="true" truevalue="" falsevalue="--genome-unk-mismatch=0" label="Count unknown (N) characters in the genome as a mismatch"/>
-         <param name="terminal_threshold"  type="integer" value="" optional="true" label="Threshold for searching for a terminal alignment (default 3)" 
-                help="(from one end of the read to the best possible position at the other end).  To turn off terminal alignments, set this to a high value." />
+         <param name="terminal_threshold"  type="integer" value="" optional="true" label="Threshold for searching for a terminal alignment (default 2)" 
+                help="(from one end of the read to the best possible position at the other end).   For example, if this value is 2, then if GSNAP finds an exact or
+                                   1-mismatch alignment, it will not try to find a terminal alignment.
+                                   Note that this default value may not be low enough if you want to
+                                   obtain terminal alignments for very short reads, although such reads
+                                   probably don't have enough specificity for terminal alignments anyway." />
          <param name="indel_penalty"  type="integer" value="" optional="true" label="Penalty for an indel (default 2)" 
                 help="Counts against mismatches allowed.  To find indels, make indel-penalty less than or equal to max-mismatches.  A value &lt; 2 can lead to false positives at read ends" />
          <param name="indel_endlength"  type="integer" value="" optional="true" label="Minimum length at end required for indel alignments (default 4)" />
@@ -474,7 +502,9 @@
            <option value="off">off</option>
          </param>
          <param name="trim_mismatch_score" type="integer" value="" optional="true" label="Score to use for mismatches when trimming at ends (default is -3)" 
-                help="to turn off trimming, specify 0"/>
+                help="to turn off trimming, specify 0 (Warning: turning trimming off will give false positive mismatches at the ends of reads)"/>
+         <param name="trim_indel_score" type="integer" value="" optional="true" label="Score to use for indels when trimming at ends (default is -4)" 
+                help="to turn off trimming, specify 0 (Warning: turning trimming off will give false positive indels at the ends of reads)"/>
          <param name="use_tally" type="data" format="tally.iit" optional="true" metadata_name="dbkey" label="Select a tally IIT file to resolve concordant multiple results" 
               help="generated by gsnap_tally and iit_store"/>