diff find_intervals.xml @ 17:a3af29edcce2

Uploaded Miller Lab Devshed version a51c894f5bed
author miller-lab
date Fri, 28 Sep 2012 11:57:18 -0400
parents 8ae67e9fb6ff
children d6b961721037
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/find_intervals.xml	Fri Sep 28 11:57:18 2012 -0400
@@ -0,0 +1,142 @@
+<tool id="gd_find_intervals" name="Remarkable Intervals" version="1.0.0">
+  <description>: Find high-scoring runs of SNPs</description>
+
+  <command interpreter="python">
+    find_intervals.py "$input" "$input.metadata.dbkey" "$output" "$output.files_path"
+    ## Chromosome and position columns: taken from the dataset metadata unless the user chose them explicitly.
+    #if $override_metadata.choice == "0"
+      "$input.metadata.ref" "$input.metadata.rPos"
+    #else
+      "$override_metadata.ref_col" "$override_metadata.rpos_col"
+    #end if
+    ## Score column, followed by the requested number of randomizations.
+    "$score_col" "$shuffles"
+    ## Cutoff: a percentage is passed as-is; a raw value is passed with a leading equals sign.
+    #if $cutoff.type == 'percentage'
+      "$cutoff.cutoff_pct"
+    #else
+      "=$cutoff.cutoff_val"
+    #end if
+    ## Value of the Report individual positions select (0 = No, 1 = Yes).
+    "$out_format"
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="tabular" label="Input">
+      <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" />
+    </param>
+    <!-- Score column, picked from the columns of the selected input dataset. -->
+    <param name="score_col" type="data_column" data_ref="input" numerical="true" label="Column with score"/>
+    <!-- Cutoff given either as a percentage (0 to 100, default 95) or as a raw score value (default 0.0). -->
+    <conditional name="cutoff">
+      <param name="type" type="select" label="Cutoff type">
+        <option value="percentage">percentage</option>
+        <option value="value">value</option>
+      </param>
+      <when value="percentage">
+        <param name="cutoff_pct" type="float" value="95" min="0" max="100" label="Percentage cutoff"/>
+      </when>
+      <when value="value">
+        <param name="cutoff_val" type="float" value="0.0" label="Value cutoff"/>
+      </when>
+    </conditional>
+    <!-- Number of randomizations: a non-negative integer, default 0. -->
+    <param name="shuffles" type="integer" min="0" value="0" label="Number of randomizations"/>
+    <!-- No/Yes select with values 0 and 1; the output change_format rule tests this value. -->
+    <param name="out_format" type="select" label="Report individual positions">
+      <option value="0" selected="true">No</option>
+      <option value="1">Yes</option>
+    </param>
+    <!-- Chromosome and position columns come from the dataset metadata unless the user chooses them here. -->
+    <conditional name="override_metadata">
+      <param name="choice" type="select" label="Choose columns" help="Note: you need to choose the columns if the input dataset is not gd_snp">
+        <option value="0" selected="true">No, get columns from metadata</option>
+        <option value="1">Yes, choose columns</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="ref_col" type="data_column" data_ref="input" numerical="false" label="Column with reference chromosome" help="Note: be sure the build in the metadata is the same as using here."/>
+        <param name="rpos_col" type="data_column" data_ref="input" numerical="true" label="Column with reference position" help="Note: either zero or one based positions will work"/>
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="interval">
+      <change_format> <!-- datatype switches when individual positions are reported -->
+        <when input="out_format" value="1" format="bigwigpos"/>
+      </change_format>
+    </data>
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp"/>
+      <param name="score_col" value="5"/>
+      <param name="type" value="value"/>
+      <param name="cutoff_val" value="700.0"/>
+      <param name="shuffles" value="10"/>
+      <param name="out_format" value="0"/>
+      <param name="choice" value="0"/>
+      <!-- Expected interval output for the parameter values listed above. -->
+      <output name="output" file="test_out/find_intervals/find_intervals.interval"/>
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input dataset is tabular_, with required columns of chromosome, position,
+and score (in any column).
+The output dataset is interval_, or bigwigpos when individual positions are reported.  (`Dataset missing?`_)
+
+.. _interval: ./static/formatHelp.html#interval
+.. _tabular: ./static/formatHelp.html#tab
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+The user selects a tabular dataset (such as a gd_snp dataset) and the column
+containing the scores (such as an Fst-value for the SNP).
+If the dataset is not in gd_snp format, the user also specifies the columns
+containing chromosome and position; for gd_snp format the metadata can be
+used to specify the chromosome and position.
+Other inputs include
+a percentage or raw score for the "cutoff" which should be greater than the 
+average value for the scores column.  A higher value will give smaller intervals
+in the output.
+If a percentage (e.g. 95%) is specified
+then that percentile of the scores is used as the cutoff; 
+percentile may not work well if many rows or SNPs have the same score
+(in that case use a raw score).  The program subtracts the
+cutoff from every score, then finds genomic intervals (i.e., consecutive runs
+of SNPs) whose total score cannot be increased by adding or subtracting one
+or more adjusted scores at the ends of the interval.
+Another input is the number of times the
+data should be randomized (only intervals with score exceeding the maximum for
+the randomized data are reported).  
+If 100 shuffles are requested, then any interval reported by the tool has a 
+score with probability less than 0.01 of being equaled or exceeded by chance.
+
+-----
+
+**Example**
+
+- input (gd_snp)::
+
+    Contig222_chr2_9817738_9818143   220     C       T       888.0   chr2    9817960         C       17      0       2       78      12      0       2       63      20      0       2       87      8       0       2       51      11      0       2       60      12      0       2       63      Y       76      0.093   1
+    Contig47_chr2_25470778_25471576  126     G       A       888.0   chr2    25470896        G       12      0       2       63      14      0       2       69      14      0       2       69      10      0       2       57      18      0       2       81      13      0       2       66      N       11      0.289   1
+    ...
+    Contig115_chr2_61631913_61632510 310     G       T       999.3   chr2    61632216        G       7       0       2       48      9       0       2       54      7       0       2       48      11      0       2       60      10      0       2       57      10      0       2       57      N       13      0.184   0
+    Contig31_chr2_67331584_67331785  39      C       T       999.0   chr2    67331623        C       11      0       2       60      10      0       2       57      7       0       2       48      9       0       2       54      2       0       2       33      4       0       2       39      N       110     0.647   1
+    etc.
+
+- output not reporting individual positions::
+
+    chr2    9817960 67331624        1272.2000
+
+  </help>
+</tool>