comparison find_intervals.xml @ 17:a3af29edcce2

Uploaded Miller Lab Devshed version a51c894f5bed
author miller-lab
date Fri, 28 Sep 2012 11:57:18 -0400
parents 8ae67e9fb6ff
children d6b961721037
comparison
equal deleted inserted replaced
16:be0e2223c531 17:a3af29edcce2
1 <tool id="gd_find_intervals" name="Remarkable Intervals" version="1.0.0">
2 <description>: Find high-scoring runs of SNPs</description>
3
<!--
  Command line for find_intervals.py. Positional arguments, in order:
    1. input dataset, 2. its dbkey metadata, 3. output dataset, 4. output files_path
    5-6. reference chromosome and reference position columns: taken from the
         input's metadata (ref, rPos) when override_metadata.choice is "0",
         otherwise from the user-selected ref_col / rpos_col
    7. score column, 8. number of shuffles
    9. cutoff: cutoff_pct passed as-is for a percentage cutoff; cutoff_val
       passed with a leading "=" for a value cutoff
       (NOTE(review): presumably the script uses the "=" prefix to tell the
       two cutoff kinds apart; confirm in find_intervals.py)
    10. out_format
-->
4 <command interpreter="python">
5 find_intervals.py "$input" "$input.metadata.dbkey" "$output" "$output.files_path"
6
7 #if $override_metadata.choice == "0"
8 "$input.metadata.ref" "$input.metadata.rPos"
9 #else
10 "$override_metadata.ref_col" "$override_metadata.rpos_col"
11 #end if
12
13 "$score_col" "$shuffles"
14
15 #if $cutoff.type == 'percentage'
16 "$cutoff.cutoff_pct"
17 #else
18 "=$cutoff.cutoff_val"
19 #end if
20
21 "$out_format"
22 </command>
23
<!--
  Tool form parameters.
    input             tabular dataset; rejected when it has no build (dbkey) set
    score_col         numerical column of the input holding the score
    cutoff            conditional: "percentage" (float 0 to 100, default 95) or
                      "value" (float, default 0.0)
    shuffles          non-negative integer, default 0
    out_format        "0" (default) or "1"; drives the output format change
    override_metadata conditional: "0" (default) adds no parameters; "1" adds
                      selectors for the reference chromosome and position columns
  NOTE(review): format="integer" on the two select params does not look like a
  documented attribute for type="select"; confirm whether it has any effect.
-->
24 <inputs>
25 <param name="input" type="data" format="tabular" label="Input">
26 <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" />
27 </param>
28
29 <param name="score_col" type="data_column" data_ref="input" numerical="true" label="Column with score"/>
30
31 <conditional name="cutoff">
32 <param name="type" type="select" label="Cutoff type">
33 <option value="percentage">percentage</option>
34 <option value="value">value</option>
35 </param>
36 <when value="percentage">
37 <param name="cutoff_pct" type="float" value="95" min="0" max="100" label="Percentage cutoff"/>
38 </when>
39 <when value="value">
40 <param name="cutoff_val" type="float" value="0.0" label="Value cutoff"/>
41 </when>
42 </conditional>
43
44 <param name="shuffles" type="integer" min="0" value="0" label="Number of randomizations"/>
45
46 <param name="out_format" type="select" format="integer" label="Report individual positions">
47 <option value="0" selected="true">No</option>
48 <option value="1">Yes</option>
49 </param>
50
51 <conditional name="override_metadata">
52 <param name="choice" type="select" format="integer" label="Choose columns" help="Note: you need to choose the columns if the input dataset is not gd_snp">
53 <option value="0" selected="true">No, get columns from metadata</option>
54 <option value="1" >Yes, choose columns</option>
55 </param>
56 <when value="0" />
57 <when value="1">
58 <param name="ref_col" type="data_column" data_ref="input" numerical="false" label="Column with reference chromosome" help="Note: be sure the build in the metadata is the same as using here."/>
59 <param name="rpos_col" type="data_column" data_ref="input" numerical="true" label="Column with reference position" help="Note: either zero or one based positions will work"/>
60 </when>
61 </conditional>
62 </inputs>
63
<!--
  Single output dataset. Its format is "interval" by default and is switched
  to "bigwigpos" when the out_format parameter is "1" (report individual
  positions).
-->
64 <outputs>
65 <data name="output" format="interval">
66 <change_format>
67 <when input="out_format" value="1" format="bigwigpos" />
68 </change_format>
69 </data>
70 </outputs>
71
<!--
  One functional test: gd_snp sample input, score in column 5, raw value
  cutoff of 700.0, 10 shuffles, interval output (out_format 0), chromosome and
  position columns taken from metadata (choice 0). The result is compared with
  the stored find_intervals.interval file.
  NOTE(review): shuffles=10 requests randomization; the stored expected file
  can only match if the script's shuffling is reproducible or does not change
  this particular output. Confirm against find_intervals.py.
-->
72 <tests>
73 <test>
74 <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
75 <param name="score_col" value="5" />
76 <param name="type" value="value" />
77 <param name="cutoff_val" value="700.0" />
78 <param name="shuffles" value="10" />
79 <param name="out_format" value="0" />
80 <param name="choice" value="0" />
81
82 <output name="output" file="test_out/find_intervals/find_intervals.interval" />
83 </test>
84 </tests>
85
86 <help>
87
88 **Dataset formats**
89
90 The input dataset is tabular_, with required columns of chromosome, position,
91 and score (in any column).
92 The output dataset is interval_, or bigwigpos when individual positions are reported. (`Dataset missing?`_)
93
94 .. _interval: ./static/formatHelp.html#interval
95 .. _tabular: ./static/formatHelp.html#tab
96 .. _Dataset missing?: ./static/formatHelp.html
97
98 -----
99
100 **What it does**
101
102 The user selects a tabular dataset (such as a gd_snp dataset) and the column
103 containing the scores (such as an Fst-value for the SNP).
104 If the dataset is not in gd_snp format, the user also specifies
105 the columns containing chromosome and position; for gd_snp format
106 the metadata can be used to specify the chromosome and position.
107 Other inputs include
108 a percentage or raw score for the "cutoff", which should be greater than the
109 average value of the scores column. A higher value will give smaller intervals
110 in the output.
111 If a percentage (e.g. 95%) is specified
112 then that percentile of the scores is used as the cutoff;
113 percentile may not work well if many rows or SNPs have the same score
114 (in that case use a raw score). The program subtracts the
115 cutoff from every score, then finds genomic intervals (i.e., consecutive runs
116 of SNPs) whose total score cannot be increased by adding or subtracting one
117 or more adjusted scores at the ends of the interval.
118 Another input is the number of times the
119 data should be randomized (only intervals with score exceeding the maximum for
120 the randomized data are reported).
121 If 100 shuffles are requested, then any interval reported by the tool has a
122 score with probability less than 0.01 of being equaled or exceeded by chance.
123
124 -----
125
126 **Example**
127
128 - input (gd_snp)::
129
130 Contig222_chr2_9817738_9818143 220 C T 888.0 chr2 9817960 C 17 0 2 78 12 0 2 63 20 0 2 87 8 0 2 51 11 0 2 60 12 0 2 63 Y 76 0.093 1
131 Contig47_chr2_25470778_25471576 126 G A 888.0 chr2 25470896 G 12 0 2 63 14 0 2 69 14 0 2 69 10 0 2 57 18 0 2 81 13 0 2 66 N 11 0.289 1
132 ...
133 Contig115_chr2_61631913_61632510 310 G T 999.3 chr2 61632216 G 7 0 2 48 9 0 2 54 7 0 2 48 11 0 2 60 10 0 2 57 10 0 2 57 N 13 0.184 0
134 Contig31_chr2_67331584_67331785 39 C T 999.0 chr2 67331623 C 11 0 2 60 10 0 2 57 7 0 2 48 9 0 2 54 2 0 2 33 4 0 2 39 N 110 0.647 1
135 etc.
136
137 - output not reporting individual positions::
138
139 chr2 9817960 67331624 1272.2000
140
141 </help>
142 </tool>