comparison find_intervals.xml @ 21:d6b961721037

Miller Lab Devshed version 4c04e35b18f6
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 05 Nov 2012 12:44:17 -0500
parents 8ae67e9fb6ff
children 95a05c1ef5d5
comparison
equal deleted inserted replaced
20:8a4b8efbc82c 21:d6b961721037
20 20
21 "$out_format" 21 "$out_format"
22 </command> 22 </command>
23 23
24 <inputs> 24 <inputs>
25 <param name="input" type="data" format="tabular" label="Input"> 25 <param name="input" type="data" format="tabular" label="Dataset">
26 <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" /> 26 <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" />
27 </param> 27 </param>
28 28
29 <param name="score_col" type="data_column" data_ref="input" numerical="true" label="Column with score"/> 29 <param name="score_col" type="data_column" data_ref="input" numerical="true" label="Column with score"/>
30 30
31 <conditional name="cutoff"> 31 <conditional name="cutoff">
32 <param name="type" type="select" label="Cutoff type"> 32 <param name="type" type="select" label="Score-shift type">
33 <option value="percentage">percentage</option> 33 <option value="percentage">percentage</option>
34 <option value="value">value</option> 34 <option value="value">value</option>
35 </param> 35 </param>
36 <when value="percentage"> 36 <when value="percentage">
37 <param name="cutoff_pct" type="float" value="95" min="0" max="100" label="Percentage cutoff"/> 37 <param name="cutoff_pct" type="float" value="95" min="0" max="100" label="Percentage score-shift"/>
38 </when> 38 </when>
39 <when value="value"> 39 <when value="value">
40 <param name="cutoff_val" type="float" value="0.0" label="Value cutoff"/> 40 <param name="cutoff_val" type="float" value="0.0" label="Value score-shift"/>
41 </when> 41 </when>
42 </conditional> 42 </conditional>
43 43
44 <param name="shuffles" type="integer" min="0" value="0" label="Number of randomizations"/> 44 <param name="shuffles" type="integer" min="0" value="0" label="Number of randomizations"/>
45 45
46 <param name="out_format" type="select" format="integer" label="Report individual positions"> 46 <param name="out_format" type="select" format="integer" label="Report individual positions">
47 <option value="0" selected="true">No</option> 47 <option value="0" selected="true">no</option>
48 <option value="1">Yes</option> 48 <option value="1">yes</option>
49 </param> 49 </param>
50 50
51 <conditional name="override_metadata"> 51 <conditional name="override_metadata">
52 <param name="choice" type="select" format="integer" label="Choose columns" help="Note: you need to choose the columns if the input dataset is not gd_snp"> 52 <param name="choice" type="select" format="integer" label="Choose columns" help="Note: you must choose the columns if the input dataset is not gd_snp.">
53 <option value="0" selected="true">No, get columns from metadata</option> 53 <option value="0" selected="true">no, get columns from metadata</option>
54 <option value="1" >Yes, choose columns</option> 54 <option value="1" >yes, choose columns here</option>
55 </param> 55 </param>
56 <when value="0" /> 56 <when value="0" />
57 <when value="1"> 57 <when value="1">
58 <param name="ref_col" type="data_column" data_ref="input" numerical="false" label="Column with reference chromosome" help="Note: be sure the build in the metadata is the same as using here."/> 58 <param name="ref_col" type="data_column" data_ref="input" numerical="false" label="Column with reference chromosome" help="Note: be sure this corresponds to the build recorded in the metadata."/>
59 <param name="rpos_col" type="data_column" data_ref="input" numerical="true" label="Column with reference position" help="Note: either zero or one based positions will work"/> 59 <param name="rpos_col" type="data_column" data_ref="input" numerical="true" label="Column with reference position" help="Note: either zero-based or one-based positions will work."/>
60 </when> 60 </when>
61 </conditional> 61 </conditional>
62 </inputs> 62 </inputs>
63 63
64 <outputs> 64 <outputs>
103 if the dataset is not also gd_snp format, specifies 103 if the dataset is not also gd_snp format, specifies
104 the columns containing chromosome, position, and scores (such as an Fst-value for the SNP). 104 the columns containing chromosome, position, and scores (such as an Fst-value for the SNP).
105 For gd_snp format the metadata can be used to specify the chromosome and 105 For gd_snp format the metadata can be used to specify the chromosome and
106 position. 106 position.
107 Other inputs include 107 Other inputs include
108 a percentage or raw score for the "cutoff" which should be greater than the 108 a percentage or raw score for the "score-shift" which should be greater than the
109 average value for the scores column. A higher value will give smaller intervals 109 average value for the scores column. A higher value will give smaller intervals
110 in the output. 110 in the output.
111 If a percentage (e.g. 95%) is specified 111 If a percentage (e.g. 95%) is specified
112 then that percentile of the scores is used as the cutoff; 112 then that percentile of the scores is used as the shift;
113 percentile may not work well if many rows or SNPs have the same score 113 percentile may not work well if many rows or SNPs have the same score
114 (in that case use a raw score). The program subtracts the 114 (in that case use a raw score). The program subtracts the
115 cutoff from every score, then finds genomic intervals (i.e., consecutive runs 115 shift from every score, then finds genomic intervals (i.e., consecutive runs
116 of SNPs) whose total score cannot be increased by adding or subtracting one 116 of SNPs) whose total score cannot be increased by adding or subtracting one
117 or more adjusted scores at the ends of the interval. 117 or more adjusted scores at the ends of the interval.
118 Another input is the number of times the 118 Another input is the number of times the
119 data should be randomized (only intervals with score exceeding the maximum for 119 data should be randomized (only intervals with score exceeding the maximum for
120 the randomized data are reported). 120 the randomized data are reported).