comparison kmersvm/nullseq.xml @ 0:7fe1103032f7 draft

Uploaded
author cafletezbrant
date Mon, 20 Aug 2012 18:07:22 -0400
parents
children fd740d515502
comparison
equal deleted inserted replaced
-1:000000000000 0:7fe1103032f7
<tool id="kmersvm_nullseq" name="Generate Null Sequence">
  <description>using random sampling from genomic DNA</description>

  <!--
    Runs scripts/nullseq_generate.py with the values of the parameters
    declared under <inputs>. The "-e" option is appended only when an
    "excluded" dataset was selected (its string value is not "None").
    The last argument is the "path" column of the row chosen in the
    indices_path select.
    NOTE(review): $dbkey is not declared as a param in this file; it is
    presumably supplied by Galaxy from the input dataset - confirm.
  -->
  <command interpreter="python">scripts/nullseq_generate.py -q
    #if str($excluded) !="None":
      -e $excluded
    #end if
    -x $fold -r $rseed -g $gc_err -t $rpt_err $input $dbkey ${indices_path.fields.path}
  </command>

  <inputs>
    <param name="fold" type="integer" value="1" label="# of Fold-Increase" />
    <param name="gc_err" type="float" value="0.02" label="Allowable GC Error" />
    <param name="rpt_err" type="float" value="0.02" label="Allowable Repeat Error" />
    <param name="rseed" type="integer" value="1" label="Random Number Seed" />

    <!--
      The validators must be children of the param they validate. They were
      previously siblings of a self-closed param and therefore not attached
      to it. The second validator checks the dataset's dbkey metadata against
      column 0 of nullseq_indices.loc.
    -->
    <param format="interval" name="input" type="data" label="BED File of Positive Regions">
      <validator type="unspecified_build" />
      <validator type="dataset_metadata_in_file"
                 filename="nullseq_indices.loc"
                 metadata_name="dbkey"
                 metadata_column="0"
                 message="Sequences are currently unavailable for the specified build." />
    </param>

    <param name="excluded" optional="true" format="interval" type="data" value="None" label="Excluded Regions (optional)" />

    <!--
      Options are read from nullseq_indices.loc: column 0 is used as both
      the dbkey and the option value, column 1 as the display name and
      column 2 as the path passed on the command line.
    -->
    <param name="indices_path" type="select" label="Available Datasets">
      <options from_file="nullseq_indices.loc">
        <column name="dbkey" index="0"/>
        <column name="value" index="0"/>
        <column name="name" index="1"/>
        <column name="path" index="2"/>
        <!--filter type="data_meta" ref="input" key="dbkey" column="0" /-->
      </options>
    </param>
  </inputs>

  <outputs>
    <!-- Collected from the file nullseq_output.bed in the job working directory. -->
    <data format="interval" name="nullseq_output" from_work_dir="nullseq_output.bed" />
  </outputs>

  <tests>
    <test>
      <param name="input" value="nullseq_test.bed" />
      <param name="fold" value="1" />
      <param name="gc_err" value="0.02" />
      <param name="rpt_err" value="0.02" />
      <param name="rseed" value="1" />
      <param name="indices_path" value="hg19" />
      <!-- The name must match the <data> element declared under <outputs>. -->
      <output name="nullseq_output" file="nullseq_output.bed" />
    </test>
  </tests>

  <!-- Help text is reStructuredText; it is kept at column 0 because leading whitespace is significant. -->
  <help>

**What it does**

Takes an input BED file and generates a set of sequences for use as negative data (null sequences) in Train SVM similar in length, GC content and repeat fraction. Uses random sampling for efficiency.

**Parameters**

Fold-Increase: Size of desired null sequence data set expressed as multiple of the size of the input data set.

GC Error, Repeat Error: Acceptable difference between a positive sequence and its corresponding null sequence in terms of GC content, repeat content.

Random Number Seed: Seed for random number generator.

Excluded Regions: Submitted regions will be excluded from null sequence generation.

----

**Example**

Given a BED file containing::

    chr1 10212203 10212303
    chr1 103584748 103584848
    chr1 105299130 105299230
    chr1 106367772 106367872

Tool will output BED file matched in length, GC content and repeat content::

    chr1 3089935 3090035
    chr1 5031335 5031435
    chr1 5103742 5103842
    chr1 5650372 5650472

  </help>
</tool>