Mercurial > repos > cafletezbrant > kmersvm
comparison kmersvm/nullseq.xml @ 0:7fe1103032f7 draft
Uploaded
author | cafletezbrant |
---|---|
date | Mon, 20 Aug 2012 18:07:22 -0400 |
parents | |
children | fd740d515502 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7fe1103032f7 |
---|---|
1 <tool id="kmersvm_nullseq" name="Generate Null Sequence"> | |
2 <description>using random sampling from genomic DNA</description> | |
3 <command interpreter="python">scripts/nullseq_generate.py -q | |
4 #if str($excluded) !="None": | |
5 -e $excluded | |
6 #end if | |
7 -x $fold -r $rseed -g $gc_err -t $rpt_err $input $dbkey ${indices_path.fields.path} | |
8 </command> | |
9 <inputs> | |
10 <param name="fold" type="integer" value="1" label="# of Fold-Increase" /> | |
11 <param name="gc_err" type="float" value="0.02" label="Allowable GC Error" /> | |
12 <param name="rpt_err" type="float" value="0.02" label="Allowable Repeat Error" /> | |
13 <param name="rseed" type="integer" value="1" label="Random Number Seed" /> | |
14 <!-- Validators only take effect as children of the param they check, so the input param must enclose them. --> <param format="interval" name="input" type="data" label="BED File of Positive Regions"> | |
15 <validator type="unspecified_build" /> | |
16 <validator type="dataset_metadata_in_file" filename="nullseq_indices.loc" metadata_name="dbkey" metadata_column="0" message="Sequences are currently unavailable for the specified build." /> </param> | |
17 <param name="excluded" optional="true" format="interval" type="data" value="None" label="Excluded Regions (optional)" /> | |
18 <param name="indices_path" type="select" label="Available Datasets"> | |
19 <options from_file="nullseq_indices.loc"> | |
20 <column name="dbkey" index="0"/> | |
21 <column name="value" index="0"/> | |
22 <column name="name" index="1"/> | |
23 <column name="path" index="2"/> | |
24 <!--filter type="data_meta" ref="input" key="dbkey" column="0" /--> | |
25 </options> | |
26 </param> | |
27 </inputs> | |
28 <outputs> | |
29 <data format="interval" name="nullseq_output" from_work_dir="nullseq_output.bed" /> | |
30 </outputs> | |
31 <tests> | |
32 <test> | |
33 <param name="input" value="nullseq_test.bed" /> | |
34 <param name="fold" value="1" /> | |
35 <param name="gc_err" value="0.02" /> | |
36 <param name="rpt_err" value="0.02" /> | |
37 <param name="rseed" value="1" /> | |
38 <param name="indices_path" value="hg19" /> | |
39 <!-- The output name must match the data element declared in the outputs section. --> <output name="nullseq_output" file="nullseq_output.bed" /> | |
40 </test> | |
41 </tests> | |
42 <help> | |
43 | |
44 **What it does** | |
45 | |
46 Takes an input BED file and generates a set of null sequences, matched to the input regions in length, GC content and repeat fraction, for use as negative data in Train SVM. Uses random sampling for efficiency. | |
47 | |
48 **Parameters** | |
49 | |
50 Fold-Increase: Size of desired null sequence data set expressed as multiple of the size of the input data set. | |
51 | |
52 GC Error, Repeat Error: Acceptable difference between a positive sequence and its corresponding null sequence in terms of GC content and repeat content, respectively. | |
53 | |
54 Random Number Seed: Seed for random number generator. | |
55 | |
56 Excluded Regions: Submitted regions will be excluded from null sequence generation. | |
57 | |
58 ---- | |
59 | |
60 **Example** | |
61 | |
62 Given a BED file containing:: | |
63 | |
64 chr1 10212203 10212303 | |
65 chr1 103584748 103584848 | |
66 chr1 105299130 105299230 | |
67 chr1 106367772 106367872 | |
68 | |
69 The tool will output a BED file matched in length, GC content and repeat content:: | |
70 | |
71 chr1 3089935 3090035 | |
72 chr1 5031335 5031435 | |
73 chr1 5103742 5103842 | |
74 chr1 5650372 5650472 | |
75 | |
76 </help> | |
77 </tool> |