Mercurial > repos > rnateam > graphclust_preprocessing
comparison preprocessing.xml @ 7:07ad2d77f28a draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 4379e712f76f2bb12ee2cc270dd8a0e806df2cd6
author | rnateam |
---|---|
date | Mon, 22 May 2017 12:45:22 -0400 |
parents | dff6a5a17221 |
children | a04e93fdb40a |
comparison
equal
deleted
inserted
replaced
6:dff6a5a17221 | 7:07ad2d77f28a |
---|---|
1 <tool id="preproc" name="Preprocessing" version="0.1"> | 1 <tool id="preproc" name="Preprocessing" version="0.2"> |
2 <requirements> | 2 <requirements> |
3 <requirement type="package" version="0.1.12">graphclust-wrappers</requirement> | 3 <requirement type="package" version="0.1.12">graphclust-wrappers</requirement> |
4 </requirements> | 4 </requirements> |
5 <stdio> | 5 <stdio> |
6 <exit_code range="1:" /> | 6 <exit_code range="1:" /> |
7 </stdio> | 7 </stdio> |
8 <command> | 8 <command> |
9 <![CDATA[ | 9 <![CDATA[ |
10 preprocessing.pl | |
11 '$fastaFile' | |
12 $max_length | |
13 $in_winShift | |
14 $min_seq_length | |
10 | 15 |
11 'preprocessing.pl' | 16 #if $SHAPEdata: |
12 '$fastaFile' $max_length $in_winShift $min_seq_length | 17 && |
13 | 18 python '$__tool_directory__/splitSHAPE.py' |
19 '$SHAPEdata' | |
20 $max_length | |
21 #end if | |
14 ]]> | 22 ]]> |
15 </command> | 23 </command> |
16 <inputs> | 24 <inputs> |
17 <param type="data" name="fastaFile" format="fasta" /> | 25 <param type="data" name="fastaFile" format="fasta" /> |
26 <param type="data" name="SHAPEdata" format="txt" optional="true" label="SHAPE data"/> | |
18 <param name="max_length" type="integer" value="10000" size="5" label="window size"/> | 27 <param name="max_length" type="integer" value="10000" size="5" label="window size"/> |
19 <param name="in_winShift" type="integer" value="100" size="5" label="window shift in percent"/> | 28 <param name="in_winShift" type="integer" value="100" size="5" label="window shift in percent"/> |
20 <param name="min_seq_length" type="integer" value="5" size="5" label="minimum sequence length"/> | 29 <param name="min_seq_length" type="integer" value="5" size="5" label="minimum sequence length"/> |
21 </inputs> | 30 </inputs> |
22 | |
23 <outputs> | 31 <outputs> |
24 <data name="data.fasta" format="fasta" from_work_dir="FASTA/data.fasta" label="data.fasta"/> | 32 <data name="data.fasta" format="fasta" from_work_dir="FASTA/data.fasta" label="data.fasta"/> |
25 <data name="data.map" format="txt" from_work_dir="FASTA/data.map" label="data.map"/> | 33 <data name="data.map" format="txt" from_work_dir="FASTA/data.map" label="data.map"/> |
26 <data name="data.names" format="txt" from_work_dir="FASTA/data.names" label="data.names"/> | 34 <data name="data.names" format="txt" from_work_dir="FASTA/data.names" label="data.names"/> |
27 <data name="data.fasta.scan" format="fasta" from_work_dir="FASTA/data.fasta.scan" label="data.fasta.scan"/> | 35 <data name="data.fasta.scan" format="fasta" from_work_dir="FASTA/data.fasta.scan" label="data.fasta.scan"/> |
28 <data name="FASTA" format="zip" from_work_dir="FASTA.zip" label="FASTA.ZIP"/> | 36 <data name="FASTA" format="zip" from_work_dir="FASTA.zip" label="FASTA.ZIP"/> |
37 <data name="shape_data_split" format="txt" from_work_dir="shape_data_split.react" label="SHAPE data splited"/> | |
29 </outputs> | 38 </outputs> |
30 | |
31 | |
32 <tests> | 39 <tests> |
33 <test> | 40 <test> |
34 <param name="fastaFile" value="input.fa"/> | 41 <param name="fastaFile" value="input.fa"/> |
35 <param name="max_length" value="10000"/> | 42 <param name="max_length" value="10000"/> |
36 <param name="in_winShift" value="100"/> | 43 <param name="in_winShift" value="100"/> |
37 <param name="min_seq_length" value="5"/> | 44 <param name="min_seq_length" value="5"/> |
38 <output name="data.fasta" file="FASTA/data.fasta"/> | 45 <output name="data.fasta" file="FASTA/data.fasta"/> |
39 <output name="data.map" file="FASTA/data.map" /> | 46 <output name="data.map" file="FASTA/data.map" /> |
40 <output name="data.names" file="FASTA/data.names"/> | 47 <output name="data.names" file="FASTA/data.names"/> |
41 <output name="data.fasta.scan" file="FASTA/data.fasta.scan" /> | 48 <output name="data.fasta.scan" file="FASTA/data.fasta.scan" /> |
42 </test> | 49 </test> |
43 </tests> | 50 </tests> |
44 | |
45 <help> | 51 <help> |
46 <![CDATA[ | 52 <![CDATA[ |
47 | 53 |
48 **What it does** | 54 **What it does** |
49 | 55 |
55 The shift of the sliding window can be defined via option *window shift in percent*. | 61 The shift of the sliding window can be defined via option *window shift in percent*. |
56 This parameter reflects the expected length of signals to be found. | 62 This parameter reflects the expected length of signals to be found. |
57 Slightly larger windows are usually ok. Too small windows can disturb existing signals. | 63 Slightly larger windows are usually ok. Too small windows can disturb existing signals. |
58 | 64 |
59 | 65 |
60 | |
61 | |
62 + **window shift in percent** : Relative window size in % for window shift during input preprocessing. | 66 + **window shift in percent** : Relative window size in % for window shift during input preprocessing. |
63 Please note that a small shift results in many more fragments for clustering. The benefit is that RNA | 67 Please note that a small shift results in many more fragments for clustering. The benefit is that RNA |
64 motifs/structures are not destroyed by arbitrary split points. Smaller | 68 motifs/structures are not destroyed by arbitrary split points. Smaller |
65 shifts usually increase the cluster quality. Too small shifts (<20) are not | 69 shifts usually increase the cluster quality. Too small shifts (<20) are not |
66 recommended as a dense center is "polluted" by overlapping fragments and | 70 recommended as a dense center is "polluted" by overlapping fragments and |
67 no other occurrences in the dataset can be found. | 71 no other occurrences in the dataset can be found. |
68 | 72 |
69 | 73 |
70 | |
71 | |
72 | |
73 + **minimum sequence length** : Minimal length of input sequences. | 74 + **minimum sequence length** : Minimal length of input sequences. |
74 Every input sequence below that length is ignored completely during clustering. | 75 Every input sequence below that length is ignored completely during clustering. |
75 | 76 |
76 | |
77 ]]></help> | 77 ]]></help> |
78 | |
79 | |
80 <citations> | 78 <citations> |
81 <citation type="doi">10.1093/bioinformatics/bts224</citation> | 79 <citation type="doi">10.1093/bioinformatics/bts224</citation> |
82 </citations> | 80 </citations> |
83 | |
84 | |
85 </tool> | 81 </tool> |