comparison preprocessing.xml @ 0:3eb088816194 draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 21aaee40723b5341b4236edeb0e72995c2054053
author rnateam
date Fri, 16 Dec 2016 07:36:58 -0500
parents
children cf2673f7eb44
comparison
equal deleted inserted replaced
-1:000000000000 0:3eb088816194
1 <tool id="preproc" name="Preprocessing" version="0.1">
2 <requirements> <!-- Package dependencies Galaxy must resolve before the job can run. -->
3 <requirement version="0.1" type="package">graphclust-wrappers</requirement>
4 </requirements>
5 <stdio> <!-- Any exit code of 1 or higher marks the job as failed. -->
6 <exit_code range="1:" level="fatal"/>
7 </stdio>
8 <command>
9 <![CDATA[
10
11 'preprocessing.pl'
12 '$fastaFile' $max_length $in_winShift $min_seq_length
13
14 ]]>
15 </command> <!-- Runs preprocessing.pl with four positional arguments: the input FASTA path, then the window size, window shift and minimum sequence length parameters. -->
16 <inputs> <!-- User-facing parameters; each one is substituted by name into the command template. -->
17 <param name="fastaFile" type="data" format="fasta" label="input sequences (FASTA)"/>
18 <param name="max_length" type="integer" value="10000" label="window size"/>
19 <param name="in_winShift" type="integer" value="100" label="window shift in percent"/>
20 <param name="min_seq_length" type="integer" value="5" label="minimum sequence length"/>
21 </inputs>
22
23 <outputs> <!-- Datasets picked up from the job working directory. NOTE(review): names containing '.' may not be usable as Cheetah variables; kept unchanged for compatibility - confirm. -->
24 <data name="data.fasta" label="data.fasta" format="fasta" from_work_dir="FASTA/data.fasta"/>
25 <data name="data.map" label="data.map" format="txt" from_work_dir="FASTA/data.map"/>
26 <data name="data.names" label="data.names" format="txt" from_work_dir="FASTA/data.names"/>
27 <data name="data.fasta.scan" label="data.fasta.scan" format="fasta" from_work_dir="FASTA/data.fasta.scan"/>
28 <data name="FASTA" label="FASTA.ZIP" format="zip" from_work_dir="FASTA.zip"/>
29 </outputs>
30
31
32 <tests> <!-- One functional test: runs the tool on input.fa with the default parameter values and compares the four text outputs to reference files. -->
33 <test>
34 <param value="input.fa" name="fastaFile"/>
35 <param value="10000" name="max_length"/>
36 <param value="100" name="in_winShift"/>
37 <param value="5" name="min_seq_length"/>
38 <output file="FASTA/data.fasta" name="data.fasta"/>
39 <output file="FASTA/data.map" name="data.map"/>
40 <output file="FASTA/data.names" name="data.names"/>
41 <output file="FASTA/data.fasta.scan" name="data.fasta.scan"/>
42 </test>
43 </tests>
44
45 <help>
46 <![CDATA[
47
48 **What it does**
49
50 The tool takes as input a file of sequences in FASTA format and creates the final input for GraphClust based on the given parameters.
51
52 **Parameters**
53
54 + **window size** : All input sequences are split into fragments of this length.
55 The shift of the sliding window can be defined via option *window shift in percent*.
56 This parameter reflects the expected length of signals to be found.
57 Slightly larger windows are usually ok. Too small windows can disturb existing signals.
58
59
60
61
62 + **window shift in percent** : Shift of the sliding window during input preprocessing, given in % of the window size.
63 Please note that a small shift results in many more fragments for clustering. The benefit is that RNA
64 motifs/structures are not destroyed by arbitrary split points. Smaller
65 shifts usually increase the cluster quality. Too small shifts (<20) are not
66 recommended as a dense center is "polluted" by overlapping fragments and
67 no other occurrences in the dataset can be found.
68
69
70
71
72
73 + **minimum sequence length** : Minimal length of input sequences.
74 Every input sequence below that length is ignored completely during clustering.
75
76
77 ]]></help>
78
79
80 <citations> <!-- Publication to cite when using this tool, referenced by DOI. -->
81 <citation type="doi">10.1093/bioinformatics/bts224</citation>
82 </citations>
83
84
85 </tool>