changeset 0:3eb088816194 draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 21aaee40723b5341b4236edeb0e72995c2054053
author rnateam
date Fri, 16 Dec 2016 07:36:58 -0500
parents
children cf2673f7eb44
files preprocessing.xml test-data/FASTA/data.fasta test-data/FASTA/data.fasta.scan test-data/FASTA/data.map test-data/FASTA/data.names test-data/input.fa
diffstat 6 files changed, 165 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preprocessing.xml	Fri Dec 16 07:36:58 2016 -0500
@@ -0,0 +1,85 @@
+<tool id="preproc" name="Preprocessing" version="0.1">
+    <requirements> <!-- external package dependencies of this tool -->
+        <requirement type="package" version="0.1">graphclust-wrappers</requirement>
+    </requirements>
+    <stdio> <!-- exit-code handling for the wrapped command -->
+        <exit_code range="1:" /> <!-- matches every exit code from 1 upward -->
+    </stdio>
+    <command>
+        <![CDATA[
+
+    'preprocessing.pl'
+    '$fastaFile'  $max_length $in_winShift $min_seq_length
+
+]]>
+    </command>
+    <inputs> <!-- the command line passes these four values positionally, in this order -->
+        <param name="fastaFile" type="data" format="fasta" label="input sequences" help="Sequences in FASTA format"/>
+        <param name="max_length" type="integer" value="10000" size="5" label="window size"/>
+        <param name="in_winShift" type="integer" value="100" size="5" label="window shift in percent"/>
+        <param name="min_seq_length" type="integer" value="5" size="5" label="minimum sequence length"/>
+    </inputs>
+
+    <outputs> <!-- every dataset is picked up from the job working directory via from_work_dir -->
+        <data name="data.fasta" label="data.fasta" format="fasta" from_work_dir="FASTA/data.fasta"/>
+        <data name="data.map" label="data.map" format="txt" from_work_dir="FASTA/data.map"/>
+        <data name="data.names" label="data.names" format="txt" from_work_dir="FASTA/data.names"/>
+        <data name="data.fasta.scan" label="data.fasta.scan" format="fasta" from_work_dir="FASTA/data.fasta.scan"/>
+        <data name="FASTA" label="FASTA.ZIP" format="zip" from_work_dir="FASTA.zip"/>
+    </outputs>
+
+
+    <tests> <!-- one test with the default parameter values; the FASTA zip output is not compared -->
+        <test>
+            <param name="fastaFile" value="input.fa"/>
+            <param name="max_length" value="10000"/>
+            <param name="in_winShift" value="100"/>
+            <param name="min_seq_length" value="5"/>
+            <output name="data.fasta" file="FASTA/data.fasta"/>
+            <output name="data.map" file="FASTA/data.map"/>
+            <output name="data.names" file="FASTA/data.names"/>
+            <output name="data.fasta.scan" file="FASTA/data.fasta.scan"/>
+        </test>
+    </tests>
+
+    <help>
+<![CDATA[
+
+**What it does**
+
+The tool takes a file of sequences in FASTA format as input and creates the final input for GraphClust based on the given parameters.
+
+**Parameters**
+
++ **window size** : All input sequences are split into fragments of this length.
+  The shift of the sliding window can be defined via option *window shift in percent*.
+  This parameter reflects the expected length of signals to be found.
+  Slightly larger windows are usually ok. Too small windows can disturb existing signals.
+
+
+
+
++ **window shift in percent** : Window shift, relative to the window size in %, used during input preprocessing.
+  Please note that a small shift results in many more fragments for clustering. The benefit is that RNA
+  motifs/structures are not destroyed by arbitrary split points. Smaller
+  shifts usually increase the cluster quality. Too small shifts (<20) are not
+  recommended as a dense center is "polluted" by overlapping fragments and
+  no other occurrences in the dataset can be found.
+
+
+
+
+
++ **minimum sequence length** : Minimal length of input sequences.
+  Every input sequence below that length is ignored completely during clustering.
+
+
+    ]]></help>
+
+
+    <citations> <!-- NOTE(review): presumably the GraphClust publication; confirm the DOI target -->
+        <citation type="doi">10.1093/bioinformatics/bts224</citation>
+    </citations>
+
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/FASTA/data.fasta	Fri Dec 16 07:36:58 2016 -0500
@@ -0,0 +1,20 @@
+>1 SEQ1#1#120#+ ORIGID RF00001_rep.0_AL096764.11/46123-46004_1 ORIGHEAD RF00001_rep.0
+GUCUAUGGCCAUACCACCCUGAAUGUGCUUGAUCUCAUCUGAUCUCGUGAAGCCAAGCAGGGUGGGGCCUAGUUAGUACUUGGAUGGGAGACUUCCUGGGAAUAUAAGCUGCUGUUGGCU
+>2 SEQ2#1#118#+ ORIGID RF00001_rep.1_U89919.1/939-1056_2 ORIGHEAD RF00001_rep.1
+CUUUACGGCCACACCACCCUGAACGCACCGGAUCUCGACUGACCUUGAAAGCUAAGCAGGAUCGGGCCUGGUUAGUAUUGGGAUGGCAGACCCCCUGGAAAUACAGGGUGCUGAAGGU
+>3 SEQ3#1#104#+ ORIGID RF00001_rep.2_AJ508600.1/161-58_3 ORIGHEAD RF00001_rep.2
+GUCUACAGCCAUACCAUCCUGAACAUGCCAGAUCUUGUCUGACCUCUGAAGCUAAGCAGGGUCAAGCCUGGUUAGUACUUGGGAGAAGCUGGUGUGGCUAGACC
+>4 SEQ4#1#73#+ ORIGID RF00005_rep.0_M15347.1/1040-968_4 ORIGHEAD RF00005_rep.0
+GGCUCCAUAGCUCAGGGGUUAGAGCACUGGUCUUGUAAACCAGGGGUCGCGAGUUCAAUUCUCGCUGGGGCUU
+>5 SEQ5#1#72#+ ORIGID RF00005_rep.10_X58792.1/174-245_5 ORIGHEAD RF00005_rep.10
+GGUCCCAUGGUGUAAUGGUUAGCACUCUGGACUUUGAAUCCAGCGAUCCGAGUUCAAAUCUCGGUGGGACCU
+>6 SEQ6#1#66#+ ORIGID RF00005_rep.11_AF346992.1/15890-15955_6 ORIGHEAD RF00005_rep.11
+GUCCUUGUAGUAUAAACUAAUACACCAGUCUUGUAAACCGGAGAUGAAAACCUUUUUCCAAGGACA
+>7 SEQ7#1#83#+ ORIGID RF00005_rep.12_AC108081.2/59868-59786_7 ORIGHEAD RF00005_rep.12
+GUCAGGAUGGCCGAGCGGUCUAAGGCGCUGCGUUCAGGUCGCAGUCUCCCCUGGAGGCGUGGGUUCGAAUCCCACUUCUGACA
+>8 SEQ8#1#70#+ ORIGID RF00005_rep.13_AC067849.6/4771-4840_8 ORIGHEAD RF00005_rep.13
+CACUGUAAAGCUAACUUAGCAUUAACCUUUUAAGUUAAAGAUUAAGAGAACCAACACCUCUUUACAGUGA
+>9 SEQ9#1#73#+ ORIGID RF00005_rep.14_AL021808.2/65570-65498_9 ORIGHEAD RF00005_rep.14
+GCUUCUGUAGUGUAGUGGUUAUCACGUUCGCCUCACACGCGAAAGGUCCCCGGUUCGAAACCGGGCAGAAGCA
+>10 SEQ10#1#73#+ ORIGID RF00005_rep.15_AC008443.10/42590-42518_10 ORIGHEAD RF00005_rep.15
+GCCCGGCUAGCUCAGUCGGUAGAGCAUGAGACUCUUAAUCUCAGGGUCGUGGGUUCGAGCCCCACGUUGGGCG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/FASTA/data.fasta.scan	Fri Dec 16 07:36:58 2016 -0500
@@ -0,0 +1,20 @@
+>SEQ1 ORIGID RF00001_rep.0_AL096764.11/46123-46004_1 ORIGHEAD RF00001_rep.0
+GUCUAUGGCCAUACCACCCUGAAUGUGCUUGAUCUCAUCUGAUCUCGUGAAGCCAAGCAGGGUGGGGCCUAGUUAGUACUUGGAUGGGAGACUUCCUGGGAAUAUAAGCUGCUGUUGGCU
+>SEQ2 ORIGID RF00001_rep.1_U89919.1/939-1056_2 ORIGHEAD RF00001_rep.1
+CUUUACGGCCACACCACCCUGAACGCACCGGAUCUCGACUGACCUUGAAAGCUAAGCAGGAUCGGGCCUGGUUAGUAUUGGGAUGGCAGACCCCCUGGAAAUACAGGGUGCUGAAGGU
+>SEQ3 ORIGID RF00001_rep.2_AJ508600.1/161-58_3 ORIGHEAD RF00001_rep.2
+GUCUACAGCCAUACCAUCCUGAACAUGCCAGAUCUUGUCUGACCUCUGAAGCUAAGCAGGGUCAAGCCUGGUUAGUACUUGGGAGAAGCUGGUGUGGCUAGACC
+>SEQ4 ORIGID RF00005_rep.0_M15347.1/1040-968_4 ORIGHEAD RF00005_rep.0
+GGCUCCAUAGCUCAGGGGUUAGAGCACUGGUCUUGUAAACCAGGGGUCGCGAGUUCAAUUCUCGCUGGGGCUU
+>SEQ5 ORIGID RF00005_rep.10_X58792.1/174-245_5 ORIGHEAD RF00005_rep.10
+GGUCCCAUGGUGUAAUGGUUAGCACUCUGGACUUUGAAUCCAGCGAUCCGAGUUCAAAUCUCGGUGGGACCU
+>SEQ6 ORIGID RF00005_rep.11_AF346992.1/15890-15955_6 ORIGHEAD RF00005_rep.11
+GUCCUUGUAGUAUAAACUAAUACACCAGUCUUGUAAACCGGAGAUGAAAACCUUUUUCCAAGGACA
+>SEQ7 ORIGID RF00005_rep.12_AC108081.2/59868-59786_7 ORIGHEAD RF00005_rep.12
+GUCAGGAUGGCCGAGCGGUCUAAGGCGCUGCGUUCAGGUCGCAGUCUCCCCUGGAGGCGUGGGUUCGAAUCCCACUUCUGACA
+>SEQ8 ORIGID RF00005_rep.13_AC067849.6/4771-4840_8 ORIGHEAD RF00005_rep.13
+CACUGUAAAGCUAACUUAGCAUUAACCUUUUAAGUUAAAGAUUAAGAGAACCAACACCUCUUUACAGUGA
+>SEQ9 ORIGID RF00005_rep.14_AL021808.2/65570-65498_9 ORIGHEAD RF00005_rep.14
+GCUUCUGUAGUGUAGUGGUUAUCACGUUCGCCUCACACGCGAAAGGUCCCCGGUUCGAAACCGGGCAGAAGCA
+>SEQ10 ORIGID RF00005_rep.15_AC008443.10/42590-42518_10 ORIGHEAD RF00005_rep.15
+GCCCGGCUAGCUCAGUCGGUAGAGCAUGAGACUCUUAAUCUCAGGGUCGUGGGUUCGAGCCCCACGUUGGGCG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/FASTA/data.map	Fri Dec 16 07:36:58 2016 -0500
@@ -0,0 +1,10 @@
+1 SEQ1#1#120#+
+2 SEQ2#1#118#+
+3 SEQ3#1#104#+
+4 SEQ4#1#73#+
+5 SEQ5#1#72#+
+6 SEQ6#1#66#+
+7 SEQ7#1#83#+
+8 SEQ8#1#70#+
+9 SEQ9#1#73#+
+10 SEQ10#1#73#+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/FASTA/data.names	Fri Dec 16 07:36:58 2016 -0500
@@ -0,0 +1,10 @@
+1 SEQ1#1#120#+ ORIGID RF00001_rep.0_AL096764.11/46123-46004_1 ORIGHEAD RF00001_rep.0
+2 SEQ2#1#118#+ ORIGID RF00001_rep.1_U89919.1/939-1056_2 ORIGHEAD RF00001_rep.1
+3 SEQ3#1#104#+ ORIGID RF00001_rep.2_AJ508600.1/161-58_3 ORIGHEAD RF00001_rep.2
+4 SEQ4#1#73#+ ORIGID RF00005_rep.0_M15347.1/1040-968_4 ORIGHEAD RF00005_rep.0
+5 SEQ5#1#72#+ ORIGID RF00005_rep.10_X58792.1/174-245_5 ORIGHEAD RF00005_rep.10
+6 SEQ6#1#66#+ ORIGID RF00005_rep.11_AF346992.1/15890-15955_6 ORIGHEAD RF00005_rep.11
+7 SEQ7#1#83#+ ORIGID RF00005_rep.12_AC108081.2/59868-59786_7 ORIGHEAD RF00005_rep.12
+8 SEQ8#1#70#+ ORIGID RF00005_rep.13_AC067849.6/4771-4840_8 ORIGHEAD RF00005_rep.13
+9 SEQ9#1#73#+ ORIGID RF00005_rep.14_AL021808.2/65570-65498_9 ORIGHEAD RF00005_rep.14
+10 SEQ10#1#73#+ ORIGID RF00005_rep.15_AC008443.10/42590-42518_10 ORIGHEAD RF00005_rep.15
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.fa	Fri Dec 16 07:36:58 2016 -0500
@@ -0,0 +1,20 @@
+>RF00001_rep.0_AL096764.11/46123-46004 RF00001_rep.0
+GUCUAUGGCCAUACCACCCUGAAUGUGCUUGAUCUCAUCUGAUCUCGUGAAGCCAAGCAGGGUGGGGCCUAGUUAGUACUUGGAUGGGAGACUUCCUGGGAAUAUAAGCUGCUGUUGGCU
+>RF00001_rep.1_U89919.1/939-1056 RF00001_rep.1
+CUUUACGGCCACACCACCCUGAACGCACCGGAUCUCGACUGACCUUGAAAGCUAAGCAGGAUCGGGCCUGGUUAGUAUUGGGAUGGCAGACCCCCUGGAAAUACAGGGUGCUGAAGGU
+>RF00001_rep.2_AJ508600.1/161-58 RF00001_rep.2
+GUCUACAGCCAUACCAUCCUGAACAUGCCAGAUCUUGUCUGACCUCUGAAGCUAAGCAGGGUCAAGCCUGGUUAGUACUUGGGAGAAGCUGGUGUGGCUAGACC
+>RF00005_rep.0_M15347.1/1040-968 RF00005_rep.0
+GGCUCCAUAGCUCAGGGGUUAGAGCACUGGUCUUGUAAACCAGGGGUCGCGAGUUCAAUUCUCGCUGGGGCUU
+>RF00005_rep.10_X58792.1/174-245 RF00005_rep.10
+GGUCCCAUGGUGUAAUGGUUAGCACUCUGGACUUUGAAUCCAGCGAUCCGAGUUCAAAUCUCGGUGGGACCU
+>RF00005_rep.11_AF346992.1/15890-15955 RF00005_rep.11
+GUCCUUGUAGUAUAAACUAAUACACCAGUCUUGUAAACCGGAGAUGAAAACCUUUUUCCAAGGACA
+>RF00005_rep.12_AC108081.2/59868-59786 RF00005_rep.12
+GUCAGGAUGGCCGAGCGGUCUAAGGCGCUGCGUUCAGGUCGCAGUCUCCCCUGGAGGCGUGGGUUCGAAUCCCACUUCUGACA
+>RF00005_rep.13_AC067849.6/4771-4840 RF00005_rep.13
+CACUGUAAAGCUAACUUAGCAUUAACCUUUUAAGUUAAAGAUUAAGAGAACCAACACCUCUUUACAGUGA
+>RF00005_rep.14_AL021808.2/65570-65498 RF00005_rep.14
+GCUUCUGUAGUGUAGUGGUUAUCACGUUCGCCUCACACGCGAAAGGUCCCCGGUUCGAAACCGGGCAGAAGCA
+>RF00005_rep.15_AC008443.10/42590-42518 RF00005_rep.15
+GCCCGGCUAGCUCAGUCGGUAGAGCAUGAGACUCUUAAUCUCAGGGUCGUGGGUUCGAGCCCCACGUUGGGCG