Repository 'selectsequencesfrommsa'
hg clone https://toolshed.g2.bx.psu.edu/repos/rnateam/selectsequencesfrommsa

Changeset 0:48fc2c21fe1c (2017-03-11)
Next changeset 1:876c70c6ecad (2019-02-08)
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/selectsequencesfrommsa commit c4bbc5b1d62a640a80681d7d467aee9eff4aa17f-dirty
added:
selectsequencesfrommsa.xml
test-data/result.clustal
test-data/result.selected
b
diff -r 000000000000 -r 48fc2c21fe1c selectsequencesfrommsa.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/selectsequencesfrommsa.xml Sat Mar 11 16:59:55 2017 -0500
[
@@ -0,0 +1,62 @@
+<tool id="selectsequencesfrommsa" name="Select Sequences" version="1.0.2">
+  <!-- Galaxy wrapper around the SelectSequencesFromMSA command-line program. -->
+  <description>
+    Tool to select representative sequences from a multiple sequence alignment.
+  </description>
+  <requirements>
+    <requirement type="package" version="1.0.2">selectsequencesfrommsa</requirement>
+  </requirements>
+  <!-- -o is given the current working directory (result.selected is collected from there); stdout goes to the file "warnings". -->
+  <command detect_errors="aggressive"><![CDATA[
+    SelectSequencesFromMSA
+      -c '$input_clustal'
+      $x
+      -o "`pwd`"
+      -n $n
+      -i $i
+      -m $m
+      > warnings
+    ]]></command>
+  <inputs>
+    <param name="input_clustal" type="data" format="clustal" label="Input clustal alignment"/>
+    <param argument="-n" type="integer" value="6" min="1" label="Number of sequences in the output alignment." help=""/>
+    <param argument="-i" type="float" value="80" min="0.1" label="Optimize for this percentage of mean pairwise identity" help=""/>
+    <param argument="-m" type="float" value="95" min="0.1" label="Sequences with a higher percentage of pairwise identity will be removed." help=""/>
+    <param argument="-x" truevalue="-x" falsevalue="" checked="True" type="boolean" label="The first sequence (=reference sequence) is always present in the output alignment." help=""/>
+  </inputs>
+  <outputs>
+    <data format="clustal" name="clustal" from_work_dir="result.selected" label="Clustal alignment of selected sequences"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="input_clustal" value="result.clustal"/>
+      <!-- name must reference the <data name="clustal"> output declared above, not the file name. -->
+      <output name="clustal" file="result.selected"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+**What it does**
+
+Tool to select representative sequences from a multiple sequence alignment in clustal format.
+Useful before running RNAz, RNAcode, RNAalifold on alignments with many entries.
+
+**Input**
+
+Multiple sequence alignment in clustal format
+
+**Options**
+
+- ``-n`` Number of sequences in the output alignment. (Default: 6)
+- ``-i`` Optimize for this percentage of mean pairwise identity (Default: 80)
+- ``-m`` Sequences with a higher percentage of pairwise identity will be removed (Default: 95)
+- ``-x`` The first sequence (=reference sequence) is always present in the output alignment. (Default: True)
+
+**Output**
+
+Clustal alignment with selected sequences
+
+]]></help>
+  <citations>
+    <citation type="doi">10.1093/nar/gkw558</citation>
+  </citations>
+</tool>
b
diff -r 000000000000 -r 48fc2c21fe1c test-data/result.clustal
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/result.clustal Sat Mar 11 16:59:55 2017 -0500
b
b'@@ -0,0 +1,2797 @@\n+CLUSTAL 2.1 multiple sequence alignment\n+\n+AB001721.1/2735-2851            .CC.CGGUGACU..AU.AGA.GAG.A.GGG.CC.ACAC.CCGU..U.C.CCAUCCCGAAC\n+gb|CP006694.1|:152734-152851    uCC.CGGUGACU..AU.AGA.GAG.A.GGG.CC.ACAC.CCGU..U.C.CCAUCCCGAAC\n+gi|526641887:1-118              uCC.UGGUGAUU..AU.GGA.GAG.A.AGG.CC.AUAC.CCGU..U.C.CCAUUCCGAAC\n+gi|216762:421-538               uCC.UGGUGAUU..AU.GGA.GAG.A.AGG.CC.AUAC.CCGU..U.C.CCAUUCCGAAC\n+gi|452192414:1-117              .UC.UGUGCGCA..AU.AGA.GCA.A.GGG.UC.ACAC.CCGU..U.C.CCAUCCCGAAC\n+gi|216767:249-367               .UGcUGGCGCCA..AU.AGA.GAG.G.GUG.AU.ACAC.CUGU..U.C.CCAUCCCGAAC\n+gi|507148580:1-114              .CC.CGGUGGCC..AU.AGC.AGA.G.UGG.AU.AUAC.CCGU..U.C.CCAUCCCGAAC\n+gi|452192347:1-113              .CC.CGGUGGCC..AC.AGC.AGG.G.UGG.AC.ACAC.CCGU..U.C.CCAUCCCGAAC\n+gi|451991762:1-111              .CC.UGGUGGUU..AA.AGA.AAA.G.AGG.AA.ACAC.CUGU..U.A.UCAUUCCGAAC\n+gi|452192371:1-112              .CC.CGGUGGUC..AU.AGC.AGA.G.UGG.UC.AUAC.CCGU..U.C.CCAUCCCGAAC\n+gi|452192196:1-116              .CU.UGGUGACU..AU.AGU.GGA.G.GUG.UU.ACAC.CUGU..U.C.CCAUUCCGAAC\n+gi|452192221:1-115              .--.CGGUGACC..AU.AGA.GAA.A.GUG.AU.ACAC.CCGU..U.C.CCAUUCCGAAC\n+gi|507148498:1-116              .-U.CGGUGACC..AC.AGA.GAA.A.GUG.AU.ACAC.CCGU..U.C.CCAUUCCGAAC\n+gi|507148564:1-107              .--.-GGUGGUU..AA.AGA.AAA.G.AGG.AA.ACAC.CUGU..U.A.UCAUUCCGAAC\n+gb|KU291355.1|:36-147           .CC.UGGUGGUU..AA.AGA.AAA.G.AGG.AA.ACAC.CUGU..U.A.UCAUUCCGAAC\n+gb|CP002873.1|:2145056-2145173  uUU.CGGUGACC..AU.AGA.GAA.A.GUG.AU.ACAC.CCGU..U.C.CCAUUCCGAAC\n+gb|CP006647.2|:c434557-434446   .CC.UGGUGAUU..GA.AGA.AAA.G.AGG.AA.ACAC.CUGU..U.A.UCAUUCCGAAC\n+gi|507148596:1-116              .CC.UGGUGACA..AU.CGCgGAG.G.GGG.UC.CCAC.CCGU..U.C.CCAUUCCGAAC\n+gi|451992005:1-111              .CC.UGGUGAUU..AA.AGA.AAA.G.AGG.AA.ACAC.CUGU..U.A.UCAUUCCGAAC\n+gb|CP004267.1|:c439103-438992   .CC.UGGUGAUU..AA.AGA.AAA.G.AGG.AA.ACAC.CUGU..U.A.UCAUUCCGAAC\n+gb|CP007564.1|:466414-466525    
.CC.UGGUGGUU..AA.AGA.AAA.G.AGG.AA.ACAC.CUGU..U.A.UCAUUCCGAAC\n+gi|452192501:1-108              .--.UGGUGGUU..AA.AGA.AAA.G.AGG.AA.ACAC.CUGU..U.A.UCAUUCCGAAC\n+gi|452192103:1-116              .-U.CGGUGACC..AU.AGA.GAA.A.GUG.AU.ACAC.CCGU..U.C.CCAUUCCGAAC\n+gi|451991616:1-110              .-C.AGGUGGCC..AU.AGU.GGA.G.AGG.UA.AUAC.CCGU..U.C.CCAUCCCGAAC\n+gb|CP005745.1|:c431115-431004   .CC.UGGUGAUU..CA.AGA.AAA.G.AGG.AA.ACAC.CUGU..C.A.UCAUUCCGAAC\n+gb|CP005829.1|:c428563-428452   .CC.UGGUGAUU..AA.AGA.AAA.G.AGG.AA.ACAC.CUGU..U.A.UCAUUCCGAAC\n+gb|CP000049.1|:c435752-435641   cCC.UGGUGAUU..AA.AGA.AAA.G.AGG.AA.ACAC.CUGU..U.A.UCAUUCCGAAC\n+gb|CP005851.1|:481735-481846    .CC.UGGUGAUU..GA.AGA.AAA.G.AGG.AA.ACAC.CUGU..U.A.UCAUUCCGAAC\n+gi|526641892:1-111              .CC.UGGUGGUU..AA.AGA.AAA.G.AGG.AA.ACAC.CUGU..U.A.UCAUUCCGAAC\n+gi|4468110:2-116                .CC.UGAUGACC..AU.AGC.GAG.U.UGG.UA.CCAC.CCCU..U.C.CCAUCCCGAAC\n+gb|CP015136.1|:656498-656615    .CC.CGGUGGCC..AU.AGG.GGA.G.GGG.UC.ACAC.CCGU..U.C.CCAUUCCGAAC\n+gb|CP013099.1|:3650031-3650144  .-C.UGGCGGCA..AU.AGC.GAG.U.UGG.AA.CCAC.CCGA..U.C.CCAUCCCGAAC\n+gb|CP013066.1|:1198376-1198490  .-C.UGGUGCCU..UU.AGC.GGC.G.GGG.UC.ACAC.CUGU..U.C.CCAUCUCGAAC\n+gb|CP011214.1|:386649-386764    .-U.UGGUGACU..UG.AGC.GCG.G.UGG.AA.CCAC.CUGA..U.U.CCAUUCCGAAC\n+gi|38938:2-120                  .UA.CGGCGGUG..AU.AGC.GUG.G.GGG.AA.ACGC.CCGG..U.C.CCAUUCCGAAC\n+gb|CP009217.2|:1265876-1265989  .CU.UGGUGAUU..AU.AGC.ACA.G.GUG.AC.ACAC.CCGA..C.C.CCAUACCGAAC\n+gb|CP012154.1|:245759-245875    .CC.UGGUGGCA..AU.AGC.GGC.G.UGG.AA.CCAC.CCGA..C.C.CCAUCUCGAAC\n+gb|CP003801.1|:c1381757-1381640 .UG.UGGCGGUC..AU.AGC.GGA.G.UGA.AA.AAAC.CCGA..U.C.CCAUUUCGAAC\n+gi|662020025:1-116              .CC.UGGCGGCC..AU.AGC.GCG.G.UGG.UC.CCAC.CUGA..U.C.CCAUGCCGAAC\n+gb|CP011213.1|:c135211-135096   .-U.UGGUGCUU..-U.AAC.GUG.G.UGG.GU.ACAC.CUCU..U.C.CCAUUCCGAAC\n+gb|CP007706.1|:54289-54406      .CC.UGGUGGUU..AU.GGC.GGA.G.CGG.CU.GCAC.CCGA..U.C.CCAUUCCGAAC\n+gi|610510261:c2632714-2632599   
.CU.CGGUGAUU..AU.UGC.GAG.G.AGC.CU.AAAC.CCGA..U.C.C'..b'  CCCUGGGAGAGUAGG.UCGCUGCCAGGC\n+gi|451991914:2-119              ACGCGGGAGAGUAGG.UCGCUGCCAGGU\n+gi|451991913:2-119              ACGCGGGAGAGUAGG.UCGCUGCCAGGU\n+gi|451991605:2-118              CCAUGUGAGAGUAGG.UCAUCGUCAAGA\n+gb|DQ532441.1|:c9325-9209       CCAUGUGAGAGUAGG.UCAUCGUCAAGA\n+gi|451991936:1-114              CCAUGUGAGAGUAGG.UCAUCGUCAAGA\n+gb|ANOR01000024.1|:5607-5723    CCAUGUGAGAGUAGG.UCAUCGUCAAGA\n+gi|4468164:2-115                CCGUGUGAAAGUAGG.UCAUCGUCAGGC\n+gi|48257:4-119                  CGCUGGGAGAGUAGG.UCGGUGCGGGGG\n+gi|224510780:2-119              GCCUGGGAGAGUAGG.UCGGUGCGGGGG\n+gi|451991880:1-115              ACGUGGGAGAGUAGG.UCACCGCCAGAC\n+gi|662019950:1-114              ACGUGGGAGAGUAGG.UCACCGCCAGAC\n+gb|CP009574.1|:c806523-806408   CCAGGUGAAAGUAGG.UAGUCGUCAGGA\n+gb|M33886.1|CYTRRAB:1-116       CCCUGUGAGAGUAGG.ACGUUGCCAGGC\n+gi|357428060:1522-1640          ACGUGGGAGAGUAGG.UCGCCGCCAGGU\n+gi|356882082:c402625-402507     ACGUGGGAGAGUAGG.UCGCCGCCAGGU\n+gb|CP012406.1|:664991-665109    ACGUGGGAGAGUAGG.UCGCCGCCAGGU\n+gi|451991726:1-113              UUGUGGGAAAGUAGG.ACGCUGCCG---\n+gi|452192559:1-115              ACCUGGGAGAGUAGG.UCGUCGCCAGGC\n+gb|M35166.1|PLLS5RRNAB:1-109    --GUGCGAAAGUAGG.U-AUCGCCGGA-\n+gb|CP011270.1|:4958369-4958477  AGGUGCGAAAGUAGG.UUAUUGCCGG--\n+gb|M35170.1|GEM5SRRNA:1-108     --GCGCGAGAGUAGG.-UAUCGCCGGCC\n+gi|452192403:1-110              ACUUGGAAAAGUAGG.UAGCCGCCA---\n+gi|452192413:1-112              ACCUGGGAGAGUAGG.UCGCCGCCUUCU\n+gb|M34775.1|ANCRRAA:1-116       ACCUGGGAGAGUAGG.UCGCUGCCAGGC\n+gb|M35569.1|LTTRR5S:3-117       GCGUGUGAAAGUAGG.UCAUCGUCAGGC\n+gi|452192547:1-114              ACGCGGGAGAGUAGG.UCGUUGCCAGGU\n+gi|40413:3-117                  GCGCGGGAGAGUAGG.UCGCCGCCAGG-\n+gb|M35310.1|HESRRA:1-117        GCGCGGGAGAGUAGG.UCGCUGCCGGAU\n+gb|M35568.1|VITRR5SXX:1-115     CCGUGUGAAAGUAGG.ACAUCGUCAGGC\n+gi|451991653:1-112              UGGUGGGAGAGUAGG.ACGCCGCC----\n+gi|861061:1-118       
          CUGUGGGAGAGUAGG.ACGCCGCCAGCG\n+gi|860904:1-116                 GCGCGGGAGAGUAGG.UCGCUGCCGGAU\n+gi|507148422:2-119              GCGUGGGAGAGUAGG.UCGCUGCCGGAU\n+gi|451991792:1-116              GCGCGGGAGAGUAGG.UCGCUGCCGGAU\n+gi|452192399:1-117              GCGCGGGAGAGUAGG.UCGCUGCCGGAU\n+gi|451991925:1-116              CCAUGUGAGAGUAGG.UCAUCGCCAAGC\n+gi|47140:2-118                  CCAUGUGAGAGUAGG.UCAUUUCCAGGC\n+gi|451991802:1-112              AUGUGGGAAAGUAGG.UCGCCGCC----\n+gi|451992042:1-115              GGGUGGGAGAGUAGG.UCAUUGCCAG--\n+gi|452192373:1-117              GUGUGGGAGAGUAGG.ACGCCGCCGGAC\n+gi|451991686:3-118              ACGCGGGAGAGUAGG.UCGCUGCCAGGC\n+gi|328801247:21-136             CCAUGUGAGAGUAGG.UCAUCGUCAAGA\n+gb|KF901086.1|:6138-6252        GAGUGGGAAAAUAGG.UCACCGCCG---\n+gi|688725010:c1243-1130         CCGUGUGAAAGUAGG.UCAUCGUCAGGC\n+gb|AC167666.4|:1429-1542        CCGUGUGAAAGUAGG.UCAUCGUCAGGC\n+gi|690018164:461-574            CCGUGUGAAAGUAGG.UCAUCGUCAGGC\n+gi|451991889:1-113              CCGUGUGAAAGUAGG.UCAUCGUCAGGC\n+gi|688799711:426-535            CCGUGGGAGAGCAGG.UCGUCGCUA---\n+gi|307683286:c91863-91747       CUAUGGGAAAAUAGC.UCGGUGCCAGGA\n+gi|452192203:1-116              CUCUGGGAGAGCUGAuUCGCCGCC----\n+gi|765338145:1-113              GUGCGUGAAAGUAGG.UCAUCGUCAGAC\n+gb|CP006885.2|:13738-13857      CUCUGGGAAACGCGGuUCGCCGCC----\n+gi|689936939:3579-3693          GUGUGGGAGAGUAAG.ACGCUGCCG---\n+gi|841942794:7818-7933          CCAUGCGAGAGUAGG.GAACUGCCAGGC\n+gi|156148669:c114797-114685     ACGCGGGAGAGUAGG.UCGCUGCCA---\n+gi|687058084:c326-216           AUGUGGGAGAGUAGG.AGGCCGCC----\n+gi|687836183:c2166-2052         CCAUGCGAGAGUAGG.UCAUCGCCAGGG\n+gi|49258838:3-122               CUCUGGGAAACCCGGuUCGCCGCC----\n+gi|20609:4653-4769              UUUUGGGAAAGUAGC.UCAGUGCCAAGG\n+gi|451991654:1-118              CUCUGGGAAAUCCGGuUCGCCGCC----\n+gb|CP007060.1|:196416-196534    CUCUGGGAAAUUCGGuUCGCCGCC----\n+gi|37927900:1-119               
CUCUGGGAAACCCGGuUCGCCGCC----\n+gi|526641914:1-118              CUCUGGGAAAUCCGGuUCGCCGCC----\n+                                    *  *                    \n'
b
diff -r 000000000000 -r 48fc2c21fe1c test-data/result.selected
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/result.selected Sat Mar 11 16:59:55 2017 -0500
b
@@ -0,0 +1,19 @@
+CLUSTAL W (1.8) multiple sequence alignment
+
+
+AB001721-1-2735-2851          CCCGGUGACUAUAGAGAGAGGGCCACACCCGUUCCCAUCCCGAACACGGAAGUUAAGCCU
+gi-258590833-c1582130-1582011 CCUGGUGGCCAUAGCGGAGGGGAAACACCCGUUCCCAUUCCGAACACGGAAGUGAAGCCC
+gb-M29856-1-BTTDNA-1-115      UCUGGUGACCAUAGCGGAGGGGAUCCACCCGUUCCCAUCCCGAACACGGAAGUUAAGCCC
+gi-452192175-1-116            --CGGUGGUCAUAGCGGAGGGGACACACCCGUUCCCAUUCCGAACACGGAAGUUAAGCCC
+gb-AF116563-1--6176-6294      UUCGGUGGUUAUAGCGGUGGGGAAACACCCGGUCCCAUUCCGAACCCGGUAGUUAAGCCC
+gb-CP012370-1--152653-152770  UCUGGUGGUAAUAGCGGAGGGGAAACACCCGUUCCCAUCCCGAACACGGCAGUUAAGCCC
+                                                                                          
+
+AB001721-1-2735-2851          CUCAUCGCUGAUGGUACUAUGUGGUUCGCUGCAUGGGAGAGUAGGACGUUGCCGGGU
+gi-258590833-c1582130-1582011 UCCAGCGCCGAUGGUACUGCGUGGGCAACUGCGCGGGAGAGUAGGUCGCCGCCAGGA
+gb-M29856-1-BTTDNA-1-115      UCCAGCGCCGAUGGUACUUGGGGA--UGACCCCUGGGAGAGUAGGUCGUUGCCAGGC
+gi-452192175-1-116            UCCAGCGCCGAUGGUACUGCCCUGGCGACGGGGCGGGAGAGUAGGUCGCUGCCGGGG
+gb-AF116563-1--6176-6294      GCCAGCGCCGAUGGUACUGCACUGGUGACGGUGUGGGAGAGUAGGUCGCCGCCGGAC
+gb-CP012370-1--152653-152770  UCCAGCGCCGAUGGUACUGGGGAG--UAUUCCCCGGGAGAGUAGGACACCGCCAGGA
+                                                                                       
+