Mercurial > repos > galaxyp > fasta_merge_files_and_filter_unique_sequences
comparison fasta_merge_files_and_filter_unique_sequences.xml @ 3:9ad0d336e5ed draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fasta_merge_files_and_filter_unique_sequences commit 96128b1b32e31c88f08201fd59a07fb1057aafbe
author | galaxyp |
---|---|
date | Fri, 03 Feb 2017 14:27:56 -0500 |
parents | 379c41d859aa |
children | 8462a4e9f86e |
comparison
equal
deleted
inserted
replaced
2:379c41d859aa | 3:9ad0d336e5ed |
---|---|
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="2.7.12">python</requirement> | 4 <requirement type="package" version="2.7.12">python</requirement> |
5 </requirements> | 5 </requirements> |
6 <command> | 6 <command> |
7 python '$__tool_directory__/fasta_merge_files_and_filter_unique_sequences.py' | 7 python '$__tool_directory__/fasta_merge_files_and_filter_unique_sequences.py' |
8 '$output' $uniqueness_criterion | 8 '$output' $uniqueness_criterion '$accession_parser' |
9 #for $input in $inputs: | 9 #for $input in $inputs: |
10 '$input' | 10 '$input' |
11 #end for | 11 #end for |
12 </command> | 12 </command> |
13 <inputs> | 13 <inputs> |
14 <param name="inputs" format="fasta" multiple="True" type="data" label="Input FASTA files"/> | 14 <param name="inputs" format="fasta" multiple="True" type="data" label="Input FASTA files"/> |
15 <param name="uniqueness_criterion" type="select" label="How are sequences judged to be unique?"> | 15 <param name="uniqueness_criterion" type="select" label="How are sequences judged to be unique?"> |
16 <option value="sequence" selected="true">Accession and Sequence</option> | 16 <option value="sequence" selected="true">Accession and Sequence</option> |
17 <option value="accession">Accession Only</option> | 17 <option value="accession">Accession Only</option> |
18 </param> | 18 </param> |
19 <param name="accession_parser" type="text" label="Accession Parsing Regex" value="^>([^ ]+).*$" help="Regular expression with 1 capture group; the capture group is the accession (which must be unique)"> | |
20 <sanitizer> | |
21 <valid> | |
22 <add preset="string.printable"/> | |
23 <remove value="\" /> | |
24 <remove value="'" /> | |
25 </valid> | |
26 <mapping initial="none"> | |
27 <add source="\" target="__backslash__" /> | |
28 <add source="'" target="__sq__"/> | |
29 </mapping> | |
30 </sanitizer> | |
31 </param> | |
19 </inputs> | 32 </inputs> |
20 <outputs> | 33 <outputs> |
21 <data format="fasta" name="output" label="Merged and Filtered FASTA from ${on_string}"/> | 34 <data format="fasta" name="output" label="Merged and Filtered FASTA from ${on_string}"/> |
22 </outputs> | 35 </outputs> |
23 <tests> | 36 <tests> |
24 <test> | 37 <test> |
25 <param name="inputs" value="1.fa,2.fa" ftype="fasta" /> | 38 <param name="inputs" value="1.fa,2.fa" ftype="fasta" /> |
26 <param name="uniqueness_criterion" value="sequence" /> | 39 <param name="uniqueness_criterion" value="sequence" /> |
40 <param name="accession_parser" value="^>([^ |]+).*$" /> | |
27 <output name="output" file="res-sequence.fa" ftype="fasta" /> | 41 <output name="output" file="res-sequence.fa" ftype="fasta" /> |
28 <assert_stdout> | 42 <assert_stdout> |
29 <has_line line="Skipping protein '>one_2' with duplicate sequence (first seen as '>one')" /> | 43 <has_line line="Skipping protein '>one_2' with duplicate sequence (first seen as '>one')" /> |
30 <has_line line="Skipping protein '>two_2' with duplicate sequence (first seen as '>two')" /> | 44 <has_line line="Skipping protein '>two_2' with duplicate sequence (first seen as '>two')" /> |
31 <has_line line="Skipping protein '>three_2' with duplicate header" /> | 45 <has_line line="Skipping protein '>three_2|456' with duplicate accession" /> |
46 <has_line line="Skipping protein '>three_2 789' with duplicate accession" /> | |
32 </assert_stdout> | 47 </assert_stdout> |
33 </test> | 48 </test> |
34 <test> | 49 <test> |
35 <param name="inputs" value="1.fa,2.fa" ftype="fasta" /> | 50 <param name="inputs" value="1.fa,2.fa" ftype="fasta" /> |
36 <param name="uniqueness_criterion" value="accession" /> | 51 <param name="uniqueness_criterion" value="accession" /> |
52 <param name="accession_parser" value="^>([^ |]+).*$" /> | |
37 <output name="output" file="res-accession.fa" ftype="fasta" /> | 53 <output name="output" file="res-accession.fa" ftype="fasta" /> |
38 <assert_stdout> | 54 <assert_stdout> |
39 <has_line line="Skipping protein '>three_2' with duplicate header" /> | 55 <has_line line="Skipping protein '>three_2|456' with duplicate accession" /> |
56 <has_line line="Skipping protein '>three_2 789' with duplicate accession" /> | |
40 </assert_stdout> | 57 </assert_stdout> |
41 </test> | 58 </test> |
42 </tests> | 59 </tests> |
43 <help> | 60 <help> |
44 <![CDATA[ | 61 <![CDATA[ |
47 Concatenate FASTA database files together. | 64 Concatenate FASTA database files together. |
48 | 65 |
49 If the uniqueness criterion is "Accession and Sequence", only the first appearance of each unique sequence will appear in the output. | 66 If the uniqueness criterion is "Accession and Sequence", only the first appearance of each unique sequence will appear in the output. |
50 Otherwise, duplicate sequences are allowed, but only the first appearance of each accession will appear in the output. | 67 Otherwise, duplicate sequences are allowed, but only the first appearance of each accession will appear in the output. |
51 | 68 |
52 In the context of this script, the accession is the entire header line. | 69 The default accession parser will treat everything in the header before the first space as the accession. |
53 | 70 |
54 ------ | 71 ------ |
55 | 72 |
56 **Citation** | 73 **Citation** |
57 | 74 |