Mercurial > repos > bgruening > pfamscan
changeset 0:15cff34c2005 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/pfamscan commit c27a05e92e6e03545903dd2ff90976f8cab5ebf7
author | bgruening |
---|---|
date | Sat, 04 Feb 2023 16:33:20 +0000 |
parents | |
children | |
files | pfamscan.xml test-data/Pfam-A.hmm.dat.gz test-data/Pfam-A.hmm.gz test-data/active_site.dat.gz test-data/sequences.fasta.gz test-data/test01.tab test-data/test02.tab test-data/test03.tab test-data/test04.tab |
diffstat | 9 files changed, 261 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pfamscan.xml Sat Feb 04 16:33:20 2023 +0000
@@ -0,0 +1,158 @@
+<tool id="pfamscan" name="PfamScan" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+    <description>search a FASTA sequence against a library of Pfam HMM</description>
+    <macros>
+        <token name="@TOOL_VERSION@">1.6</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <xrefs>
+        <xref type="bio.tools">pfamscan</xref>
+    </xrefs>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">pfam_scan</requirement>
+        <requirement type="package" version="3.3.2">hmmer</requirement>
+    </requirements>
+    <version_command>pfam_scan.pl --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Link the inputs into ./pfam_files, the directory handed to pfam_scan.pl via -dir.
+        mkdir -p './pfam_files' &&
+        ln -s '${pfam_library}' './pfam_files/Pfam-A.hmm' &&
+        ln -s '${pfam_data}' './pfam_files/Pfam-A.hmm.dat' &&
+        #if $active_sites.selector == 'true'
+            ln -s '${active_sites.active_file}' './pfam_files/active_site.dat' &&
+        #end if
+        hmmpress './pfam_files/Pfam-A.hmm' &&
+        pfam_scan.pl
+            -cpu \${GALAXY_SLOTS:-4}
+            -fasta '${fasta}'
+            -dir './pfam_files/'
+            -outfile './output.tab'
+            ## Compare with the empty string so that an explicit cutoff of 0 is passed on instead of being dropped.
+            #if str($advanced_options.e_seq) != ''
+                -e_seq $advanced_options.e_seq
+            #end if
+            #if str($advanced_options.e_dom) != ''
+                -e_dom $advanced_options.e_dom
+            #end if
+            #if str($advanced_options.b_seq) != ''
+                -b_seq $advanced_options.b_seq
+            #end if
+            #if str($advanced_options.b_dom) != ''
+                -b_dom $advanced_options.b_dom
+            #end if
+            ## Boolean parameters render as their truevalue (the flag itself) or as an empty string.
+            $advanced_options.clan_overlap
+            $advanced_options.align
+            #if $active_sites.selector == 'true'
+                -as
+            #end if
+        ## Keep the report from line 28 onwards; the expected outputs in test-data start at the column header line.
+        && tail -n +28 "./output.tab" > "./output_fixed.tab"
+    ]]>
+    </command>
+    <inputs>
+        <param argument="-fasta" type="data" format="fasta" label="Protein sequences FASTA file"/>
+        <param name="pfam_library" type="data" format="hmm3" label="Pfam-A HMM library" help="Pfam-A HMMs in an HMM library searchable with the hmmscan program." />
+        <param name="pfam_data" type="data" format="stockholm" label="Pfam-A HMM Stockholm file" help="Stockholm format is a multiple sequence alignment format used by Pfam, Rfam and Dfam, to disseminate protein, RNA and DNA sequence alignments." />
+        <conditional name="active_sites">
+            <param name="selector" type="select" label="Predict active site residues" help="Predict active site residues for Pfam-A matches">
+                <option value="false">Disabled</option>
+                <option value="true" selected="true">Enabled</option>
+            </param>
+            <when value="false"/>
+            <when value="true">
+                <param name="active_file" type="data" format="txt" label="Active sites file" help="This file is required for predicting the active site residues." />
+            </when>
+        </conditional>
+        <section name="advanced_options" title="Advanced options">
+            <param argument="-e_seq" type="float" min="0" value="" optional="true" label="Hmmscan evalue sequence cutoff" help="Specify hmmscan evalue sequence cutoff for Pfam-A searches (default Pfam defined)" />
+            <param argument="-e_dom" type="float" min="0" value="" optional="true" label="Hmmscan evalue domain cutoff" help="Specify hmmscan evalue domain cutoff for Pfam-A searches (default Pfam defined)" />
+            <param argument="-b_seq" type="float" min="0" value="" optional="true" label="Hmmscan bit score sequence cutoff" help="Specify hmmscan bit score sequence cutoff for Pfam-A searches (default Pfam defined)" />
+            <param argument="-b_dom" type="float" min="0" value="" optional="true" label="Hmmscan bit score domain cutoff" help="Specify hmmscan bit score domain cutoff for Pfam-A searches (default Pfam defined)" />
+            <param argument="-clan_overlap" type="boolean" truevalue="-clan_overlap" falsevalue="" checked="false" label="Clan overlap" help="Show overlapping hits within clan member families (applies to Pfam-A families only)" />
+            <param argument="-align" type="boolean" truevalue="-align" falsevalue="" checked="false" label="Show alignment" help="Show the HMM-sequence alignment for each match" />
+        </section>
+    </inputs>
+    <outputs>
+        <!-- output_fixed.tab is written by the tail step of the command from the ./output.tab report of pfam_scan.pl -->
+        <data name="output" format="tabular" from_work_dir="output_fixed.tab" label="${tool.name} on ${on_string}"/>
+    </outputs>
+    <tests>
+        <!-- NOTE: -clan_overlap and -align are not exercised below; test03.tab and test04.tab do not reflect those flags and need regenerating before such tests are added -->
+        <!-- Test 01: Default parameters not active sites -->
+        <test expect_num_outputs="1">
+            <param name="fasta" value="sequences.fasta.gz"/>
+            <param name="pfam_library" value="Pfam-A.hmm.gz"/>
+            <param name="pfam_data" value="Pfam-A.hmm.dat.gz"/>
+            <conditional name="active_sites">
+                <param name="selector" value="false"/>
+            </conditional>
+            <output name="output" file="test01.tab" ftype="tabular"/>
+        </test>
+        <!-- Test 02: Default parameters: active sites -->
+        <test expect_num_outputs="1">
+            <param name="fasta" value="sequences.fasta.gz"/>
+            <param name="pfam_library" value="Pfam-A.hmm.gz"/>
+            <param name="pfam_data" value="Pfam-A.hmm.dat.gz"/>
+            <conditional name="active_sites">
+                <param name="selector" value="true"/>
+                <param name="active_file" value="active_site.dat.gz"/>
+            </conditional>
+            <output name="output" file="test02.tab" ftype="tabular"/>
+        </test>
+        <!-- Test 03: E-value cutoffs (e_seq, e_dom): active sites -->
+        <test expect_num_outputs="1">
+            <param name="fasta" value="sequences.fasta.gz"/>
+            <param name="pfam_library" value="Pfam-A.hmm.gz"/>
+            <param name="pfam_data" value="Pfam-A.hmm.dat.gz"/>
+            <section name="advanced_options">
+                <param name="e_seq" value="1.2"/>
+                <param name="e_dom" value="1.3"/>
+            </section>
+            <conditional name="active_sites">
+                <param name="selector" value="true"/>
+                <param name="active_file" value="active_site.dat.gz"/>
+            </conditional>
+            <output name="output" file="test03.tab" ftype="tabular"/>
+        </test>
+        <!-- Test 04: Bit score cutoffs (b_seq, b_dom): active sites -->
+        <test expect_num_outputs="1">
+            <param name="fasta" value="sequences.fasta.gz"/>
+            <param name="pfam_library" value="Pfam-A.hmm.gz"/>
+            <param name="pfam_data" value="Pfam-A.hmm.dat.gz"/>
+            <section name="advanced_options">
+                <param name="b_seq" value="1.4"/>
+                <param name="b_dom" value="1.2"/>
+            </section>
+            <conditional name="active_sites">
+                <param name="selector" value="true"/>
+                <param name="active_file" value="active_site.dat.gz"/>
+            </conditional>
+            <output name="output" file="test04.tab" ftype="tabular"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+**Purpose**
+
+Search one or more sequences for matching Pfam domains. Depending on the user options, the script can also process the results such that overlaps between families belonging to the
+same clan are resolved and can predict active sites.
+
+----
+
+.. class:: infomark
+
+**Required files**
+
+To run PfamScan you will need to download the following files from the Pfam ftp site:
+
+- Pfam-A HMMs in an HMM library searchable with the hmmscan program: `Pfam-A.hmm.gz <https://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/Pfam-A.hmm.gz>`_
+- Pfam-A HMM Stockholm file associated with each HMM required for PfamScan: `Pfam-A.hmm.dat.gz <https://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/Pfam-A.hmm.dat.gz>`_
+- Active sites: `active_site.dat.gz <https://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/active_site.dat.gz>`_
+
+]]></help>
+    <citations>
+        <citation type="doi">10.1093/nar/gkt006</citation>
+    </citations>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test01.tab Sat Feb 04 16:33:20 2023 +0000 @@ -0,0 +1,14 @@ +# <seq id> <alignment start> <alignment end> <envelope start> <envelope end> <hmm acc> <hmm name> <type> <hmm start> <hmm end> <hmm length> <bit score> <E-value> <significance> <clan> + +ENST00000006658.11 79 239 78 240 PF03190.18 Thioredox_DsbH Family 2 162 163 243.0 4.7e-75 1 CL0172 +ENST00000282903.10 673 757 666 758 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.6 4.4e-08 1 CL0029 +ENST00000356488.8 63 223 62 224 PF03190.18 Thioredox_DsbH Family 2 162 163 243.1 4.5e-75 1 CL0172 +ENST00000360060.7 652 736 645 737 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.7 4.3e-08 1 CL0029 +ENST00000494950.5 618 702 611 703 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.8 4e-08 1 CL0029 +ENST00000634597.1 63 223 62 224 PF03190.18 Thioredox_DsbH Family 2 162 163 243.1 4.5e-75 1 CL0172 +ENST00000703518.1 673 757 666 758 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.6 4.4e-08 1 CL0029 +ENST00000703522.1 673 757 666 758 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.6 4.4e-08 1 CL0029 +ENST00000703523.1 652 736 645 737 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.7 4.3e-08 1 CL0029 +ENST00000503063.5 63 223 62 224 PF03190.18 Thioredox_DsbH Family 2 162 163 244.0 2.4e-75 1 CL0172 +ENST00000512181.5 63 157 62 161 PF03190.18 Thioredox_DsbH Family 2 96 163 166.2 2e-51 1 CL0172 +ENST00000512181.5 181 249 177 250 PF03190.18 Thioredox_DsbH Family 94 162 163 61.8 2.4e-19 1 CL0172
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test02.tab Sat Feb 04 16:33:20 2023 +0000 @@ -0,0 +1,14 @@ +# <seq id> <alignment start> <alignment end> <envelope start> <envelope end> <hmm acc> <hmm name> <type> <hmm start> <hmm end> <hmm length> <bit score> <E-value> <significance> <clan> <predicted_active_site_residues> + +ENST00000006658.11 79 239 78 240 PF03190.18 Thioredox_DsbH Family 2 162 163 243.0 4.7e-75 1 CL0172 +ENST00000282903.10 673 757 666 758 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.6 4.4e-08 1 CL0029 +ENST00000356488.8 63 223 62 224 PF03190.18 Thioredox_DsbH Family 2 162 163 243.1 4.5e-75 1 CL0172 +ENST00000360060.7 652 736 645 737 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.7 4.3e-08 1 CL0029 +ENST00000494950.5 618 702 611 703 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.8 4e-08 1 CL0029 +ENST00000634597.1 63 223 62 224 PF03190.18 Thioredox_DsbH Family 2 162 163 243.1 4.5e-75 1 CL0172 +ENST00000703518.1 673 757 666 758 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.6 4.4e-08 1 CL0029 +ENST00000703522.1 673 757 666 758 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.6 4.4e-08 1 CL0029 +ENST00000703523.1 652 736 645 737 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.7 4.3e-08 1 CL0029 +ENST00000503063.5 63 223 62 224 PF03190.18 Thioredox_DsbH Family 2 162 163 244.0 2.4e-75 1 CL0172 +ENST00000512181.5 63 157 62 161 PF03190.18 Thioredox_DsbH Family 2 96 163 166.2 2e-51 1 CL0172 +ENST00000512181.5 181 249 177 250 PF03190.18 Thioredox_DsbH Family 94 162 163 61.8 2.4e-19 1 CL0172
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test03.tab Sat Feb 04 16:33:20 2023 +0000 @@ -0,0 +1,38 @@ +# <seq id> <alignment start> <alignment end> <envelope start> <envelope end> <hmm acc> <hmm name> <type> <hmm start> <hmm end> <hmm length> <bit score> <E-value> <significance> <clan> <predicted_active_site_residues> + +ENST00000006658.11 79 239 78 240 PF03190.18 Thioredox_DsbH Family 2 162 163 243.0 4.7e-75 1 CL0172 +ENST00000265594.9 20 94 12 100 PF13532.9 2OG-FeII_Oxy_2 Domain 38 118 194 11.5 0.00086 0 CL0029 +ENST00000282903.10 673 757 666 758 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.6 4.4e-08 1 CL0029 +ENST00000346299.10 562 624 556 627 PF12578.11 3-PAP Family 70 129 132 13.0 0.00022 0 No_clan +ENST00000352297.11 490 552 484 555 PF12578.11 3-PAP Family 70 129 132 13.3 0.00019 0 No_clan +ENST00000356488.8 63 223 62 224 PF03190.18 Thioredox_DsbH Family 2 162 163 243.1 4.5e-75 1 CL0172 +ENST00000360060.7 652 736 645 737 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.7 4.3e-08 1 CL0029 +ENST00000393106.6 30 91 17 98 PF02826.22 2-Hacid_dh_C Domain 38 99 178 16.0 2e-05 0 CL0063 +ENST00000393110.7 40 101 26 108 PF02826.22 2-Hacid_dh_C Domain 38 99 178 15.9 2e-05 0 CL0063 +ENST00000409459.5 490 552 484 555 PF12578.11 3-PAP Family 70 129 132 13.3 0.00019 0 No_clan +ENST00000409811.5 30 91 17 98 PF02826.22 2-Hacid_dh_C Domain 38 99 178 16.1 1.9e-05 0 CL0063 +ENST00000436648.9 183 256 153 279 PF02826.22 2-Hacid_dh_C Domain 33 105 178 19.7 1.5e-06 0 CL0063 +ENST00000436648.9 383 403 376 415 PF02826.22 2-Hacid_dh_C Domain 133 153 178 0.4 1.2 0 CL0063 +ENST00000494950.5 618 702 611 703 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.8 4e-08 1 CL0029 +ENST00000579436.7 382 414 380 416 PF12729.10 4HB_MCP_1 Family 101 133 181 8.7 0.0039 0 CL0457 +ENST00000591451.5 264 330 234 354 PF02826.22 2-Hacid_dh_C Domain 33 104 178 18.9 2.6e-06 0 CL0063 +ENST00000634597.1 63 223 62 224 PF03190.18 Thioredox_DsbH Family 2 162 163 243.1 4.5e-75 1 CL0172 +ENST00000675196.1 490 
552 484 555 PF12578.11 3-PAP Family 70 129 132 13.3 0.00019 0 No_clan +ENST00000675438.1 459 521 453 524 PF12578.11 3-PAP Family 70 129 132 13.4 0.00018 0 No_clan +ENST00000675454.1 490 552 484 555 PF12578.11 3-PAP Family 70 129 132 13.3 0.00019 0 No_clan +ENST00000675489.1 490 552 484 555 PF12578.11 3-PAP Family 70 129 132 13.3 0.00019 0 No_clan +ENST00000676166.1 490 552 484 555 PF12578.11 3-PAP Family 70 129 132 13.3 0.00019 0 No_clan +ENST00000676261.1 490 552 484 555 PF12578.11 3-PAP Family 70 129 132 13.3 0.00019 0 No_clan +ENST00000676272.1 490 552 484 555 PF12578.11 3-PAP Family 70 129 132 13.3 0.00019 0 No_clan +ENST00000676440.1 490 552 484 555 PF12578.11 3-PAP Family 70 129 132 13.3 0.00019 0 No_clan +ENST00000694943.1 67 199 32 239 PF01073.22 3Beta_HSD Family 49 195 280 14.3 5.1e-05 0 CL0063 predicted_active_site +ENST00000703518.1 673 757 666 758 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.6 4.4e-08 1 CL0029 +ENST00000703522.1 673 757 666 758 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.6 4.4e-08 1 CL0029 +ENST00000703523.1 652 736 645 737 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.7 4.3e-08 1 CL0029 +ENST00000514335.1 146 275 138 286 PF00244.23 14-3-3 Repeat 24 148 222 13.6 0.00013 0 CL0020 +ENST00000694942.1 67 259 32 277 PF01073.22 3Beta_HSD Family 49 257 280 18.6 2.6e-06 0 CL0063 predicted_active_site +ENST00000503063.5 63 223 62 224 PF03190.18 Thioredox_DsbH Family 2 162 163 244.0 2.4e-75 1 CL0172 +ENST00000512181.5 63 157 62 161 PF03190.18 Thioredox_DsbH Family 2 96 163 166.2 2e-51 1 CL0172 +ENST00000512181.5 181 249 177 250 PF03190.18 Thioredox_DsbH Family 94 162 163 61.8 2.4e-19 1 CL0172 +ENST00000589104.5 681 717 676 737 PF09038.13 53-BP1_Tudor Domain 8 44 122 8.5 0.0066 0 CL0049 +ENST00000589104.5 744 782 736 793 PF09038.13 53-BP1_Tudor Domain 12 51 122 4.7 0.096 0 CL0049
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test04.tab Sat Feb 04 16:33:20 2023 +0000 @@ -0,0 +1,37 @@ +# <seq id> <alignment start> <alignment end> <envelope start> <envelope end> <hmm acc> <hmm name> <type> <hmm start> <hmm end> <hmm length> <bit score> <E-value> <significance> <clan> <predicted_active_site_residues> + +ENST00000006658.11 79 239 78 240 PF03190.18 Thioredox_DsbH Family 2 162 163 243.0 4.7e-75 1 CL0172 +ENST00000265594.9 20 94 12 100 PF13532.9 2OG-FeII_Oxy_2 Domain 38 118 194 11.5 0.00086 0 CL0029 +ENST00000282903.10 673 757 666 758 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.6 4.4e-08 1 CL0029 +ENST00000346299.10 562 624 556 627 PF12578.11 3-PAP Family 70 129 132 13.0 0.00022 0 No_clan +ENST00000352297.11 490 552 484 555 PF12578.11 3-PAP Family 70 129 132 13.3 0.00019 0 No_clan +ENST00000356488.8 63 223 62 224 PF03190.18 Thioredox_DsbH Family 2 162 163 243.1 4.5e-75 1 CL0172 +ENST00000360060.7 652 736 645 737 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.7 4.3e-08 1 CL0029 +ENST00000393106.6 30 91 17 98 PF02826.22 2-Hacid_dh_C Domain 38 99 178 16.0 2e-05 0 CL0063 +ENST00000393110.7 40 101 26 108 PF02826.22 2-Hacid_dh_C Domain 38 99 178 15.9 2e-05 0 CL0063 +ENST00000409459.5 490 552 484 555 PF12578.11 3-PAP Family 70 129 132 13.3 0.00019 0 No_clan +ENST00000409811.5 30 91 17 98 PF02826.22 2-Hacid_dh_C Domain 38 99 178 16.1 1.9e-05 0 CL0063 +ENST00000436648.9 183 256 153 279 PF02826.22 2-Hacid_dh_C Domain 33 105 178 19.7 1.5e-06 0 CL0063 +ENST00000494950.5 618 702 611 703 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.8 4e-08 1 CL0029 +ENST00000579436.7 382 414 380 416 PF12729.10 4HB_MCP_1 Family 101 133 181 8.7 0.0039 0 CL0457 +ENST00000591451.5 264 330 234 354 PF02826.22 2-Hacid_dh_C Domain 33 104 178 18.9 2.6e-06 0 CL0063 +ENST00000634597.1 63 223 62 224 PF03190.18 Thioredox_DsbH Family 2 162 163 243.1 4.5e-75 1 CL0172 +ENST00000675196.1 490 552 484 555 PF12578.11 3-PAP Family 70 129 132 13.3 0.00019 0 No_clan +ENST00000675438.1 459 
521 453 524 PF12578.11 3-PAP Family 70 129 132 13.4 0.00018 0 No_clan +ENST00000675454.1 490 552 484 555 PF12578.11 3-PAP Family 70 129 132 13.3 0.00019 0 No_clan +ENST00000675489.1 490 552 484 555 PF12578.11 3-PAP Family 70 129 132 13.3 0.00019 0 No_clan +ENST00000676166.1 490 552 484 555 PF12578.11 3-PAP Family 70 129 132 13.3 0.00019 0 No_clan +ENST00000676261.1 490 552 484 555 PF12578.11 3-PAP Family 70 129 132 13.3 0.00019 0 No_clan +ENST00000676272.1 490 552 484 555 PF12578.11 3-PAP Family 70 129 132 13.3 0.00019 0 No_clan +ENST00000676440.1 490 552 484 555 PF12578.11 3-PAP Family 70 129 132 13.3 0.00019 0 No_clan +ENST00000694943.1 67 199 32 239 PF01073.22 3Beta_HSD Family 49 195 280 14.3 5.1e-05 0 CL0063 predicted_active_site +ENST00000703518.1 673 757 666 758 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.6 4.4e-08 1 CL0029 +ENST00000703522.1 673 757 666 758 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.6 4.4e-08 1 CL0029 +ENST00000703523.1 652 736 645 737 PF03171.23 2OG-FeII_Oxy Domain 6 100 101 25.7 4.3e-08 1 CL0029 +ENST00000514335.1 146 275 138 286 PF00244.23 14-3-3 Repeat 24 148 222 13.6 0.00013 0 CL0020 +ENST00000694942.1 67 259 32 277 PF01073.22 3Beta_HSD Family 49 257 280 18.6 2.6e-06 0 CL0063 predicted_active_site +ENST00000503063.5 63 223 62 224 PF03190.18 Thioredox_DsbH Family 2 162 163 244.0 2.4e-75 1 CL0172 +ENST00000512181.5 63 157 62 161 PF03190.18 Thioredox_DsbH Family 2 96 163 166.2 2e-51 1 CL0172 +ENST00000512181.5 181 249 177 250 PF03190.18 Thioredox_DsbH Family 94 162 163 61.8 2.4e-19 1 CL0172 +ENST00000589104.5 681 717 676 737 PF09038.13 53-BP1_Tudor Domain 8 44 122 8.5 0.0066 0 CL0049 +ENST00000589104.5 744 782 736 793 PF09038.13 53-BP1_Tudor Domain 12 51 122 4.7 0.096 0 CL0049