Mercurial > repos > bgruening > cpat
changeset 2:7ccc6234b54e draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/cpat commit a447bd0404d45c185c4410dc9620e970d6995925
author | bgruening |
---|---|
date | Tue, 23 Jul 2024 15:09:07 +0000 |
parents | 1ac12c0cc7a0 |
children | |
files | cpat.xml test-data/all_fasta.loc.test test-data/test03_no_orgs.txt tool-data/all_fasta.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 6 files changed, 102 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- a/cpat.xml Mon Jan 29 10:44:49 2024 +0000 +++ b/cpat.xml Tue Jul 23 15:09:07 2024 +0000 @@ -2,21 +2,24 @@ <description>coding potential assessment</description> <macros> <token name="@TOOL_VERSION@">3.0.5</token> - <token name="@VERSION_SUFFIX@">0</token> + <token name="@VERSION_SUFFIX@">1</token> </macros> <requirements> <requirement type="package" version="@TOOL_VERSION@">cpat</requirement> </requirements> <version_command>cpat --version</version_command> <command detect_errors="exit_code"><![CDATA[ + #if $ref_source.source == "history" + ln -s '${ref_source.ref_fasta}' reference.fasta && + #elif $ref_source.source == "builtin" + ln -s '${ref_source.ref_fasta_builtin.fields.path}' reference.fasta && + #end if #set $gen_ext = $gene.ext ln -s '${gene}' './gene_sequences.${gen_ext}' && #set $cod_ext = $c.ext ln -s '${c}' './conding_sequences.${cod_ext}' && #set $ncod_ext = $n.ext ln -s '${n}' './nonconding_sequences.${ncod_ext}' && - #set $ref_ext = $r.ext - ln -s '${r}' './referece.${ref_ext}' && make_hexamer_tab.py -c './conding_sequences.${cod_ext}' -n './nonconding_sequences.${ncod_ext}' > './hexamer.tsv' && @@ -24,7 +27,7 @@ -x './hexamer.tsv' -c './conding_sequences.${cod_ext}' -n './nonconding_sequences.${ncod_ext}' - -r './referece.${ref_ext}' + -r './reference.fasta' --start='${start}' --stop='${stop}' --min-orf=$min_orf_model @@ -46,9 +49,22 @@ </command> <inputs> <param argument="--gene" type="data" format="fasta,fasta.gz" label="Query nucletide sequences" help="It is recommended to use short and unique sequence identifiers"/> - <param argument="-r" type="data" format="fasta,fasta.gz" label="Reference genome" help="Reference genome sequences in FASTA format" /> - <param argument="-c" type="data" format="fasta,fasta.gz" label="Coding sequences file" help="Coding sequence (must be CDS without UTR, i.e. 
from start coden to stop coden) in FASTA format" /> - <param argument="-n" type="data" format="fasta,fasta.gz" label="Non coding sequeces file" help="Noncoding sequences in FASTA format" /> + <conditional name="ref_source"> + <param type="select" label="Reference genome source" name="source"> + <option value="history" selected="true">Use from History</option> + <option value="builtin">Use Built-in</option> + </param> + <when value="history"> + <param type="data" format="fasta,fasta.gz" name="ref_fasta" argument="-r" label="Reference genome from History" help="Reference genome sequences in FASTA format"/> + </when> + <when value="builtin"> + <param type="select" name="ref_fasta_builtin" argument="-r" label="Reference genome from Built-in reference"> + <options from_data_table="all_fasta" /> + </param> + </when> + </conditional> + <param argument="-c" type="data" format="fasta,fasta.gz" label="Coding sequences file" help="Coding sequence (must be CDS without UTR, i.e. from start coden to stop coden) in FASTA format"/> + <param argument="-n" type="data" format="fasta,fasta.gz" label="Non coding sequences file" help="Noncoding sequences in FASTA format"/> <param argument="--start" type="text" value="ATG" label="Start codon"> <sanitizer invalid_char=""> <valid initial="string.letters"/> @@ -82,7 +98,10 @@ <tests> <test expect_num_outputs="4"> <param name="gene" value="sequences.fasta.gz"/> - <param name="r" value="sequences.fasta.gz"/> + <conditional name="ref_source"> + <param name="source" value="history" /> + <param name="ref_fasta" value="sequences.fasta.gz" /> + </conditional> <param name="c" value="sequences.fasta.gz"/> <param name="n" value="sequences.fasta.gz"/> <param name="start" value="ATG"/> @@ -114,7 +133,10 @@ </test> <test expect_num_outputs="4"> <param name="gene" value="sequences.fasta.gz"/> - <param name="r" value="sequences.fasta.gz"/> + <conditional name="ref_source"> + <param name="source" value="history" /> + <param name="ref_fasta" 
value="sequences.fasta.gz" /> + </conditional> <param name="c" value="sequences.fasta.gz"/> <param name="n" value="sequences.fasta.gz"/> <param name="start" value="ATG"/> @@ -144,6 +166,41 @@ </output> <output name="no_orf_seqs" file="test02_no_orgs.txt" ftype="txt"/> </test> + <test expect_num_outputs="4"> + <param name="gene" value="sequences.fasta.gz"/> + <conditional name="ref_source"> + <param name="source" value="builtin"/> + <param name="ref_fasta_builtin" value="test1" /> + </conditional> + <param name="c" value="sequences.fasta.gz"/> + <param name="n" value="sequences.fasta.gz"/> + <param name="start" value="ATG"/> + <param name="stop" value="TAG,TAA,TGA"/> + <param name="min_orf_model" value="15"/> + <param name="min_orf_cpat" value="60"/> + <param name="antisense" value="true"/> + <param name="top_orf" value="10"/> + <param name="best_orf" value="p"/> + <output name="orf_seqs" ftype="fasta"> + <assert_contents> + <has_text text=">ENST00000616016.5_ORF_1"/> + <has_n_lines n="57357"/> + </assert_contents> + </output> + <output name="orf_seqs_prob" ftype="tsv"> + <assert_contents> + <has_text text="ENST00000616016.5_ORF_1"/> + <has_n_lines n="11667"/> + </assert_contents> + </output> + <output name="orf_seqs_prob_best" ftype="tsv"> + <assert_contents> + <has_text text="ENST00000683977.1"/> + <has_n_lines n="1301"/> + </assert_contents> + </output> + <output name="no_orf_seqs" file="test03_no_orgs.txt" ftype="txt"/> + </test> </tests> <help><![CDATA[
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/all_fasta.loc.test Tue Jul 23 15:09:07 2024 +0000 @@ -0,0 +1,10 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +test1 "Test Genome" ${__HERE__}/test1.fasta.gz \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test03_no_orgs.txt Tue Jul 23 15:09:07 2024 +0000 @@ -0,0 +1,1 @@ +ENST00000637839.1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/all_fasta.loc.sample Tue Jul 23 15:09:07 2024 +0000 @@ -0,0 +1,10 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +#test1 Test-Genome ./test-data/test1.fa.gz \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue Jul 23 15:09:07 2024 +0000 @@ -0,0 +1,7 @@ +<?xml version="1.0"?> +<tables> + <table name="all_fasta" comment_char="#"> + <columns>value, name, path</columns> + <file path="tool-data/all_fasta.loc" /> + </table> +</tables> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Tue Jul 23 15:09:07 2024 +0000 @@ -0,0 +1,8 @@ +<?xml version="1.0"?> +<tables> + <!-- Locations of reference genome files in fasta format --> + <table name="all_fasta" comment_char="#"> + <columns>value, name, path</columns> + <file path="${__HERE__}/test-data/all_fasta.loc.test" /> + </table> +</tables> \ No newline at end of file