Mercurial > repos > bgruening > cpat
view cpat.xml @ 2:7ccc6234b54e draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/cpat commit a447bd0404d45c185c4410dc9620e970d6995925
author | bgruening |
---|---|
date | Tue, 23 Jul 2024 15:09:07 +0000 |
parents | 1ac12c0cc7a0 |
children |
line wrap: on
line source
<!--
    Galaxy wrapper for CPAT (Coding-Potential Assessment Tool).

    The command section runs three steps in sequence:
      1. make_hexamer_tab.py  builds a hexamer table from the coding and
                              non-coding training sequences.
      2. make_logitModel.py   builds the logit model from that table.
      3. cpat.py              scores the query sequences with the hexamer
                              table and the logit model.
-->
<tool id="cpat" name="CPAT" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>coding potential assessment</description>
    <macros>
        <token name="@TOOL_VERSION@">3.0.5</token>
        <!-- Bumped from 1: the generated command line changed (the 'antisense'
             option is now honoured instead of always being switched on). -->
        <token name="@VERSION_SUFFIX@">2</token>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">cpat</requirement>
    </requirements>
    <version_command>cpat --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
        ## Stage the reference genome under a fixed name in the working directory.
        #if $ref_source.source == "history"
            ln -s '${ref_source.ref_fasta}' reference.fasta &&
        #elif $ref_source.source == "builtin"
            ln -s '${ref_source.ref_fasta_builtin.fields.path}' reference.fasta &&
        #end if

        ## Link every sequence input under a name that carries its datatype
        ## extension (presumably so compressed inputs are recognised by suffix).
        #set $gen_ext = $gene.ext
        ln -s '${gene}' './gene_sequences.${gen_ext}' &&
        #set $cod_ext = $c.ext
        ln -s '${c}' './coding_sequences.${cod_ext}' &&
        #set $ncod_ext = $n.ext
        ln -s '${n}' './noncoding_sequences.${ncod_ext}' &&

        ## Step 1: hexamer frequency table from the training sequences.
        make_hexamer_tab.py
            -c './coding_sequences.${cod_ext}'
            -n './noncoding_sequences.${ncod_ext}'
            > './hexamer.tsv' &&

        ## Step 2: logit model. The -r path must match the symlink created above.
        make_logitModel.py
            -x './hexamer.tsv'
            -c './coding_sequences.${cod_ext}'
            -n './noncoding_sequences.${ncod_ext}'
            -r './reference.fasta'
            --start='${start}'
            --stop='${stop}'
            --min-orf=$min_orf_model
            -o './logit_model' &&

        ## Step 3: score the query sequences.
        ## $antisense expands to '--antisense' or to nothing, so the flag is
        ## passed only when the user enabled it.
        cpat.py
            --verbose
            -x './hexamer.tsv'
            -d './logit_model.logit.RData'
            -g './gene_sequences.${gen_ext}'
            --start='${start}'
            --stop='${stop}'
            --min-orf=$min_orf_cpat
            $antisense
            --top-orf=$top_orf
            --best-orf=$best_orf
            -o 'output'
    ]]></command>
    <inputs>
        <param argument="--gene" type="data" format="fasta,fasta.gz" label="Query nucleotide sequences" help="It is recommended to use short and unique sequence identifiers"/>
        <conditional name="ref_source">
            <param type="select" label="Reference genome source" name="source">
                <option value="history" selected="true">Use from History</option>
                <option value="builtin">Use Built-in</option>
            </param>
            <when value="history">
                <param type="data" format="fasta,fasta.gz" name="ref_fasta" argument="-r" label="Reference genome from History" help="Reference genome sequences in FASTA format"/>
            </when>
            <when value="builtin">
                <param type="select" name="ref_fasta_builtin" argument="-r" label="Reference genome from Built-in reference">
                    <options from_data_table="all_fasta" />
                </param>
            </when>
        </conditional>
        <param argument="-c" type="data" format="fasta,fasta.gz" label="Coding sequences file" help="Coding sequence (must be CDS without UTR, i.e. from start codon to stop codon) in FASTA format"/>
        <param argument="-n" type="data" format="fasta,fasta.gz" label="Non coding sequences file" help="Noncoding sequences in FASTA format"/>
        <!-- Codon values are interpolated into the shell command, so they are
             restricted to letters (plus ',' for the stop codon list). -->
        <param argument="--start" type="text" value="ATG" label="Start codon">
            <sanitizer invalid_char="">
                <valid initial="string.letters"/>
            </sanitizer>
            <validator type="regex" message="Only letters are allowed">^[a-zA-Z]+$</validator>
        </param>
        <param argument="--stop" type="text" value="TAG,TAA,TGA" label="Stop codons">
            <sanitizer invalid_char="">
                <valid initial="string.letters">
                    <add value="," />
                </valid>
            </sanitizer>
            <validator type="regex" message="Only letters and commas are allowed">^[a-zA-Z,]+$</validator>
        </param>
        <!-- Two independent thresholds: one for each program in the command. -->
        <param argument="--min-orf" name="min_orf_model" type="integer" min="0" value="30" label="Minimum ORF length (model building)" help="Minimum ORF length in nucleotides, passed to make_logitModel.py" />
        <param argument="--min-orf" name="min_orf_cpat" type="integer" min="0" value="75" label="Minimum ORF length (prediction)" help="Minimum ORF length in nucleotides, passed to cpat.py" />
        <param argument="--antisense" type="boolean" truevalue="--antisense" falsevalue="" checked="false" label="Search for ORFs from the anti-sense strand"/>
        <param argument="--top-orf" type="integer" min="0" value="5" label="Number of ORF candidates reported" help="RNAs may have dozens of putative ORFs, in most cases, the real ORF is ranked (by size) in the top several" />
        <param argument="--best-orf" type="select" label="Criteria to select the best ORF">
            <option value="l">ORF length (l)</option>
            <option value="p" selected="true">Coding probability (p)</option>
        </param>
    </inputs>
    <outputs>
        <!-- All outputs are collected from files written by cpat.py with the 'output' prefix. -->
        <data name="orf_seqs" format="fasta" from_work_dir="output.ORF_seqs.fa" label="${tool.name} on ${on_string}: ORF sequences (FASTA)"/>
        <data name="orf_seqs_prob" format="tsv" from_work_dir="output.ORF_prob.tsv" label="${tool.name} on ${on_string}: ORF probabilities (TSV)"/>
        <data name="orf_seqs_prob_best" format="tsv" from_work_dir="output.ORF_prob.best.tsv" label="${tool.name} on ${on_string}: ORF best probabilities (TSV)"/>
        <data name="no_orf_seqs" format="txt" from_work_dir="output.no_ORF.txt" label="${tool.name} on ${on_string}: no ORFs (TXT)"/>
    </outputs>
    <tests>
        <!-- Test 1: reference from history, best ORF chosen by length.
             The expected values below were generated while '-antisense' was
             always passed to cpat.py, so 'antisense' is set to true here to
             reproduce that command.
             TODO: add a sense-only test (antisense=false) once reference
             outputs for it have been generated. -->
        <test expect_num_outputs="4">
            <param name="gene" value="sequences.fasta.gz"/>
            <conditional name="ref_source">
                <param name="source" value="history" />
                <param name="ref_fasta" value="sequences.fasta.gz" />
            </conditional>
            <param name="c" value="sequences.fasta.gz"/>
            <param name="n" value="sequences.fasta.gz"/>
            <param name="start" value="ATG"/>
            <param name="stop" value="TAG,TAA,TGA"/>
            <param name="min_orf_model" value="30"/>
            <param name="min_orf_cpat" value="75"/>
            <param name="antisense" value="true"/>
            <param name="top_orf" value="5"/>
            <param name="best_orf" value="l"/>
            <output name="orf_seqs" ftype="fasta">
                <assert_contents>
                    <has_text text=">ENST00000616016.5_ORF_1"/>
                    <has_n_lines n="41009"/>
                </assert_contents>
            </output>
            <output name="orf_seqs_prob" ftype="tsv">
                <assert_contents>
                    <has_text text="ENST00000616016.5_ORF_1"/>
                    <has_n_lines n="6237"/>
                </assert_contents>
            </output>
            <output name="orf_seqs_prob_best" ftype="tsv">
                <assert_contents>
                    <has_text text="ENST00000683977.1"/>
                    <has_n_lines n="1301" delta="5"/>
                </assert_contents>
            </output>
            <output name="no_orf_seqs" file="test01_no_orgs.txt" ftype="txt"/>
        </test>
        <!-- Test 2: reference from history, anti-sense search, best ORF by probability. -->
        <test expect_num_outputs="4">
            <param name="gene" value="sequences.fasta.gz"/>
            <conditional name="ref_source">
                <param name="source" value="history" />
                <param name="ref_fasta" value="sequences.fasta.gz" />
            </conditional>
            <param name="c" value="sequences.fasta.gz"/>
            <param name="n" value="sequences.fasta.gz"/>
            <param name="start" value="ATG"/>
            <param name="stop" value="TAG,TAA,TGA"/>
            <param name="min_orf_model" value="15"/>
            <param name="min_orf_cpat" value="60"/>
            <param name="antisense" value="true"/>
            <param name="top_orf" value="10"/>
            <param name="best_orf" value="p"/>
            <output name="orf_seqs" ftype="fasta">
                <assert_contents>
                    <has_text text=">ENST00000616016.5_ORF_1"/>
                    <has_n_lines n="57357"/>
                </assert_contents>
            </output>
            <output name="orf_seqs_prob" ftype="tsv">
                <assert_contents>
                    <has_text text="ENST00000616016.5_ORF_1"/>
                    <has_n_lines n="11667"/>
                </assert_contents>
            </output>
            <output name="orf_seqs_prob_best" ftype="tsv">
                <assert_contents>
                    <has_text text="ENST00000683977.1"/>
                    <has_n_lines n="1301"/>
                </assert_contents>
            </output>
            <output name="no_orf_seqs" file="test02_no_orgs.txt" ftype="txt"/>
        </test>
        <!-- Test 3: same settings as test 2, but with a built-in reference
             taken from the 'all_fasta' data table. -->
        <test expect_num_outputs="4">
            <param name="gene" value="sequences.fasta.gz"/>
            <conditional name="ref_source">
                <param name="source" value="builtin"/>
                <param name="ref_fasta_builtin" value="test1" />
            </conditional>
            <param name="c" value="sequences.fasta.gz"/>
            <param name="n" value="sequences.fasta.gz"/>
            <param name="start" value="ATG"/>
            <param name="stop" value="TAG,TAA,TGA"/>
            <param name="min_orf_model" value="15"/>
            <param name="min_orf_cpat" value="60"/>
            <param name="antisense" value="true"/>
            <param name="top_orf" value="10"/>
            <param name="best_orf" value="p"/>
            <output name="orf_seqs" ftype="fasta">
                <assert_contents>
                    <has_text text=">ENST00000616016.5_ORF_1"/>
                    <has_n_lines n="57357"/>
                </assert_contents>
            </output>
            <output name="orf_seqs_prob" ftype="tsv">
                <assert_contents>
                    <has_text text="ENST00000616016.5_ORF_1"/>
                    <has_n_lines n="11667"/>
                </assert_contents>
            </output>
            <output name="orf_seqs_prob_best" ftype="tsv">
                <assert_contents>
                    <has_text text="ENST00000683977.1"/>
                    <has_n_lines n="1301"/>
                </assert_contents>
            </output>
            <output name="no_orf_seqs" file="test03_no_orgs.txt" ftype="txt"/>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**Purpose**

CPAT is a bioinformatics tool that predicts the coding probability of RNAs based on characteristics of the RNA sequence. To achieve this goal, CPAT calculates scores for these 4 linguistic features from a set of known protein-coding genes and another set of non-coding genes.

- ORF size
- ORF coverage
- Fickett TESTCODE
- Hexamer usage bias

CPAT then builds a logistic regression model using these 4 features as predictor variables and the “protein-coding status” as the response variable. After evaluating the performance and determining the probability cutoff, the model can be used to predict new RNA sequences.
    ]]></help>
    <citations>
        <citation type="doi">10.1093/nar/gkt006</citation>
    </citations>
</tool>