Galaxy |

Changeset 0:8e9e228c54c4 (2023-02-01)

Next changeset 1:1ac12c0cc7a0 (2024-01-29)

Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/cpat commit 1903b0d45ff57d20d6e3a9b95fa55ad6d4a0f345

added:
cpat.xml
test-data/sequences.fasta.gz
test-data/test01_no_orgs.txt
test-data/test02_no_orgs.txt

diff -r 000000000000 -r 8e9e228c54c4 cpat.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpat.xml Wed Feb 01 19:22:06 2023 +0000

[

b'@@ -0,0 +1,169 @@\n+<tool id="cpat" name="CPAT" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">\n+ <description>coding potential assessment</description>\n+ <macros>\n+ <token name="@TOOL_VERSION@">3.0.4</token>\n+ <token name="@VERSION_SUFFIX@">0</token>\n+ </macros>\n+ <requirements>\n+ <requirement type="package" version="@TOOL_VERSION@">cpat</requirement>\n+ </requirements>\n+ <version_command>cpat --version</version_command>\n+ <command detect_errors="exit_code"><![CDATA[\n+ #set $gen_ext = $gene.ext\n+ ln -s \'${gene}\' \'./gene_sequences.${gen_ext}\' &&\n+ #set $cod_ext = $c.ext\n+ ln -s \'${c}\' \'./conding_sequences.${cod_ext}\' &&\n+ #set $ncod_ext = $n.ext\n+ ln -s \'${n}\' \'./nonconding_sequences.${ncod_ext}\' &&\n+ #set $ref_ext = $r.ext\n+ ln -s \'${r}\' \'./referece.${ref_ext}\' &&\n+ make_hexamer_tab.py \n+ -c \'./conding_sequences.${cod_ext}\'\n+ -n \'./nonconding_sequences.${ncod_ext}\' > \'./hexamer.tsv\' &&\n+ make_logitModel.py \n+ -x \'./hexamer.tsv\' \n+ -c \'./conding_sequences.${cod_ext}\'\n+ -n \'./nonconding_sequences.${ncod_ext}\'\n+ -r \'./referece.${ref_ext}\'\n+ --start=\'${start}\'\n+ --stop=\'${stop}\'\n+ --min-orf=$min_orf_model\n+ -o \'./logit_model\' &&\n+ cpat.py --verbose\n+ -x \'./hexamer.tsv\'\n+ -d \'./logit_model.logit.RData\'\n+ -g \'./gene_sequences.${gen_ext}\'\n+ --top-orf=$top_orf\n+ --antisense\n+ --start=\'${start}\'\n+ --stop=\'${stop}\'\n+ --min-orf=$min_orf_cpat\n+ $antisense\n+ --top-orf=$top_orf\n+ --best-orf=$best_orf\n+ -o \'output\' \n+ ]]>\n+ </command>\n+ <inputs>\n+ <param argument="--gene" type="data" format="fasta,fasta.gz" label="Query nucletide sequences" help="It is recommended to use short and unique sequence identifiers"/>\n+ <param argument="-r" type="data" format="fasta,fasta.gz" label="Reference genome" help="Reference genome sequences in FASTA format" />\n+ <param argument="-c" type="data" format="fasta,fasta.gz" label="Coding sequences file" help="Coding sequence (must be CDS without UTR, 
i.e. from start coden to stop coden) in FASTA format" />\n+ <param argument="-n" type="data" format="fasta,fasta.gz" label="Non coding sequeces file" help="Noncoding sequences in FASTA format" />\n+ <param argument="--start" type="text" value="ATG" label="Start codon">\n+ <sanitizer invalid_char="">\n+ <valid initial="string.letters"/>\n+ </sanitizer>\n+ <validator type="regex">[a-zA-Z]+</validator>\n+ </param>\n+ <param argument="--stop" type="text" value="TAG,TAA,TGA" label="Stop codons">\n+ <sanitizer invalid_char="">\n+ <valid initial="string.letters">\n+ <add value="," />\n+ </valid>\n+ </sanitizer>\n+ <validator type="regex">[a-zA-Z,]+</validator>\n+ </param>\n+ <param argument="--min-orf" name="min_orf_model" type="integer" min="0" value="30" label="Minimum ORF length" help="Minimum ORF length in nucleotides" />\n+ <param argument="--min-orf" name="min_orf_cpat" type="integer" min="0" value="75" label="Minimum ORF length" help="Minimum ORF length in nucleotides" />\n+ <param argument="--antisense" type="boolean" truevalue="--antisense" falsevalue="" checked="false" label="Search for ORFs from the anti-sense strand"/>\n+ <param argument="--top-orf" type="integer" min="0" value="5" label="Number of ORF candidates reported" help="RNAs may have dozens of putative ORFs, in most cases, the real ORF \n+ is ranked (by size) in the top several" />\n+ <param argument="--best-orf" type="select" label="Criteria to select the best ORF">\n+ <option value="l">ORF length (l)</option>\n+ '..b' <data name="no_orf_seqs" format="txt" from_work_dir="output.no_ORF.txt" label="${tool.name} on ${on_string}: no ORFs (TXT)"/>\n+ </outputs>\n+ <tests>\n+ <test expect_num_outputs="4">\n+ <param name="gene" value="sequences.fasta.gz"/>\n+ <param name="r" value="sequences.fasta.gz"/>\n+ <param name="c" value="sequences.fasta.gz"/>\n+ <param name="n" value="sequences.fasta.gz"/>\n+ <param name="start" value="ATG"/>\n+ <param name="stop" value="TAG,TAA,TGA"/>\n+ <param name="min_orf_model" 
value="30"/>\n+ <param name="min_orf_cpat" value="75"/>\n+ <param name="antisense" value="false"/>\n+ <param name="top_orf" value="5"/>\n+ <param name="best_orf" value="l"/>\n+ <output name="orf_seqs" ftype="fasta">\n+ <assert_contents>\n+ <has_text text=">ENST00000616016.5_ORF_1"/>\n+ <has_n_lines n="41009"/>\n+ </assert_contents>\n+ </output>\n+ <output name="orf_seqs_prob" ftype="tsv">\n+ <assert_contents>\n+ <has_text text="ENST00000616016.5_ORF_1"/>\n+ <has_n_lines n="6237"/>\n+ </assert_contents>\n+ </output>\n+ <output name="orf_seqs_prob_best" ftype="tsv">\n+ <assert_contents>\n+ <has_text text="ENST00000683977.1"/>\n+ <has_n_lines n="1301" delta="5"/>\n+ </assert_contents>\n+ </output>\n+ <output name="no_orf_seqs" file="test01_no_orgs.txt" ftype="txt"/>\n+ </test>\n+ <test expect_num_outputs="4">\n+ <param name="gene" value="sequences.fasta.gz"/>\n+ <param name="r" value="sequences.fasta.gz"/>\n+ <param name="c" value="sequences.fasta.gz"/>\n+ <param name="n" value="sequences.fasta.gz"/>\n+ <param name="start" value="ATG"/>\n+ <param name="stop" value="TAG,TAA,TGA"/>\n+ <param name="min_orf_model" value="15"/>\n+ <param name="min_orf_cpat" value="60"/>\n+ <param name="antisense" value="true"/>\n+ <param name="top_orf" value="10"/>\n+ <param name="best_orf" value="p"/>\n+ <output name="orf_seqs" ftype="fasta">\n+ <assert_contents>\n+ <has_text text=">ENST00000616016.5_ORF_1"/>\n+ <has_n_lines n="57357"/>\n+ </assert_contents>\n+ </output> \n+ <output name="orf_seqs_prob" ftype="tsv">\n+ <assert_contents>\n+ <has_text text="ENST00000616016.5_ORF_1"/>\n+ <has_n_lines n="11667"/>\n+ </assert_contents>\n+ </output>\n+ <output name="orf_seqs_prob_best" ftype="tsv">\n+ <assert_contents>\n+ <has_text text="ENST00000683977.1"/>\n+ <has_n_lines n="1301"/>\n+ </assert_contents>\n+ </output>\n+ <output name="no_orf_seqs" file="test02_no_orgs.txt" ftype="txt"/>\n+ </test>\n+ </tests>\n+ <help><![CDATA[\n+\n+.. 
class:: infomark\n+\n+**Purpose**\n+\n+CPAT is a bioinformatics tool to predict the coding probability of RNAs based on the RNA sequence characteristics. To achieve this goal, CPAT calculates scores of these 4 linguistic features \n+from a set of known protein-coding genes and another set of non-coding genes.\n+\n+- ORF size\n+- ORF coverage\n+- Fickett TESTCODE\n+- Hexamer usage bias\n+\n+CPAT then builds a logistic regression model using these 4 features as predictor variables and the \xe2\x80\x9cprotein-coding status\xe2\x80\x9d as the response variable. After evaluating the performance \n+and determining the probability cutoff, the model can be used to predict new RNA sequences.\n+\n+]]></help>\n+ <citations>\n+ <citation type="doi">10.1093/nar/gkt006</citation>\n+ </citations>\n+</tool>\n'

diff -r 000000000000 -r 8e9e228c54c4 test-data/sequences.fasta.gz

Binary file test-data/sequences.fasta.gz has changed

diff -r 000000000000 -r 8e9e228c54c4 test-data/test01_no_orgs.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test01_no_orgs.txt Wed Feb 01 19:22:06 2023 +0000

@@ -0,0 +1,3 @@
+ENST00000637839.1
+ENST00000636635.1
+ENST00000502273.8

diff -r 000000000000 -r 8e9e228c54c4 test-data/test02_no_orgs.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test02_no_orgs.txt Wed Feb 01 19:22:06 2023 +0000

@@ -0,0 +1,1 @@
+ENST00000637839.1