Mercurial > repos > bgruening > cpat

--- a/cpat.xml	Mon Jan 29 10:44:49 2024 +0000
+++ b/cpat.xml	Tue Jul 23 15:09:07 2024 +0000
@@ -2,21 +2,24 @@
     <description>coding potential assessment</description>
     <macros>
         <token name="@TOOL_VERSION@">3.0.5</token>
-        <token name="@VERSION_SUFFIX@">0</token>
+        <token name="@VERSION_SUFFIX@">1</token>
     </macros>
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">cpat</requirement>
     </requirements>
     <version_command>cpat --version</version_command>
     <command detect_errors="exit_code"><![CDATA[
+        #if $ref_source.source == "history"
+            ln -s '${ref_source.ref_fasta}' reference.fasta &&
+        #elif $ref_source.source == "builtin"
+            ln -s '${ref_source.ref_fasta_builtin.fields.path}' reference.fasta &&
+        #end if
         #set $gen_ext = $gene.ext
         ln -s '${gene}' './gene_sequences.${gen_ext}' &&
         #set $cod_ext = $c.ext
         ln -s '${c}' './conding_sequences.${cod_ext}' &&
         #set $ncod_ext = $n.ext
         ln -s '${n}' './nonconding_sequences.${ncod_ext}' &&
-        #set $ref_ext = $r.ext
-        ln -s '${r}' './referece.${ref_ext}' &&
         make_hexamer_tab.py
             -c './conding_sequences.${cod_ext}'
             -n './nonconding_sequences.${ncod_ext}' > './hexamer.tsv' &&
@@ -24,7 +27,7 @@
             -x './hexamer.tsv'
             -c './conding_sequences.${cod_ext}'
             -n './nonconding_sequences.${ncod_ext}'
-            -r './referece.${ref_ext}'
+            -r './reference.fasta'
             --start='${start}'
             --stop='${stop}'
             --min-orf=$min_orf_model
@@ -46,9 +49,22 @@
     </command>
     <inputs>
         <param argument="--gene" type="data" format="fasta,fasta.gz" label="Query nucletide sequences" help="It is recommended to use short and unique sequence identifiers"/>
-        <param argument="-r" type="data" format="fasta,fasta.gz" label="Reference genome" help="Reference genome sequences in FASTA format" />
-        <param argument="-c" type="data" format="fasta,fasta.gz" label="Coding sequences file" help="Coding sequence (must be CDS without UTR, i.e. from start coden to stop coden) in FASTA format" />
-        <param argument="-n" type="data" format="fasta,fasta.gz" label="Non coding sequeces file" help="Noncoding sequences in FASTA format" />
+        <conditional name="ref_source">
+            <param type="select" label="Reference genome source" name="source">
+                <option value="history" selected="true">Use from History</option>
+                <option value="builtin">Use Built-in</option>
+            </param>
+            <when value="history">
+                <param type="data" format="fasta,fasta.gz" name="ref_fasta" argument="-r" label="Reference genome from History" help="Reference genome sequences in FASTA format"/>
+            </when>
+            <when value="builtin">
+                <param type="select" name="ref_fasta_builtin" argument="-r" label="Reference genome from Built-in reference">
+                    <options from_data_table="all_fasta" />
+                </param>
+            </when>
+        </conditional>
+        <param argument="-c" type="data" format="fasta,fasta.gz" label="Coding sequences file" help="Coding sequence (must be CDS without UTR, i.e. from start codon to stop codon) in FASTA format"/>
+        <param argument="-n" type="data" format="fasta,fasta.gz" label="Non coding sequences file" help="Noncoding sequences in FASTA format"/>
         <param argument="--start" type="text" value="ATG" label="Start codon">
             <sanitizer invalid_char="">
                 <valid initial="string.letters"/>
@@ -82,7 +98,10 @@
     <tests>
         <test expect_num_outputs="4">
             <param name="gene" value="sequences.fasta.gz"/>
-            <param name="r" value="sequences.fasta.gz"/>
+            <conditional name="ref_source">
+                <param name="source" value="history" />
+                <param name="ref_fasta" value="sequences.fasta.gz" />
+            </conditional>
             <param name="c" value="sequences.fasta.gz"/>
             <param name="n" value="sequences.fasta.gz"/>
             <param name="start" value="ATG"/>
@@ -114,7 +133,10 @@
         </test>
         <test expect_num_outputs="4">
             <param name="gene" value="sequences.fasta.gz"/>
-            <param name="r" value="sequences.fasta.gz"/>
+            <conditional name="ref_source">
+                <param name="source" value="history" />
+                <param name="ref_fasta" value="sequences.fasta.gz" />
+            </conditional>
             <param name="c" value="sequences.fasta.gz"/>
             <param name="n" value="sequences.fasta.gz"/>
             <param name="start" value="ATG"/>
@@ -144,6 +166,41 @@
             </output>
             <output name="no_orf_seqs" file="test02_no_orgs.txt" ftype="txt"/>
         </test>
+        <test expect_num_outputs="4">
+            <param name="gene" value="sequences.fasta.gz"/>
+            <conditional name="ref_source">
+                <param name="source" value="builtin"/>
+                <param name="ref_fasta_builtin" value="test1" />
+            </conditional>
+            <param name="c" value="sequences.fasta.gz"/>
+            <param name="n" value="sequences.fasta.gz"/>
+            <param name="start" value="ATG"/>
+            <param name="stop" value="TAG,TAA,TGA"/>
+            <param name="min_orf_model" value="15"/>
+            <param name="min_orf_cpat" value="60"/>
+            <param name="antisense" value="true"/>
+            <param name="top_orf" value="10"/>
+            <param name="best_orf" value="p"/>
+            <output name="orf_seqs" ftype="fasta">
+                <assert_contents>
+                    <has_text text=">ENST00000616016.5_ORF_1"/>
+                    <has_n_lines n="57357"/>
+                </assert_contents>
+            </output>
+            <output name="orf_seqs_prob" ftype="tsv">
+                <assert_contents>
+                    <has_text text="ENST00000616016.5_ORF_1"/>
+                    <has_n_lines n="11667"/>
+                </assert_contents>
+            </output>
+            <output name="orf_seqs_prob_best" ftype="tsv">
+                <assert_contents>
+                    <has_text text="ENST00000683977.1"/>
+                    <has_n_lines n="1301"/>
+                </assert_contents>
+            </output>
+            <output name="no_orf_seqs" file="test03_no_orgs.txt" ftype="txt"/>
+        </test>
     </tests>
     <help><![CDATA[
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc.test	Tue Jul 23 15:09:07 2024 +0000
@@ -0,0 +1,10 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+test1	"Test Genome"	${__HERE__}/test1.fasta.gz
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test03_no_orgs.txt	Tue Jul 23 15:09:07 2024 +0000
@@ -0,0 +1,1 @@
+ENST00000637839.1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample	Tue Jul 23 15:09:07 2024 +0000
@@ -0,0 +1,10 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#test1	Test-Genome	./test-data/test1.fa.gz
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue Jul 23 15:09:07 2024 +0000
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<tables>
+    <table name="all_fasta" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Tue Jul 23 15:09:07 2024 +0000
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Locations of reference genome files in fasta format -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc.test" />
+    </table>
+</tables>
\ No newline at end of file