changeset 1:f415e44e71de draft

planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/genenotebook commit a04f273adbc0ebb95bf42bf5bad8b41ba7aba91d
author gga
date Mon, 03 Apr 2023 15:01:52 +0000
parents 22f22c3e81bf
children 5a6050937cb9
files genenotebook_build.xml launch_gnb.sh macros.xml test-data/blast.xml test-data/exp.tsv test-data/output/genome.tar.bz2 test-data/output/genome_annot.tar.bz2 test-data/output/genome_annot_ips.tar.bz2 test-data/output/genome_annot_ips_en.tar.bz2 test-data/output/genome_annot_ips_en_bl_exp.tar.bz2
diffstat 10 files changed, 219 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/genenotebook_build.xml	Wed Jan 11 11:49:13 2023 +0000
+++ b/genenotebook_build.xml	Mon Apr 03 15:01:52 2023 +0000
@@ -9,12 +9,25 @@
 @START_GNB@
 
 #for genome in $genomes:
-    genenotebook add genome @CONNECT_INFO@ --name '${genome.name}' '${genome.genome}';
+    genoboo add genome @CONNECT_INFO@ --name '${genome.name}' ${genome.public} '${genome.genome}';
 
     #for annot in $genome.annots:
-        genenotebook add annotation @CONNECT_INFO@ --genome-name '${genome.name}' '${annot.annotation}';
+        genoboo add annotation @CONNECT_INFO@ --name '${genome.name}'
+        #if $annot.prot_naming.method == 'regex'
+            #if $annot.prot_naming.re_protein:
+            --re_protein '$annot.prot_naming.re_protein'
+            #end if
+            #if $annot.prot_naming.re_protein_capture:
+            --re_protein_capture '$annot.prot_naming.re_protein_capture'
+            #end if
+        #elif $annot.prot_naming.method == "attr"
+            #if $annot.prot_naming.attr_protein:
+            --attr_protein '$annot.prot_naming.attr_protein'
+            #end if
+        #end if
+        '${annot.annotation}';
         #if $annot.interproscan:
-            genenotebook add interproscan @CONNECT_INFO@ --format
+            genoboo add interproscan @CONNECT_INFO@ --format
             #if $annot.interproscan.is_of_type('gff3'):
                 gff3
             #else:
@@ -24,8 +37,25 @@
         #end if
 
         #if $annot.eggnog:
-            genenotebook add eggnog @CONNECT_INFO@ '${annot.eggnog}';
+            genoboo add eggnog @CONNECT_INFO@ '${annot.eggnog}';
         #end if
+
+        #if $annot.blast_cond.blast_choice == "yes":
+            genoboo add blast @CONNECT_INFO@
+                --format 'xml'
+                --algorithm '${annot.blast_cond.algorithm}'
+                --matrix '${annot.blast_cond.matrix}'
+                --database '${annot.blast_cond.database}'
+                '${annot.blast_cond.blast}';
+        #end if
+
+        #for exp in $annot.expression:
+            genoboo add transcriptome @CONNECT_INFO@
+                --sample-name '${exp.sample_name}'
+                --replica-group '${exp.replica_group}'
+                --sample-description '${exp.sample_description}'
+                '${exp.counts}';
+        #end for
     #end for
 #end for
 
@@ -36,10 +66,51 @@
         <repeat name="genomes" title="Genomes">
             <param argument="--name" label="Name" type="text" help="Reference genome name" />
             <param name="genome" label="Genome sequence" type="data" format="fasta" />
+            <param name="public" label="Public access" type="boolean" truevalue="--public" falsevalue="" value="false" />
             <repeat name="annots" title="Annotations">
                 <param name="annotation" label="Annotation" type="data" format="gff3" />
+                <conditional name="prot_naming">
+                    <param name="method" type="select" label="Protein naming method">
+                        <option value="none">No specific names for proteins</option>
+                        <option value="regex">Based on mRNA name with regular expression</option>
+                        <option value="attr">From GFF attribute</option>
+                    </param>
+                    <when value="none" />
+                    <when value="regex">
+                        <param argument="--re_protein_capture" label="Regex protein capture" type="text" help="Regular expression to capture groups in mRNA name to use in 'Regex protein'" value="^(.*?)-R([A-Z]+)$">
+                            <expand macro="sanitized"/>
+                        </param>
+                        <param argument="--re_protein" label="Regex protein" type="text" help="Replacement string for the protein name using capturing groups defined in 'Regex protein capture'" value="$1-P$2">
+                            <expand macro="sanitized"/>
+                        </param>
+                    </when>
+                    <when value="attr">
+                        <param argument="--attr_protein" label="Protein id attribute" type="text" help="Attribute containing the protein uniquename. It is searched at the mRNA level, and if not found at CDS level." value="protein_id"/>
+                    </when>
+                </conditional>
                 <param name="interproscan" label="InterProScan results" optional="true" type="data" format="tsv,gff3" />
                 <param name="eggnog" label="EggNOG-Mapper results" optional="true" type="data" format="tsv" />
+
+                <conditional name="blast_cond">
+                    <param name="blast_choice" type="select" label="Import Blast or Diamond results">
+                        <option value="no" selected="true">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no" />
+                    <when value="yes">
+                        <param name="blast" label="Blast or Diamond results" optional="false" type="data" format="xml" />
+                        <param argument="--algorithm" label="Algorithm" optional="false" type="text" help="e.g. blastx, blastp" />
+                        <param argument="--matrix" label="Matrix" optional="false" type="text" help="e.g. BLOSUM90, BLOSUM80, PAM100" />
+                        <param argument="--database" label="database" optional="false" type="text" help="e.g. Non-redundant protein sequences (nr)" />
+                    </when>
+                </conditional>
+
+                <repeat name="expression" title="Expression data">
+                    <param name="counts" label="Expression data" optional="false" type="data" format="tsv" />
+                    <param argument="--sample-name" name="sample_name" label="Unique sample name" type="text" />
+                    <param argument="--replica-group" name="replica_group" label="Identifier to group samples that belong to the same experiment" type="text" />
+                    <param argument="--sample-description" name="sample_description" label="Description of the experiment" type="text" />
+                </repeat>
             </repeat>
         </repeat>
     </inputs>
@@ -59,6 +130,17 @@
         </test>
         <test>
             <repeat name="genomes">
+                <param name="name" value="Test org" />
+                <param name="public" value="true" />
+                <param name="genome" value="genome.fa" />
+            </repeat>
+            <output name="gnb_db" file="output/genome.tar.bz2" compare="sim_size" />
+            <assert_stdout>
+                <has_text text="addGenome succesfully inserted 1 elements" />
+            </assert_stdout>
+        </test>
+        <test>
+            <repeat name="genomes">
                 <param name="name" value="Test org 2" />
                 <param name="genome" value="genome.fa" />
                 <repeat name="annots">
@@ -68,7 +150,7 @@
             <output name="gnb_db" file="output/genome_annot.tar.bz2" compare="sim_size" />
             <assert_stdout>
                 <has_text text="addGenome succesfully inserted 1 elements" />
-                <has_text text="addAnnotationTrack succesfully inserted 5 elements" />
+                <has_text text="addAnnotation succesfully inserted 5 elements" />
             </assert_stdout>
         </test>
         <test>
@@ -83,7 +165,7 @@
             <output name="gnb_db" file="output/genome_annot_ips.tar.bz2" compare="sim_size" />
             <assert_stdout>
                 <has_text text="addGenome succesfully inserted 1 elements" />
-                <has_text text="addAnnotationTrack succesfully inserted 5 elements" />
+                <has_text text="addAnnotation succesfully inserted 5 elements" />
                 <has_text text="addInterproscan succesfully inserted 5 elements" />
             </assert_stdout>
         </test>
@@ -111,9 +193,46 @@
             <output name="gnb_db" file="output/genome_annot_ips_en.tar.bz2" compare="sim_size" delta="15000" />
             <assert_stdout>
                 <has_text text="addGenome succesfully inserted 1 elements" />
-                <has_text text="addAnnotationTrack succesfully inserted 5 elements" />
+                <has_text text="addAnnotation succesfully inserted 5 elements" />
                 <has_text text="addInterproscan succesfully inserted 5 elements" />
-                <has_text text="addEggnog succesfully inserted undefined elements" />
+                <has_text text="addEggnog succesfully inserted 4 elements" />
+            </assert_stdout>
+        </test>
+        <test>
+            <repeat name="genomes">
+                <param name="name" value="Test org 2" />
+                <param name="genome" value="genome.fa" />
+                <repeat name="annots">
+                    <param name="annotation" value="annot.gff" />
+
+                    <conditional name="prot_naming">
+                        <param name="method" value="regex"/>
+                        <param name="re_protein_capture" value="^(.*?)-T([0-9]+)$" />
+                        <param name="re_protein" value="$1-P$2" />
+                    </conditional>
+
+                    <conditional name="blast_cond">
+                        <param name="blast_choice" value="yes"/>
+                        <param name="blast" value="blast.xml" ftype="xml" />
+                        <param name="algorithm" value="blastx" />
+                        <param name="matrix" value="BLOSUM80" />
+                        <param name="database" value="Non-reundant garbage (nrg)" />
+                    </conditional>
+
+                    <repeat name="expression">
+                        <param name="counts" value="exp.tsv" ftype="tsv" />
+                        <param name="sample_name" value="Sample1_rep1" />
+                        <param name="replica_group" value="Sample1" />
+                        <param name="sample_description" value="Pointless experiment" />
+                    </repeat>
+                </repeat>
+            </repeat>
+            <output name="gnb_db" file="output/genome_annot_ips_en_bl_exp.tar.bz2" compare="sim_size" delta="15000" />
+            <assert_stdout>
+                <has_text text="addGenome succesfully inserted 1 elements" />
+                <has_text text="addAnnotation succesfully inserted 5 elements" />
+                <has_text text="addSimilarSequence succesfully inserted 1 elements" />
+                <has_text text="addTranscriptome succesfully inserted 1 elements" />
             </assert_stdout>
         </test>
     </tests>
@@ -121,6 +240,8 @@
 Build a GeneNoteBook by loading data into a MongoDB database. The resulting dataset can then be displayed with the "View a GeneNoteBook" interactive tool (to be written).
 
 The resulting GeneNoteBook will contain the default user accounts created on the first launch. You are responsible to change them if you ever want to put your result online.
+
+The current version is based on a fork of GeneNoteBook that includes various improvements (https://github.com/gogepp/genoboo/).
     ]]></help>
     <expand macro="citation" />
 </tool>
--- a/launch_gnb.sh	Wed Jan 11 11:49:13 2023 +0000
+++ b/launch_gnb.sh	Mon Apr 03 15:01:52 2023 +0000
@@ -4,10 +4,8 @@
 
 mongod --dbpath ./mongo_db/ --unixSocketPrefix `pwd` --bind_ip fake_socket --logpath ./mongod.log --pidfilepath ./mongo.pid &
 
-sleep 5
+sleep 8
 
-# "waiting for connections on port" is for mongodb 4x
-#if ! grep -q "waiting for connections on port" ./mongod.log; then
 # "Listening on" is for mongodb 5x
 if ! grep -q "Listening on" ./mongod.log; then
   echo "Failed to launch MongoDB:" 1>&2;
@@ -19,7 +17,7 @@
 TMP_STORAGE=$(pwd)/tmp_storage
 mkdir "$TMP_STORAGE"
 
-genenotebook run --storage-path "$TMP_STORAGE" --port ${GNB_PORT} --mongo-url mongodb://$MONGO_URI%2Fmongodb-27017.sock/genenotebook > ./gnb.log 2>&1 &
+genoboo run --storage-path "$TMP_STORAGE" --port ${GNB_PORT} --mongo-url mongodb://$MONGO_URI%2Fmongodb-27017.sock/genenotebook > ./gnb.log 2>&1 &
 
 export GNB_PID=$!
 
--- a/macros.xml	Wed Jan 11 11:49:13 2023 +0000
+++ b/macros.xml	Mon Apr 03 15:01:52 2023 +0000
@@ -2,12 +2,12 @@
 <macros>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="@TOOL_VERSION@">genenotebook</requirement>
+            <requirement type="package" version="@TOOL_VERSION@">genoboo</requirement>
             <yield/>
         </requirements>
     </xml>
 
-    <token name="@TOOL_VERSION@">0.3.2</token>
+    <token name="@TOOL_VERSION@">0.4.1</token>
     <token name="@WRAPPER_VERSION@">@TOOL_VERSION@+galaxy0</token>
 
     <xml name="citation">
@@ -16,6 +16,19 @@
         </citations>
     </xml>
 
+    <xml name="sanitized">
+        <sanitizer>
+            <valid initial="string.printable">
+                <remove value="&apos;"/>
+            </valid>
+            <mapping initial="none">
+                <add source="&apos;" target="&apos;&quot;&apos;&quot;&apos;"/>
+                <add source="(" target="\("/>
+                <add source=")" target="\)"/>
+            </mapping>
+        </sanitizer>
+    </xml>
+
     <token name="@CONNECT_INFO@">-u admin -p admin --port \$GNB_PORT</token>
 
     <!--
@@ -24,8 +37,8 @@
     For some unknwon reason, unixSocketPrefix needs an absolute path
     -->
     <token name="@START_GNB@"><![CDATA[
-        export GNB_PORT=\$(bash '$__tool_directory__/find_free_port.sh');
-        export MONGO_URI=\$(pwd | sed 's|/|%2F|g');
+        export GNB_PORT="\$(bash '$__tool_directory__/find_free_port.sh')";
+        export MONGO_URI="\$(pwd | sed 's|/|%2F|g')";
         #if $existing
             tar -xf '${existing}' mongo_db;
         #else
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blast.xml	Mon Apr 03 15:01:52 2023 +0000
@@ -0,0 +1,69 @@
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
+<BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>diamond 2.0.13</BlastOutput_version>
+  <BlastOutput_reference>Benjamin Buchfink, Xie Chao, and Daniel Huson (2015), &quot;Fast and sensitive protein alignment using DIAMOND&quot;, Nature Methods 12:59-60.</BlastOutput_reference>
+  <BlastOutput_db>/db/nr/NR_2021-6-16/diamond/nr.dmnd</BlastOutput_db>
+  <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
+  <BlastOutput_query-def>MMUCEDO_000001-P1</BlastOutput_query-def>
+  <BlastOutput_query-len>420</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>blosum62</Parameters_matrix>
+      <Parameters_expect>1e-08</Parameters_expect>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>F</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+<BlastOutput_iterations>
+<Iteration>
+  <Iteration_iter-num>1</Iteration_iter-num>
+  <Iteration_query-ID>Query_1</Iteration_query-ID>
+  <Iteration_query-def>MMUCEDO_000001-P1</Iteration_query-def>
+  <Iteration_query-len>226</Iteration_query-len>
+<Iteration_hits>
+<Hit>
+  <Hit_num>1</Hit_num>
+  <Hit_id>KAG2309741.1</Hit_id>
+  <Hit_def>hypothetical protein Bca52824_029489 [Brassica carinata]</Hit_def>
+  <Hit_accession>KAG2309741</Hit_accession>
+  <Hit_len>226</Hit_len>
+  <Hit_hsps>
+    <Hsp>
+      <Hsp_num>1</Hsp_num>
+      <Hsp_bit-score>803</Hsp_bit-score>
+      <Hsp_score>2075</Hsp_score>
+      <Hsp_evalue>7.84e-293</Hsp_evalue>
+      <Hsp_query-from>1</Hsp_query-from>
+      <Hsp_query-to>420</Hsp_query-to>
+      <Hsp_hit-from>1</Hsp_hit-from>
+      <Hsp_hit-to>420</Hsp_hit-to>
+      <Hsp_query-frame>0</Hsp_query-frame>
+      <Hsp_hit-frame>0</Hsp_hit-frame>
+      <Hsp_identity>417</Hsp_identity>
+      <Hsp_positive>418</Hsp_positive>
+      <Hsp_gaps>0</Hsp_gaps>
+      <Hsp_align-len>226</Hsp_align-len>
+         <Hsp_qseq>MASELTYRRHEMEQAEGRALYRKPMKPIRYMLREQRLVFVLVGIAIATFAFTLFSPSSTTQPIPISYYSDPEMRSYMSGGIGSVGGKIPLGLKRKGLRVVVTXXXXXXXXXLVDRLMARGDKVIVVDNFFTGSKENVMHHFGNPNFELIRHDVVEPILLEVDHIYHLACPASPVHYKFNPVKTIKTNVVGTLNMLGLAKRVGARFLLTSTSEVYGDPLQHPQLET</Hsp_qseq>
+         <Hsp_hseq>MASELTYRRHEMEQAEGRALYRKPMKPIRYMLREQRLVFVLVGIAIATLAFTLFSPSSTTQPIPISYYSDPEMRSYMSGGMGSVGGKIPLGLKRKGLRVVVTXXXXXXXXXLVDRLMARGDKVIVVDNFFTGSKENVMHHFGNPNFELIRHDVVEPILLEVDHIYHLACPASPVHYKFNPVKTIKTNVVGTLNMLGLAKRVGARFLLTSTSEVYGDPLQHPQLET</Hsp_hseq>
+      <Hsp_midline>MASELTYRRHEMEQAEGRALYRKPMKPIRYMLREQRLVFVLVGIAIAT AFTLFSPSSTTQPIPISYYSDPEMRSYMSGG+GSVGGKIPLGLKRKGLRVVVTXXXXXXXXXLVDRLMARGDKVIVVDNFFTGSKENVMHHFGNPNFELIRHDVVEPILLEVDHIYHLACPASPVHYKFNPVKTIKTNVVGTLNMLGLAKRVGARFLLTSTSEVYGDPLQHPQLET</Hsp_midline>
+    </Hsp>
+  </Hit_hsps>
+</Hit>
+</Iteration_hits>
+  <Iteration_stat>
+    <Statistics>
+      <Statistics_db-num>405733372</Statistics_db-num>
+      <Statistics_db-len>147630876712</Statistics_db-len>
+      <Statistics_hsp-len>0</Statistics_hsp-len>
+      <Statistics_eff-space>0</Statistics_eff-space>
+      <Statistics_kappa>0.041000</Statistics_kappa>
+      <Statistics_lambda>0.267000</Statistics_lambda>
+      <Statistics_entropy>0</Statistics_entropy>
+    </Statistics>
+  </Iteration_stat>
+</Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/exp.tsv	Mon Apr 03 15:01:52 2023 +0000
@@ -0,0 +1,2 @@
+target_id	length	eff_length	est_counts	tpm
+MMUCEDO_000001-T1	1221	1021.99	21	1.80368
Binary file test-data/output/genome.tar.bz2 has changed
Binary file test-data/output/genome_annot.tar.bz2 has changed
Binary file test-data/output/genome_annot_ips.tar.bz2 has changed
Binary file test-data/output/genome_annot_ips_en.tar.bz2 has changed
Binary file test-data/output/genome_annot_ips_en_bl_exp.tar.bz2 has changed