Repository 'genenotebook_genenotebook_build'
hg clone https://toolshed.g2.bx.psu.edu/repos/gga/genenotebook_genenotebook_build

Changeset 1:f415e44e71de (2023-04-03)
Previous changeset 0:22f22c3e81bf (2023-01-11) Next changeset 2:5a6050937cb9 (2023-04-14)
Commit message:
planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/genenotebook commit a04f273adbc0ebb95bf42bf5bad8b41ba7aba91d
modified:
genenotebook_build.xml
launch_gnb.sh
macros.xml
test-data/output/genome.tar.bz2
test-data/output/genome_annot.tar.bz2
test-data/output/genome_annot_ips.tar.bz2
test-data/output/genome_annot_ips_en.tar.bz2
added:
test-data/blast.xml
test-data/exp.tsv
test-data/output/genome_annot_ips_en_bl_exp.tar.bz2
b
diff -r 22f22c3e81bf -r f415e44e71de genenotebook_build.xml
--- a/genenotebook_build.xml Wed Jan 11 11:49:13 2023 +0000
+++ b/genenotebook_build.xml Mon Apr 03 15:01:52 2023 +0000
[
b'@@ -9,12 +9,25 @@\n @START_GNB@\n \n #for genome in $genomes:\n-    genenotebook add genome @CONNECT_INFO@ --name \'${genome.name}\' \'${genome.genome}\';\n+    genoboo add genome @CONNECT_INFO@ --name \'${genome.name}\' ${genome.public} \'${genome.genome}\';\n \n     #for annot in $genome.annots:\n-        genenotebook add annotation @CONNECT_INFO@ --genome-name \'${genome.name}\' \'${annot.annotation}\';\n+        genoboo add annotation @CONNECT_INFO@ --name \'${genome.name}\'\n+        #if $annot.prot_naming.method == \'regex\'\n+            #if $annot.prot_naming.re_protein:\n+            --re_protein \'$annot.prot_naming.re_protein\'\n+            #end if\n+            #if $annot.prot_naming.re_protein_capture:\n+            --re_protein_capture \'$annot.prot_naming.re_protein_capture\'\n+            #end if\n+        #elif $annot.prot_naming.method == "attr"\n+            #if $annot.prot_naming.protein_id_attr:\n+            --attr_protein \'$annot.prot_naming.attr_protein\'\n+            #end if\n+        #end if\n+        \'${annot.annotation}\';\n         #if $annot.interproscan:\n-            genenotebook add interproscan @CONNECT_INFO@ --format\n+            genoboo add interproscan @CONNECT_INFO@ --format\n             #if $annot.interproscan.is_of_type(\'gff3\'):\n                 gff3\n             #else:\n@@ -24,8 +37,25 @@\n         #end if\n \n         #if $annot.eggnog:\n-            genenotebook add eggnog @CONNECT_INFO@ \'${annot.eggnog}\';\n+            genoboo add eggnog @CONNECT_INFO@ \'${annot.eggnog}\';\n         #end if\n+\n+        #if $annot.blast_cond.blast_choice == "yes":\n+            genoboo add blast @CONNECT_INFO@\n+                --format \'xml\'\n+                --algorithm \'${annot.blast_cond.algorithm}\'\n+                --matrix \'${annot.blast_cond.matrix}\'\n+                --database \'${annot.blast_cond.database}\'\n+                \'${annot.blast_cond.blast}\';\n+        #end if\n+\n+        #for exp in $annot.expression:\n+            genoboo add transcriptome @CONNECT_INFO@\n+                --sample-name \'${exp.sample_name}\'\n+                --replica-group \'${exp.replica_group}\'\n+                --sample-description \'${exp.sample_description}\'\n+                \'${exp.counts}\';\n+        #end for\n     #end for\n #end for\n \n@@ -36,10 +66,51 @@\n         <repeat name="genomes" title="Genomes">\n             <param argument="--name" label="Name" type="text" help="Reference genome name" />\n             <param name="genome" label="Genome sequence" type="data" format="fasta" />\n+            <param name="public" label="Public access" type="boolean" truevalue="--public" falsevalue="" value="false" />\n             <repeat name="annots" title="Annotations">\n                 <param name="annotation" label="Annotation" type="data" format="gff3" />\n+                <conditional name="prot_naming">\n+                    <param name="method" type="select" label="Protein naming method">\n+                        <option value="none">No specific names for proteins</option>\n+                        <option value="regex">Based on mRNA name with regular expression</option>\n+                        <option value="attr">From GFF attribute</option>\n+                    </param>\n+                    <when value="none" />\n+                    <when value="regex">\n+                        <param argument="--re_protein_capture" label="Regex protein capture" type="text" help="Regular expression to capture groups in mRNA name to use in \'Regex protein\'" value="^(.*?)-R([A-Z]+)$">\n+                            <expand macro="sanitized"/>\n+                        </param>\n+                        <param argument="--re_protein" label="Regex protein" type="text" help="Replacement string for the protein name using capturing groups defined in \'Regex protein capture\'" value="$1-P$2">\n+                            <expand macro="sanitized"/>\n+                        </param>\n+                    </when>\n+                    <when value="attr">\n+                        <param argument="--attr_protein" lab'..b'elements" />\n+                <has_text text="addAnnotation succesfully inserted 5 elements" />\n             </assert_stdout>\n         </test>\n         <test>\n@@ -83,7 +165,7 @@\n             <output name="gnb_db" file="output/genome_annot_ips.tar.bz2" compare="sim_size" />\n             <assert_stdout>\n                 <has_text text="addGenome succesfully inserted 1 elements" />\n-                <has_text text="addAnnotationTrack succesfully inserted 5 elements" />\n+                <has_text text="addAnnotation succesfully inserted 5 elements" />\n                 <has_text text="addInterproscan succesfully inserted 5 elements" />\n             </assert_stdout>\n         </test>\n@@ -111,9 +193,46 @@\n             <output name="gnb_db" file="output/genome_annot_ips_en.tar.bz2" compare="sim_size" delta="15000" />\n             <assert_stdout>\n                 <has_text text="addGenome succesfully inserted 1 elements" />\n-                <has_text text="addAnnotationTrack succesfully inserted 5 elements" />\n+                <has_text text="addAnnotation succesfully inserted 5 elements" />\n                 <has_text text="addInterproscan succesfully inserted 5 elements" />\n-                <has_text text="addEggnog succesfully inserted undefined elements" />\n+                <has_text text="addEggnog succesfully inserted 4 elements" />\n+            </assert_stdout>\n+        </test>\n+        <test>\n+            <repeat name="genomes">\n+                <param name="name" value="Test org 2" />\n+                <param name="genome" value="genome.fa" />\n+                <repeat name="annots">\n+                    <param name="annotation" value="annot.gff" />\n+\n+                    <conditional name="prot_naming">\n+                        <param name="method" value="regex"/>\n+                        <param name="re_protein_capture" value="^(.*?)-T([0-9]+)$" />\n+                        <param name="re_protein" value="$1-P$2" />\n+                    </conditional>\n+\n+                    <conditional name="blast_cond">\n+                        <param name="blast_choice" value="yes"/>\n+                        <param name="blast" value="blast.xml" ftype="xml" />\n+                        <param name="algorithm" value="blastx" />\n+                        <param name="matrix" value="BLOSUM80" />\n+                        <param name="database" value="Non-reundant garbage (nrg)" />\n+                    </conditional>\n+\n+                    <repeat name="expression">\n+                        <param name="counts" value="exp.tsv" ftype="tsv" />\n+                        <param name="sample_name" value="Sample1_rep1" />\n+                        <param name="replica_group" value="Sample1" />\n+                        <param name="sample_description" value="Pointless experiment" />\n+                    </repeat>\n+                </repeat>\n+            </repeat>\n+            <output name="gnb_db" file="output/genome_annot_ips_en_bl_exp.tar.bz2" compare="sim_size" delta="15000" />\n+            <assert_stdout>\n+                <has_text text="addGenome succesfully inserted 1 elements" />\n+                <has_text text="addAnnotation succesfully inserted 5 elements" />\n+                <has_text text="addSimilarSequence succesfully inserted 1 elements" />\n+                <has_text text="addTranscriptome succesfully inserted 1 elements" />\n             </assert_stdout>\n         </test>\n     </tests>\n@@ -121,6 +240,8 @@\n Build a GeneNoteBook by loading data into a MongoDB database. The resulting dataset can then be displayed with the "View a GeneNoteBook" interactive tool (to be written).\n \n The resulting GeneNoteBook will contain the default user accounts created on the first launch. You are responsible to change them if you ever want to put your result online.\n+\n+The current version is based on a forked version of GeneNoteBook including several various improvements (https://github.com/gogepp/genoboo/).\n     ]]></help>\n     <expand macro="citation" />\n </tool>\n'
b
diff -r 22f22c3e81bf -r f415e44e71de launch_gnb.sh
--- a/launch_gnb.sh Wed Jan 11 11:49:13 2023 +0000
+++ b/launch_gnb.sh Mon Apr 03 15:01:52 2023 +0000
b
@@ -4,10 +4,8 @@
 
 mongod --dbpath ./mongo_db/ --unixSocketPrefix `pwd` --bind_ip fake_socket --logpath ./mongod.log --pidfilepath ./mongo.pid &
 
-sleep 5
+sleep 8
 
-# "waiting for connections on port" is for mongodb 4x
-#if ! grep -q "waiting for connections on port" ./mongod.log; then
 # "Listening on" is for mongodb 5x
 if ! grep -q "Listening on" ./mongod.log; then
   echo "Failed to launch MongoDB:" 1>&2;
@@ -19,7 +17,7 @@
 TMP_STORAGE=$(pwd)/tmp_storage
 mkdir "$TMP_STORAGE"
 
-genenotebook run --storage-path "$TMP_STORAGE" --port ${GNB_PORT} --mongo-url mongodb://$MONGO_URI%2Fmongodb-27017.sock/genenotebook > ./gnb.log 2>&1 &
+genoboo run --storage-path "$TMP_STORAGE" --port ${GNB_PORT} --mongo-url mongodb://$MONGO_URI%2Fmongodb-27017.sock/genenotebook > ./gnb.log 2>&1 &
 
 export GNB_PID=$!
 
b
diff -r 22f22c3e81bf -r f415e44e71de macros.xml
--- a/macros.xml Wed Jan 11 11:49:13 2023 +0000
+++ b/macros.xml Mon Apr 03 15:01:52 2023 +0000
[
@@ -2,12 +2,12 @@
 <macros>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="@TOOL_VERSION@">genenotebook</requirement>
+            <requirement type="package" version="@TOOL_VERSION@">genoboo</requirement>
             <yield/>
         </requirements>
     </xml>
 
-    <token name="@TOOL_VERSION@">0.3.2</token>
+    <token name="@TOOL_VERSION@">0.4.1</token>
     <token name="@WRAPPER_VERSION@">@TOOL_VERSION@+galaxy0</token>
 
     <xml name="citation">
@@ -16,6 +16,19 @@
         </citations>
     </xml>
 
+    <xml name="sanitized">
+        <sanitizer>
+            <valid initial="string.printable">
+                <remove value="&apos;"/>
+            </valid>
+            <mapping initial="none">
+                <add source="&apos;" target="&apos;&quot;&apos;&quot;&apos;"/>
+                <add source="(" target="\("/>
+                <add source=")" target="\)"/>
+            </mapping>
+        </sanitizer>
+    </xml>
+
     <token name="@CONNECT_INFO@">-u admin -p admin --port \$GNB_PORT</token>
 
     <!--
@@ -24,8 +37,8 @@
     For some unknwon reason, unixSocketPrefix needs an absolute path
     -->
     <token name="@START_GNB@"><![CDATA[
-        export GNB_PORT=\$(bash '$__tool_directory__/find_free_port.sh');
-        export MONGO_URI=\$(pwd | sed 's|/|%2F|g');
+        export GNB_PORT="\$(bash '$__tool_directory__/find_free_port.sh')";
+        export MONGO_URI="\$(pwd | sed 's|/|%2F|g')";
         #if $existing
             tar -xf '${existing}' mongo_db;
         #else
b
diff -r 22f22c3e81bf -r f415e44e71de test-data/blast.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blast.xml Mon Apr 03 15:01:52 2023 +0000
[
@@ -0,0 +1,69 @@
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
+<BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>diamond 2.0.13</BlastOutput_version>
+  <BlastOutput_reference>Benjamin Buchfink, Xie Chao, and Daniel Huson (2015), &quot;Fast and sensitive protein alignment using DIAMOND&quot;, Nature Methods 12:59-60.</BlastOutput_reference>
+  <BlastOutput_db>/db/nr/NR_2021-6-16/diamond/nr.dmnd</BlastOutput_db>
+  <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
+  <BlastOutput_query-def>MMUCEDO_000001-P1</BlastOutput_query-def>
+  <BlastOutput_query-len>420</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>blosum62</Parameters_matrix>
+      <Parameters_expect>1e-08</Parameters_expect>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>F</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+<BlastOutput_iterations>
+<Iteration>
+  <Iteration_iter-num>1</Iteration_iter-num>
+  <Iteration_query-ID>Query_1</Iteration_query-ID>
+  <Iteration_query-def>MMUCEDO_000001-P1</Iteration_query-def>
+  <Iteration_query-len>226</Iteration_query-len>
+<Iteration_hits>
+<Hit>
+  <Hit_num>1</Hit_num>
+  <Hit_id>KAG2309741.1</Hit_id>
+  <Hit_def>hypothetical protein Bca52824_029489 [Brassica carinata]</Hit_def>
+  <Hit_accession>KAG2309741</Hit_accession>
+  <Hit_len>226</Hit_len>
+  <Hit_hsps>
+    <Hsp>
+      <Hsp_num>1</Hsp_num>
+      <Hsp_bit-score>803</Hsp_bit-score>
+      <Hsp_score>2075</Hsp_score>
+      <Hsp_evalue>7.84e-293</Hsp_evalue>
+      <Hsp_query-from>1</Hsp_query-from>
+      <Hsp_query-to>420</Hsp_query-to>
+      <Hsp_hit-from>1</Hsp_hit-from>
+      <Hsp_hit-to>420</Hsp_hit-to>
+      <Hsp_query-frame>0</Hsp_query-frame>
+      <Hsp_hit-frame>0</Hsp_hit-frame>
+      <Hsp_identity>417</Hsp_identity>
+      <Hsp_positive>418</Hsp_positive>
+      <Hsp_gaps>0</Hsp_gaps>
+      <Hsp_align-len>226</Hsp_align-len>
+         <Hsp_qseq>MASELTYRRHEMEQAEGRALYRKPMKPIRYMLREQRLVFVLVGIAIATFAFTLFSPSSTTQPIPISYYSDPEMRSYMSGGIGSVGGKIPLGLKRKGLRVVVTXXXXXXXXXLVDRLMARGDKVIVVDNFFTGSKENVMHHFGNPNFELIRHDVVEPILLEVDHIYHLACPASPVHYKFNPVKTIKTNVVGTLNMLGLAKRVGARFLLTSTSEVYGDPLQHPQLET</Hsp_qseq>
+         <Hsp_hseq>MASELTYRRHEMEQAEGRALYRKPMKPIRYMLREQRLVFVLVGIAIATLAFTLFSPSSTTQPIPISYYSDPEMRSYMSGGMGSVGGKIPLGLKRKGLRVVVTXXXXXXXXXLVDRLMARGDKVIVVDNFFTGSKENVMHHFGNPNFELIRHDVVEPILLEVDHIYHLACPASPVHYKFNPVKTIKTNVVGTLNMLGLAKRVGARFLLTSTSEVYGDPLQHPQLET</Hsp_hseq>
+      <Hsp_midline>MASELTYRRHEMEQAEGRALYRKPMKPIRYMLREQRLVFVLVGIAIAT AFTLFSPSSTTQPIPISYYSDPEMRSYMSGG+GSVGGKIPLGLKRKGLRVVVTXXXXXXXXXLVDRLMARGDKVIVVDNFFTGSKENVMHHFGNPNFELIRHDVVEPILLEVDHIYHLACPASPVHYKFNPVKTIKTNVVGTLNMLGLAKRVGARFLLTSTSEVYGDPLQHPQLET</Hsp_midline>
+    </Hsp>
+  </Hit_hsps>
+</Hit>
+</Iteration_hits>
+  <Iteration_stat>
+    <Statistics>
+      <Statistics_db-num>405733372</Statistics_db-num>
+      <Statistics_db-len>147630876712</Statistics_db-len>
+      <Statistics_hsp-len>0</Statistics_hsp-len>
+      <Statistics_eff-space>0</Statistics_eff-space>
+      <Statistics_kappa>0.041000</Statistics_kappa>
+      <Statistics_lambda>0.267000</Statistics_lambda>
+      <Statistics_entropy>0</Statistics_entropy>
+    </Statistics>
+  </Iteration_stat>
+</Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
b
diff -r 22f22c3e81bf -r f415e44e71de test-data/exp.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/exp.tsv Mon Apr 03 15:01:52 2023 +0000
b
@@ -0,0 +1,2 @@
+target_id length eff_length est_counts tpm
+MMUCEDO_000001-T1 1221 1021.99 21 1.80368
b
diff -r 22f22c3e81bf -r f415e44e71de test-data/output/genome.tar.bz2
b
Binary file test-data/output/genome.tar.bz2 has changed
b
diff -r 22f22c3e81bf -r f415e44e71de test-data/output/genome_annot.tar.bz2
b
Binary file test-data/output/genome_annot.tar.bz2 has changed
b
diff -r 22f22c3e81bf -r f415e44e71de test-data/output/genome_annot_ips.tar.bz2
b
Binary file test-data/output/genome_annot_ips.tar.bz2 has changed
b
diff -r 22f22c3e81bf -r f415e44e71de test-data/output/genome_annot_ips_en.tar.bz2
b
Binary file test-data/output/genome_annot_ips_en.tar.bz2 has changed
b
diff -r 22f22c3e81bf -r f415e44e71de test-data/output/genome_annot_ips_en_bl_exp.tar.bz2
b
Binary file test-data/output/genome_annot_ips_en_bl_exp.tar.bz2 has changed