Repository 'blast2html'
hg clone https://toolshed.g2.bx.psu.edu/repos/jankanis/blast2html

Changeset 98:e780606b7c25 (2014-06-30)
Previous changeset 97:8b3c5d334a84 (2014-06-30) Next changeset 99:8f02008a5f20 (2014-07-01)
Commit message:
test new command line parameters, fix small bug
modified:
blast2html.py
test-data/blast xml example3.html
test-data/blast xml example4.html
test-data/refresh_test_results.sh
added:
test-data/blastdb.loc
b
diff -r 8b3c5d334a84 -r e780606b7c25 blast2html.py
--- a/blast2html.py Mon Jun 30 17:31:26 2014 +0200
+++ b/blast2html.py Mon Jun 30 17:51:56 2014 +0200
[
@@ -386,7 +386,7 @@
             for l in f.readlines():
                 if l.strip().startswith('#'):
                     continue
-                line = l.split('\t')
+                line = l.rstrip('\n').split('\t')
                 try:
                     links[line[2]] = line[3]
                 except IndexError:
b
diff -r 8b3c5d334a84 -r e780606b7c25 test-data/blast xml example3.html
--- a/test-data/blast xml example3.html Mon Jun 30 17:31:26 2014 +0200
+++ b/test-data/blast xml example3.html Mon Jun 30 17:51:56 2014 +0200
b
b'@@ -794,7 +794,7 @@\n                   <td>100%</td>\n                   <td>7.011e-08</td>\n                   <td>100%</td>\n-                  <td><a href="http://www.ncbi.nlm.nih.gov/nucleotide/Subject_3?report=genbank&amp;log$=nuclalign">Subject_3</a></td>\n+                  <td><a href="http://example.com/example-genebank?id=Subject_3">Subject_3</a></td>\n                 </tr>\n               </table>\n \n@@ -811,13 +811,13 @@\n \n                 <div class=linkheader>\n                   <div class=right><a href="#description3-1">Descriptions</a></div>\n-                  <a class="linkheader" href="http://www.ncbi.nlm.nih.gov/nucleotide/Subject_3?report=genbank&amp;log$=nuclalign">Gene Bank</a>\n+                  <a class="linkheader" href="http://example.com/example-genebank?id=Subject_3">Gene Bank</a>\n                 </div>\n \n                 <div class=title>\n                   <p class=hittitle>DJ437711|GenBank|insert_MIR604|Corn_Event_MIR604,_Left_Border_region|-5751164067366620000</p>\n                   <p class=titleinfo>\n-                    <span class=b>Sequence ID:</span> <a href="http://www.ncbi.nlm.nih.gov/nucleotide/Subject_3?report=genbank&amp;log$=nuclalign">Subject_3</a>\n+                    <span class=b>Sequence ID:</span> <a href="http://example.com/example-genebank?id=Subject_3">Subject_3</a>\n                     <span class=b>Length:</span> 323\n                     <span class=b>Number of Matches:</span> 1\n                   </p>\n@@ -1030,7 +1030,7 @@\n                   <td>100%</td>\n                   <td>7.011e-08</td>\n                   <td>100%</td>\n-                  <td><a href="http://www.ncbi.nlm.nih.gov/nucleotide/Subject_6?report=genbank&amp;log$=nuclalign">Subject_6</a></td>\n+                  <td><a href="http://example.com/example-genebank?id=Subject_6">Subject_6</a></td>\n                 </tr>\n               </table>\n \n@@ -1047,13 +1047,13 @@\n \n                 <div class=linkheader>\n        
           <div class=right><a href="#description6-1">Descriptions</a></div>\n-                  <a class="linkheader" href="http://www.ncbi.nlm.nih.gov/nucleotide/Subject_6?report=genbank&amp;log$=nuclalign">Gene Bank</a>\n+                  <a class="linkheader" href="http://example.com/example-genebank?id=Subject_6">Gene Bank</a>\n                 </div>\n \n                 <div class=title>\n                   <p class=hittitle>AB209952.1|GenBank|insert_GTS-40-3-2|Glycine_max_transgenic_cp4epsps_gene_for_5-enol-pyruvylshikimate-3-phospate_synthase_class_2_precursor,_complete_cds|-9105899556052450000</p>\n                   <p class=titleinfo>\n-                    <span class=b>Sequence ID:</span> <a href="http://www.ncbi.nlm.nih.gov/nucleotide/Subject_6?report=genbank&amp;log$=nuclalign">Subject_6</a>\n+                    <span class=b>Sequence ID:</span> <a href="http://example.com/example-genebank?id=Subject_6">Subject_6</a>\n                     <span class=b>Length:</span> 2457\n                     <span class=b>Number of Matches:</span> 1\n                   </p>\n@@ -1502,7 +1502,7 @@\n                   <td>85%</td>\n                   <td>2.981e-06</td>\n                   <td>100%</td>\n-                  <td><a href="http://www.ncbi.nlm.nih.gov/nucleotide/Subject_2?report=genbank&amp;log$=nuclalign">Subject_2</a></td>\n+                  <td><a href="http://example.com/example-genebank?id=Subject_2">Subject_2</a></td>\n                 </tr>\n               </table>\n \n@@ -1519,13 +1519,13 @@\n \n                 <div class=linkheader>\n                   <div class=right><a href="#description18-1">Descriptions</a></div>\n-                  <a class="linkheader" href="http://www.ncbi.nlm.nih.gov/nucleotide/Subject_2?report=genbank&amp;log$=nuclalign">Gene Bank</a>\n+                  <a class="linkheader" href="http://example.com/example-genebank?id=Subject_2">Gene Bank</a>\n                 </div>\n \n                 <div class=title>\n         
          <p class=hittitle>AJ308515.1|GenBank|insert_GTS-40-3-2|Synt'..b' @@\n \n                 <div class=linkheader>\n                   <div class=right><a href="#description48-1">Descriptions</a></div>\n-                  <a class="linkheader" href="http://www.ncbi.nlm.nih.gov/nucleotide/Subject_8?report=genbank&amp;log$=nuclalign">Gene Bank</a>\n+                  <a class="linkheader" href="http://example.com/example-genebank?id=Subject_8">Gene Bank</a>\n                 </div>\n \n                 <div class=title>\n                   <p class=hittitle>EUG|RIKILT|plasmid_pV-ZMBK07|plasmid_pV-ZMBK07|-2635190737607180000</p>\n                   <p class=titleinfo>\n-                    <span class=b>Sequence ID:</span> <a href="http://www.ncbi.nlm.nih.gov/nucleotide/Subject_8?report=genbank&amp;log$=nuclalign">Subject_8</a>\n+                    <span class=b>Sequence ID:</span> <a href="http://example.com/example-genebank?id=Subject_8">Subject_8</a>\n                     <span class=b>Length:</span> 4983\n                     <span class=b>Number of Matches:</span> 1\n                   </p>\n@@ -3249,7 +3249,7 @@\n                   <td>100%</td>\n                   <td>6.629e-23</td>\n                   <td>86%</td>\n-                  <td><a href="http://www.ncbi.nlm.nih.gov/nucleotide/Subject_7?report=genbank&amp;log$=nuclalign">Subject_7</a></td>\n+                  <td><a href="http://example.com/example-genebank?id=Subject_7">Subject_7</a></td>\n                 </tr>\n               </table>\n \n@@ -3266,13 +3266,13 @@\n \n                 <div class=linkheader>\n                   <div class=right><a href="#description55-1">Descriptions</a></div>\n-                  <a class="linkheader" href="http://www.ncbi.nlm.nih.gov/nucleotide/Subject_7?report=genbank&amp;log$=nuclalign">Gene Bank</a>\n+                  <a class="linkheader" href="http://example.com/example-genebank?id=Subject_7">Gene Bank</a>\n                 </div>\n \n                 
<div class=title>\n                   <p class=hittitle>AY326434|GenBank|insert_MON810|Synthetic_construct_truncated_CRYIA(b)_(cryIA(b))_gene,_partial_CDS|-2635190737607180000</p>\n                   <p class=titleinfo>\n-                    <span class=b>Sequence ID:</span> <a href="http://www.ncbi.nlm.nih.gov/nucleotide/Subject_7?report=genbank&amp;log$=nuclalign">Subject_7</a>\n+                    <span class=b>Sequence ID:</span> <a href="http://example.com/example-genebank?id=Subject_7">Subject_7</a>\n                     <span class=b>Length:</span> 4180\n                     <span class=b>Number of Matches:</span> 1\n                   </p>\n@@ -3437,7 +3437,7 @@\n                   <td>100%</td>\n                   <td>6.629e-23</td>\n                   <td>86%</td>\n-                  <td><a href="http://www.ncbi.nlm.nih.gov/nucleotide/Subject_8?report=genbank&amp;log$=nuclalign">Subject_8</a></td>\n+                  <td><a href="http://example.com/example-genebank?id=Subject_8">Subject_8</a></td>\n                 </tr>\n               </table>\n \n@@ -3454,13 +3454,13 @@\n \n                 <div class=linkheader>\n                   <div class=right><a href="#description56-1">Descriptions</a></div>\n-                  <a class="linkheader" href="http://www.ncbi.nlm.nih.gov/nucleotide/Subject_8?report=genbank&amp;log$=nuclalign">Gene Bank</a>\n+                  <a class="linkheader" href="http://example.com/example-genebank?id=Subject_8">Gene Bank</a>\n                 </div>\n \n                 <div class=title>\n                   <p class=hittitle>EUG|RIKILT|plasmid_pV-ZMBK07|plasmid_pV-ZMBK07|-2635190737607180000</p>\n                   <p class=titleinfo>\n-                    <span class=b>Sequence ID:</span> <a href="http://www.ncbi.nlm.nih.gov/nucleotide/Subject_8?report=genbank&amp;log$=nuclalign">Subject_8</a>\n+                    <span class=b>Sequence ID:</span> <a href="http://example.com/example-genebank?id=Subject_8">Subject_8</a>\n   
                  <span class=b>Length:</span> 4983\n                     <span class=b>Number of Matches:</span> 1\n                   </p>\n'
b
diff -r 8b3c5d334a84 -r e780606b7c25 test-data/blast xml example4.html
--- a/test-data/blast xml example4.html Mon Jun 30 17:31:26 2014 +0200
+++ b/test-data/blast xml example4.html Mon Jun 30 17:51:56 2014 +0200
b
b'@@ -557,7 +557,7 @@\n                   <td>100%</td>\n                   <td>1.513e-07</td>\n                   <td>100%</td>\n-                  <td><a href="http://www.ncbi.nlm.nih.gov/nucleotide/5?report=genbank&amp;log$=nuclalign">5</a></td>\n+                  <td><a href="http://example.com/example-genebank/AB209952.1/">5</a></td>\n                 </tr>\n                 <tr>\n                   <td><div><a href="#hit1-2"\n@@ -570,7 +570,7 @@\n                   <td>100%</td>\n                   <td>1.513e-07</td>\n                   <td>100%</td>\n-                  <td><a href="http://www.ncbi.nlm.nih.gov/nucleotide/2?report=genbank&amp;log$=nuclalign">2</a></td>\n+                  <td><a href="http://example.com/example-genebank/DJ437711/">2</a></td>\n                 </tr>\n               </table>\n \n@@ -587,13 +587,13 @@\n \n                 <div class=linkheader>\n                   <div class=right><a href="#description1-1">Descriptions</a></div>\n-                  <a class="linkheader" href="http://www.ncbi.nlm.nih.gov/nucleotide/5?report=genbank&amp;log$=nuclalign">Gene Bank</a>\n+                  <a class="linkheader" href="http://example.com/example-genebank/AB209952.1/">Gene Bank</a>\n                 </div>\n \n                 <div class=title>\n                   <p class=hittitle>AB209952.1|GenBank|insert_GTS-40-3-2|Glycine_max_transgenic_cp4epsps_gene_for_5-enol-pyruvylshikimate-3-phospate_synthase_class_2_precursor,_complete_cds|-9105899556052450000</p>\n                   <p class=titleinfo>\n-                    <span class=b>Sequence ID:</span> <a href="http://www.ncbi.nlm.nih.gov/nucleotide/5?report=genbank&amp;log$=nuclalign">gnl|BL_ORD_ID|5</a>\n+                    <span class=b>Sequence ID:</span> <a href="http://example.com/example-genebank/AB209952.1/">gnl|BL_ORD_ID|5</a>\n                     <span class=b>Length:</span> 2457\n                     <span class=b>Number of Matches:</span> 1\n                   </p>\n@@ 
-629,13 +629,13 @@\n \n                 <div class=linkheader>\n                   <div class=right><a href="#description1-2">Descriptions</a></div>\n-                  <a class="linkheader" href="http://www.ncbi.nlm.nih.gov/nucleotide/2?report=genbank&amp;log$=nuclalign">Gene Bank</a>\n+                  <a class="linkheader" href="http://example.com/example-genebank/DJ437711/">Gene Bank</a>\n                 </div>\n \n                 <div class=title>\n                   <p class=hittitle>DJ437711|GenBank|insert_MIR604|Corn_Event_MIR604,_Left_Border_region|-5751164067366620000</p>\n                   <p class=titleinfo>\n-                    <span class=b>Sequence ID:</span> <a href="http://www.ncbi.nlm.nih.gov/nucleotide/2?report=genbank&amp;log$=nuclalign">gnl|BL_ORD_ID|2</a>\n+                    <span class=b>Sequence ID:</span> <a href="http://example.com/example-genebank/DJ437711/">gnl|BL_ORD_ID|2</a>\n                     <span class=b>Length:</span> 323\n                     <span class=b>Number of Matches:</span> 1\n                   </p>\n@@ -846,7 +846,7 @@\n                   <td>100%</td>\n                   <td>1.513e-07</td>\n                   <td>100%</td>\n-                  <td><a href="http://www.ncbi.nlm.nih.gov/nucleotide/5?report=genbank&amp;log$=nuclalign">5</a></td>\n+                  <td><a href="http://example.com/example-genebank/AB209952.1/">5</a></td>\n                 </tr>\n                 <tr>\n                   <td><div><a href="#hit3-2"\n@@ -859,7 +859,7 @@\n                   <td>100%</td>\n                   <td>1.513e-07</td>\n                   <td>100%</td>\n-                  <td><a href="http://www.ncbi.nlm.nih.gov/nucleotide/2?report=genbank&amp;log$=nuclalign">2</a></td>\n+                  <td><a href="http://example.com/example-genebank/DJ437711/">2</a></td>\n                 </tr>\n                 <tr>\n                   <td><div><a href="#hit3-3"\n@@ -872,7 +872,7 @@\n                   <td>85%</td>\n        
           <td>9.334e-06</td>\n                   <td>100%</td>\n-             '..b'+1276,13 @@\n \n                 <div class=linkheader>\n                   <div class=right><a href="#description6-2">Descriptions</a></div>\n-                  <a class="linkheader" href="http://www.ncbi.nlm.nih.gov/nucleotide/6?report=genbank&amp;log$=nuclalign">Gene Bank</a>\n+                  <a class="linkheader" href="http://example.com/example-genebank/AY326434/">Gene Bank</a>\n                 </div>\n \n                 <div class=title>\n                   <p class=hittitle>AY326434|GenBank|insert_MON810|Synthetic_construct_truncated_CRYIA(b)_(cryIA(b))_gene,_partial_CDS|-2635190737607180000</p>\n                   <p class=titleinfo>\n-                    <span class=b>Sequence ID:</span> <a href="http://www.ncbi.nlm.nih.gov/nucleotide/6?report=genbank&amp;log$=nuclalign">gnl|BL_ORD_ID|6</a>\n+                    <span class=b>Sequence ID:</span> <a href="http://example.com/example-genebank/AY326434/">gnl|BL_ORD_ID|6</a>\n                     <span class=b>Length:</span> 4180\n                     <span class=b>Number of Matches:</span> 1\n                   </p>\n@@ -1455,7 +1455,7 @@\n                   <td>100%</td>\n                   <td>3.564e-15</td>\n                   <td>86%</td>\n-                  <td><a href="http://www.ncbi.nlm.nih.gov/nucleotide/7?report=genbank&amp;log$=nuclalign">7</a></td>\n+                  <td><a href="http://example.com/example-genebank/EUG/">7</a></td>\n                 </tr>\n                 <tr>\n                   <td><div><a href="#hit7-2"\n@@ -1468,7 +1468,7 @@\n                   <td>100%</td>\n                   <td>3.564e-15</td>\n                   <td>86%</td>\n-                  <td><a href="http://www.ncbi.nlm.nih.gov/nucleotide/6?report=genbank&amp;log$=nuclalign">6</a></td>\n+                  <td><a href="http://example.com/example-genebank/AY326434/">6</a></td>\n                 </tr>\n               </table>\n 
\n@@ -1485,13 +1485,13 @@\n \n                 <div class=linkheader>\n                   <div class=right><a href="#description7-1">Descriptions</a></div>\n-                  <a class="linkheader" href="http://www.ncbi.nlm.nih.gov/nucleotide/7?report=genbank&amp;log$=nuclalign">Gene Bank</a>\n+                  <a class="linkheader" href="http://example.com/example-genebank/EUG/">Gene Bank</a>\n                 </div>\n \n                 <div class=title>\n                   <p class=hittitle>EUG|RIKILT|plasmid_pV-ZMBK07|plasmid_pV-ZMBK07|-2635190737607180000</p>\n                   <p class=titleinfo>\n-                    <span class=b>Sequence ID:</span> <a href="http://www.ncbi.nlm.nih.gov/nucleotide/7?report=genbank&amp;log$=nuclalign">gnl|BL_ORD_ID|7</a>\n+                    <span class=b>Sequence ID:</span> <a href="http://example.com/example-genebank/EUG/">gnl|BL_ORD_ID|7</a>\n                     <span class=b>Length:</span> 4983\n                     <span class=b>Number of Matches:</span> 1\n                   </p>\n@@ -1530,13 +1530,13 @@\n \n                 <div class=linkheader>\n                   <div class=right><a href="#description7-2">Descriptions</a></div>\n-                  <a class="linkheader" href="http://www.ncbi.nlm.nih.gov/nucleotide/6?report=genbank&amp;log$=nuclalign">Gene Bank</a>\n+                  <a class="linkheader" href="http://example.com/example-genebank/AY326434/">Gene Bank</a>\n                 </div>\n \n                 <div class=title>\n                   <p class=hittitle>AY326434|GenBank|insert_MON810|Synthetic_construct_truncated_CRYIA(b)_(cryIA(b))_gene,_partial_CDS|-2635190737607180000</p>\n                   <p class=titleinfo>\n-                    <span class=b>Sequence ID:</span> <a href="http://www.ncbi.nlm.nih.gov/nucleotide/6?report=genbank&amp;log$=nuclalign">gnl|BL_ORD_ID|6</a>\n+                    <span class=b>Sequence ID:</span> <a 
href="http://example.com/example-genebank/AY326434/">gnl|BL_ORD_ID|6</a>\n                     <span class=b>Length:</span> 4180\n                     <span class=b>Number of Matches:</span> 1\n                   </p>\n'
b
diff -r 8b3c5d334a84 -r e780606b7c25 test-data/blastdb.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastdb.loc Mon Jun 30 17:51:56 2014 +0200
[
@@ -0,0 +1,39 @@
+#This is a sample file distributed with Galaxy that is used to define a
+#list of nucleotide BLAST databases, using three tab-separated columns
+#(the longer runs of whitespace below are TAB characters):
+#
+#<unique_id> <database_caption> <base_name_path>
+#
+#The captions typically contain spaces and might end with the build date.
+#It is important that the actual database name does not have a space in
+#it, and that there are only two tabs on each line.
+#
+#So, for example, if your database is nt and the path to your base name 
+#is /depot/data2/galaxy/blastdb/nt/nt.chunk, then the blastdb.loc entry 
+#would look like this:
+#
+#nt_02_Dec_2009      nt 02 Dec 2009      /depot/data2/galaxy/blastdb/nt/nt.chunk
+#
+#and your /depot/data2/galaxy/blastdb/nt directory would contain all of 
+#your "base names" (e.g.):
+#
+#-rw-r--r--  1 wychung galaxy  23437408 2008-04-09 11:26 nt.chunk.00.nhr
+#-rw-r--r--  1 wychung galaxy   3689920 2008-04-09 11:26 nt.chunk.00.nin
+#-rw-r--r--  1 wychung galaxy 251215198 2008-04-09 11:26 nt.chunk.00.nsq
+#...etc...
+#
+#Your blastdb.loc file should include an entry per line for each "base name" 
+#you have stored.  For example:
+#
+#nt_02_Dec_2009 nt 02 Dec 2009 /depot/data2/galaxy/blastdb/nt/nt.chunk
+#wgs_30_Nov_2009 wgs 30 Nov 2009 /depot/data2/galaxy/blastdb/wgs/wgs.chunk
+#test_20_Sep_2008 test 20 Sep 2008 /depot/data2/galaxy/blastdb/test/test
+#...etc...
+#
+#You can download the NCBI-provided nucleotide databases like NT from here:
+#ftp://ftp.ncbi.nlm.nih.gov/blast/db/
+#
+#See also blastdb_p.loc, which is for any protein BLAST database, and
+#blastdb_d.loc, which is for any protein domain database (like CDD).
+
+EUginius_plasmid Euginius plasmid insert /opt/galaxy/blastdbs/EUginius_plasmid_insert http://example.com/example-genebank/{defline[0]}/
b
diff -r 8b3c5d334a84 -r e780606b7c25 test-data/refresh_test_results.sh
--- a/test-data/refresh_test_results.sh Mon Jun 30 17:31:26 2014 +0200
+++ b/test-data/refresh_test_results.sh Mon Jun 30 17:51:56 2014 +0200
b
@@ -4,7 +4,8 @@
 
 set -eu
 
-for i in 1 2 3 4
-do
- "$@" ../blast2html.py -i "blast xml example$i.xml" -o "blast xml example$i.html"
-done
+"$@" ../blast2html.py -i "blast xml example1.xml" -o "blast xml example1.html"
+"$@" ../blast2html.py -i "blast xml example2.xml" -o "blast xml example2.html"
+"$@" ../blast2html.py -i "blast xml example3.xml" -o "blast xml example3.html" --genelink-template 'http://example.com/example-genebank?id={fullid}'
+"$@" ../blast2html.py -i "blast xml example4.xml" -o "blast xml example4.html" --db-config-dir .
+