changeset 2:df67ba6db5de draft default tip

Uploaded
author greg
date Wed, 06 May 2020 10:58:29 -0400
parents f136bf6d2f4c
children
files data_manager/vsnp_genbank_fetcher.xml test-data/AF2122.fa test-data/all_fasta.loc test-data/vsnp_genbank.json test-data/vsnp_genbank.loc tool-data/all_fasta.loc.sample tool-data/vsnp_genbank.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 9 files changed, 72 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/vsnp_genbank_fetcher.xml	Mon Feb 10 10:27:08 2020 -0500
+++ b/data_manager/vsnp_genbank_fetcher.xml	Wed May 06 10:58:29 2020 -0400
@@ -23,6 +23,9 @@
     </outputs>
     <tests>
         <test>
+            <param name="all_fasta_source" value="AF2122"/>
+            <param name="url" value="https://github.com/USDA-VS/vSNP_reference_options/raw/master/Mycobacterium_AF2122/NC_002945v4.gbk"/>
+            <output name="out_file" value="vsnp_genbank.json" compare="contains"/>
         </test>
     </tests>
     <help><![CDATA[
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/AF2122.fa	Wed May 06 10:58:29 2020 -0400
@@ -0,0 +1,11 @@
+>NC_002945.4 Mycobacterium bovis AF2122/97 genome assembly, chromosome: Mycobacterium_bovis_AF2122/97
+TTGACCGATGACCCCGGTTCAGGCTTCACCACAGTGTGGAACGCGGTCGTCTCCGAACTTAACGGCGACC
+CTAAGGTTGACGACGGACCCAGCAGTGATGCTAATCTCAGCGCTCCGCTGACCCCTCAGCAAAGGGCTTG
+GCTCAATCTCGTCCAGCCATTGACCATCGTCGAGGGGTTTGCTCTGTTATCCGTGCCGAGCAGCTTTGTC
+CAAAACGAAATCGAGCGCCATCTGCGGGCCCCGATTACCGACGCTCTCAGCCGCCGACTCGGACATCAGA
+TCCAACTCGGGGTCCGCATCGCTCCGCCGGCGACCGACGAAGCCGACGACACTACCGTGCCGCCTTCCGA
+AAATCCTGCTACCACATCGCCAGACACCACAACCGACAACGACGAGATTGATGACAGCGCTGCGGCACGG
+GGCGATAACCAGCACAGTTGGCCAAGTTACTTCACCGAGCGCCCGCGCAATACCGATTCCGCTACCGCTG
+GCGTAACCAGCCTTAACCGTCGCTACACCTTTGATACGTTCGTTATCGGCGCCTCCAACCGGTTCGCGCA
+CGCCGCCGCCTTGGCGATCGCAGAAGCACCCGCCCGCGCTTACAACCCCCTGTTCATCTGGGGCGAGTCC
+GGTCTCGGCAAGACACACCTGCTACACGCGGCAGGCAACTATGCCCAACGGTTGTTCCCGGGAATGCGGG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc	Wed May 06 10:58:29 2020 -0400
@@ -0,0 +1,19 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>		<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3		/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19		Human (Homo sapiens): hg19 Canonical		/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19		Human (Homo sapiens): hg19 Full			/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
+AF2122	AF2122	AF2122	${__HERE__}/AF2122.fa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vsnp_genbank.json	Wed May 06 10:58:29 2020 -0400
@@ -0,0 +1,1 @@
+{"data_tables": {"vsnp_genbank": {"description": "Genbank file for AF2122", "name": "NC_002945v4.gbk", "path":
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vsnp_genbank.loc	Wed May 06 10:58:29 2020 -0400
@@ -0,0 +1,1 @@
+AF2122	NC_002945v4.gbk	/private/var/folders/zf/cf4s7k3530j87w9fkhh475800000gn/T/tmpFxGvOE/tmp3MdYaQ/tmp5lwt8e/database/data_manager_tool-datanmleBG/vsnp/AF2122/genbank/NC_002945v4.gbk	Genbank file for AF2122
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample	Wed May 06 10:58:29 2020 -0400
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>		<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3		/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19		Human (Homo sapiens): hg19 Canonical		/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19		Human (Homo sapiens): hg19 Full			/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
--- a/tool-data/vsnp_genbank.loc.sample	Mon Feb 10 10:27:08 2020 -0500
+++ b/tool-data/vsnp_genbank.loc.sample	Wed May 06 10:58:29 2020 -0400
@@ -1,4 +1,4 @@
 ## vSNP Genbank files
 #Value	Name	Path	Description
 #AF2122	Mycobacterium_AF2122/NC_002945v4.gbk	vsnp/AF2122/Mycobacterium_AF2122/NC_002945v4.gbk	Genbank file for Mycobacterium bovis AF2122/97
-#NC_006932	Brucella_abortus1/NC_006932-NC_006933.gbk	/vsnp/NC_006932/Brucella_abortus1/NC_006932-NC_006933.gbk	Genbank file for Brucella abortus bv. 1 str. 9-941
+#NC_006932	Brucella_abortus1/NC_006932-NC_006933.gbk	vsnp/NC_006932/Brucella_abortus1/NC_006932-NC_006933.gbk	Genbank file for Brucella abortus bv. 1 str. 9-941
--- a/tool_data_table_conf.xml.sample	Mon Feb 10 10:27:08 2020 -0500
+++ b/tool_data_table_conf.xml.sample	Wed May 06 10:58:29 2020 -0400
@@ -1,4 +1,10 @@
 <tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+    <!-- Location of genbank files for vsnp_genbank version 0.0.8 -->
     <table name="vsnp_genbank" comment_char="#">
         <columns>value, name, path, description</columns>
         <file path="tool-data/vsnp_genbank.loc" />
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Wed May 06 10:58:29 2020 -0400
@@ -0,0 +1,12 @@
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc" />
+    </table>
+    <!-- Location of genbank files for vsnp_genbank version 0.0.8 -->
+    <table name="vsnp_genbank" comment_char="#">
+        <columns>value, name, path, description</columns>
+        <file path="${__HERE__}/test-data/vsnp_genbank.loc" />
+    </table>
+</tables>