Mercurial > repos > greg > data_manager_vsnp_excel

--- a/data_manager/vsnp_excel_fetcher.xml	Mon Feb 10 09:29:37 2020 -0500
+++ b/data_manager/vsnp_excel_fetcher.xml	Wed May 06 10:10:03 2020 -0400
@@ -23,6 +23,9 @@
     </outputs>
     <tests>
         <test>
+            <param name="all_fasta_source" value="AF2122"/>
+            <param name="url" value="https://github.com/USDA-VS/vSNP_reference_options/raw/master/Mycobacterium_AF2122/Mbovis_define_filter.xlsx"/>
+            <output name="out_file" value="vsnp_excel.json" compare="contains"/>
         </test>
     </tests>
     <help><![CDATA[
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/AF2122.fa	Wed May 06 10:10:03 2020 -0400
@@ -0,0 +1,11 @@
+>NC_002945.4 Mycobacterium bovis AF2122/97 genome assembly, chromosome: Mycobacterium_bovis_AF2122/97
+TTGACCGATGACCCCGGTTCAGGCTTCACCACAGTGTGGAACGCGGTCGTCTCCGAACTTAACGGCGACC
+CTAAGGTTGACGACGGACCCAGCAGTGATGCTAATCTCAGCGCTCCGCTGACCCCTCAGCAAAGGGCTTG
+GCTCAATCTCGTCCAGCCATTGACCATCGTCGAGGGGTTTGCTCTGTTATCCGTGCCGAGCAGCTTTGTC
+CAAAACGAAATCGAGCGCCATCTGCGGGCCCCGATTACCGACGCTCTCAGCCGCCGACTCGGACATCAGA
+TCCAACTCGGGGTCCGCATCGCTCCGCCGGCGACCGACGAAGCCGACGACACTACCGTGCCGCCTTCCGA
+AAATCCTGCTACCACATCGCCAGACACCACAACCGACAACGACGAGATTGATGACAGCGCTGCGGCACGG
+GGCGATAACCAGCACAGTTGGCCAAGTTACTTCACCGAGCGCCCGCGCAATACCGATTCCGCTACCGCTG
+GCGTAACCAGCCTTAACCGTCGCTACACCTTTGATACGTTCGTTATCGGCGCCTCCAACCGGTTCGCGCA
+CGCCGCCGCCTTGGCGATCGCAGAAGCACCCGCCCGCGCTTACAACCCCCTGTTCATCTGGGGCGAGTCC
+GGTCTCGGCAAGACACACCTGCTACACGCGGCAGGCAACTATGCCCAACGGTTGTTCCCGGGAATGCGGG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc	Wed May 06 10:10:03 2020 -0400
@@ -0,0 +1,19 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>		<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3		/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19		Human (Homo sapiens): hg19 Canonical		/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19		Human (Homo sapiens): hg19 Full			/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file, so a single build (dbkey) may have multiple entries,
+#as with hg19 above.
+#
+AF2122	AF2122	AF2122	${__HERE__}/AF2122.fa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vsnp_excel.json	Wed May 06 10:10:03 2020 -0400
@@ -0,0 +1,1 @@
+{"data_tables": {"vsnp_excel": {"description": "Excel file for AF2122", "name": "Mbovis_define_filter.xlsx", "path":
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample	Wed May 06 10:10:03 2020 -0400
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>		<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3		/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19		Human (Homo sapiens): hg19 Canonical		/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19		Human (Homo sapiens): hg19 Full			/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file, so a single build (dbkey) may have multiple entries,
+#as with hg19 above.
+#
--- a/tool-data/vsnp_excel.loc.sample	Mon Feb 10 09:29:37 2020 -0500
+++ b/tool-data/vsnp_excel.loc.sample	Wed May 06 10:10:03 2020 -0400
@@ -1,4 +1,4 @@
 ## vSNP Excel files
 #Value	Name	Path	Description
 #AF2122	Mbovis_define_filter.xlsx	vsnp/AF2122/Mbovis_define_filter.xlsx	Excel file for Mycobacterium bovis AF2122/97
-#NC_006932	Bab1_define_filter.xlsx	/vsnp/NC_006932/Bab1_define_filter.xlsx	Excel file for Brucella abortus bv. 1 str. 9-941
+#NC_006932	Bab1_define_filter.xlsx	vsnp/NC_006932/Bab1_define_filter.xlsx	Excel file for Brucella abortus bv. 1 str. 9-941
--- a/tool_data_table_conf.xml.sample	Mon Feb 10 09:29:37 2020 -0500
+++ b/tool_data_table_conf.xml.sample	Wed May 06 10:10:03 2020 -0400
@@ -1,4 +1,10 @@
 <tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+    <!-- Locations of Excel files for vsnp_excel version 0.0.8 -->
     <table name="vsnp_excel" comment_char="#">
         <columns>value, name, path, description</columns>
         <file path="tool-data/vsnp_excel.loc" />
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Wed May 06 10:10:03 2020 -0400
@@ -0,0 +1,12 @@
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc" />
+    </table>
+    <!-- Locations of Excel files for vsnp_excel version 0.0.8 -->
+    <table name="vsnp_excel" comment_char="#">
+        <columns>value, name, path, description</columns>
+        <file path="${__HERE__}/test-data/vsnp_excel.loc" />
+    </table>
+</tables>