changeset 1:5c4173a0b2c6 draft

Uploaded
author greg
date Tue, 05 May 2020 15:08:48 -0400
parents f58178a5eebc
children 50c71f5a632c
files data_manager/vsnp_dnaprints_fetcher.xml test-data/AF2122.fa test-data/all_fasta.loc test-data/vsnp_dnaprints.json test-data/vsnp_dnaprints.loc tool-data/vsnp_dnaprints.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 7 files changed, 55 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/vsnp_dnaprints_fetcher.xml	Fri Feb 14 13:41:36 2020 -0500
+++ b/data_manager/vsnp_dnaprints_fetcher.xml	Tue May 05 15:08:48 2020 -0400
@@ -16,13 +16,16 @@
         <param name="all_fasta_source" type="select" label="FASTA reference">
             <options from_data_table="all_fasta"/>
         </param>
-        <param name="url" type="text" value="" label="URL to download the DNAprints file associated with the selected FASTA reference" optional="False" />
+        <param name="url" type="text" value="" label="URL to download the DNAprints file associated with the selected FASTA reference" optional="False"/>
     </inputs>
     <outputs>
-        <data name="out_file" format="data_manager_json" />
+        <data name="out_file" format="data_manager_json"/>
     </outputs>
     <tests>
         <test>
+            <param name="all_fasta_source" value="AF2122"/>
+            <param name="url" value="https://raw.githubusercontent.com/USDA-VS/vSNP_reference_options/master/Mycobacterium_AF2122/NC_002945v4.yml"/>
+            <output name="out_file" value="vsnp_dnaprints.json" compare="contains"/>
         </test>
     </tests>
     <help><![CDATA[
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/AF2122.fa	Tue May 05 15:08:48 2020 -0400
@@ -0,0 +1,11 @@
+>NC_002945.4 Mycobacterium bovis AF2122/97 genome assembly, chromosome: Mycobacterium_bovis_AF2122/97
+TTGACCGATGACCCCGGTTCAGGCTTCACCACAGTGTGGAACGCGGTCGTCTCCGAACTTAACGGCGACC
+CTAAGGTTGACGACGGACCCAGCAGTGATGCTAATCTCAGCGCTCCGCTGACCCCTCAGCAAAGGGCTTG
+GCTCAATCTCGTCCAGCCATTGACCATCGTCGAGGGGTTTGCTCTGTTATCCGTGCCGAGCAGCTTTGTC
+CAAAACGAAATCGAGCGCCATCTGCGGGCCCCGATTACCGACGCTCTCAGCCGCCGACTCGGACATCAGA
+TCCAACTCGGGGTCCGCATCGCTCCGCCGGCGACCGACGAAGCCGACGACACTACCGTGCCGCCTTCCGA
+AAATCCTGCTACCACATCGCCAGACACCACAACCGACAACGACGAGATTGATGACAGCGCTGCGGCACGG
+GGCGATAACCAGCACAGTTGGCCAAGTTACTTCACCGAGCGCCCGCGCAATACCGATTCCGCTACCGCTG
+GCGTAACCAGCCTTAACCGTCGCTACACCTTTGATACGTTCGTTATCGGCGCCTCCAACCGGTTCGCGCA
+CGCCGCCGCCTTGGCGATCGCAGAAGCACCCGCCCGCGCTTACAACCCCCTGTTCATCTGGGGCGAGTCC
+GGTCTCGGCAAGACACACCTGCTACACGCGGCAGGCAACTATGCCCAACGGTTGTTCCCGGGAATGCGGG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc	Tue May 05 15:08:48 2020 -0400
@@ -0,0 +1,19 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>		<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3		/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19		Human (Homo sapiens): hg19 Canonical		/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19		Human (Homo sapiens): hg19 Full			/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
+AF2122	AF2122	AF2122	${__HERE__}/AF2122.fa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vsnp_dnaprints.json	Tue May 05 15:08:48 2020 -0400
@@ -0,0 +1,1 @@
+{"data_tables": {"vsnp_dnaprints": {"description": "DNAprints file for AF2122", "name": "NC_002945v4.yml", "path":
--- a/tool-data/vsnp_dnaprints.loc.sample	Fri Feb 14 13:41:36 2020 -0500
+++ b/tool-data/vsnp_dnaprints.loc.sample	Tue May 05 15:08:48 2020 -0400
@@ -1,4 +1,4 @@
 ## vSNP DNAprints files
 #Value	Name	Path	Description
 #AF2122	Mycobacterium_AF2122/NC_002945v4.yml	vsnp/AF2122/Mycobacterium_AF2122/NC_002945v4.yml	DNAprints file for Mycobacterium bovis AF2122/97
-#NC_006932	Brucella_abortus1/NC_006932-NC_006933.yml	/vsnp/NC_006932/Brucella_abortus1/NC_006932-NC_006933.yml	DNAprints file for Brucella abortus bv. 1 str. 9-941
+#NC_006932	Brucella_abortus1/NC_006932-NC_006933.yml	vsnp/NC_006932/Brucella_abortus1/NC_006932-NC_006933.yml	DNAprints file for Brucella abortus bv. 1 str. 9-941
--- a/tool_data_table_conf.xml.sample	Fri Feb 14 13:41:36 2020 -0500
+++ b/tool_data_table_conf.xml.sample	Tue May 05 15:08:48 2020 -0400
@@ -1,4 +1,10 @@
 <tables>
+    <!-- Locations of all FASTA files under the genome directory -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+    <!-- Locations of DNAprints files for vsnp_dnaprints version 1.0.0 -->
     <table name="vsnp_dnaprints" comment_char="#">
         <columns>value, name, path, description</columns>
         <file path="tool-data/vsnp_dnaprints.loc" />
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Tue May 05 15:08:48 2020 -0400
@@ -0,0 +1,12 @@
+<tables>
+    <!-- Locations of all FASTA files under the genome directory -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc" />
+    </table>
+    <!-- Locations of DNAprints files for vsnp_dnaprints version 1.0.0 -->
+    <table name="vsnp_dnaprints" comment_char="#">
+        <columns>value, name, path, description</columns>
+        <file path="${__HERE__}/test-data/vsnp_dnaprints.loc" />
+    </table>
+</tables>