Mercurial > repos > greg > data_manager_vsnp_genbank
changeset 2:df67ba6db5de draft default tip
Uploaded
author | greg |
---|---|
date | Wed, 06 May 2020 10:58:29 -0400 |
parents | f136bf6d2f4c |
children | |
files | data_manager/vsnp_genbank_fetcher.xml test-data/AF2122.fa test-data/all_fasta.loc test-data/vsnp_genbank.json test-data/vsnp_genbank.loc tool-data/all_fasta.loc.sample tool-data/vsnp_genbank.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 9 files changed, 72 insertions(+), 1 deletion(-) [+] |
line wrap: on
line diff
--- a/data_manager/vsnp_genbank_fetcher.xml Mon Feb 10 10:27:08 2020 -0500 +++ b/data_manager/vsnp_genbank_fetcher.xml Wed May 06 10:58:29 2020 -0400 @@ -23,6 +23,9 @@ </outputs> <tests> <test> + <param name="all_fasta_source" value="AF2122"/> + <param name="url" value="https://github.com/USDA-VS/vSNP_reference_options/raw/master/Mycobacterium_AF2122/NC_002945v4.gbk"/> + <output name="out_file" value="vsnp_genbank.json" compare="contains"/> </test> </tests> <help><![CDATA[
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/AF2122.fa Wed May 06 10:58:29 2020 -0400 @@ -0,0 +1,11 @@ +>NC_002945.4 Mycobacterium bovis AF2122/97 genome assembly, chromosome: Mycobacterium_bovis_AF2122/97 +TTGACCGATGACCCCGGTTCAGGCTTCACCACAGTGTGGAACGCGGTCGTCTCCGAACTTAACGGCGACC +CTAAGGTTGACGACGGACCCAGCAGTGATGCTAATCTCAGCGCTCCGCTGACCCCTCAGCAAAGGGCTTG +GCTCAATCTCGTCCAGCCATTGACCATCGTCGAGGGGTTTGCTCTGTTATCCGTGCCGAGCAGCTTTGTC +CAAAACGAAATCGAGCGCCATCTGCGGGCCCCGATTACCGACGCTCTCAGCCGCCGACTCGGACATCAGA +TCCAACTCGGGGTCCGCATCGCTCCGCCGGCGACCGACGAAGCCGACGACACTACCGTGCCGCCTTCCGA +AAATCCTGCTACCACATCGCCAGACACCACAACCGACAACGACGAGATTGATGACAGCGCTGCGGCACGG +GGCGATAACCAGCACAGTTGGCCAAGTTACTTCACCGAGCGCCCGCGCAATACCGATTCCGCTACCGCTG +GCGTAACCAGCCTTAACCGTCGCTACACCTTTGATACGTTCGTTATCGGCGCCTCCAACCGGTTCGCGCA +CGCCGCCGCCTTGGCGATCGCAGAAGCACCCGCCCGCGCTTACAACCCCCTGTTCATCTGGGGCGAGTCC +GGTCTCGGCAAGACACACCTGCTACACGCGGCAGGCAACTATGCCCAACGGTTGTTCCCGGGAATGCGGG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/all_fasta.loc Wed May 06 10:58:29 2020 -0400 @@ -0,0 +1,19 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# +AF2122 AF2122 AF2122 ${__HERE__}/AF2122.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/vsnp_genbank.json Wed May 06 10:58:29 2020 -0400 @@ -0,0 +1,1 @@ +{"data_tables": {"vsnp_genbank": {"description": "Genbank file for AF2122", "name": "NC_002945v4.gbk", "path":
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/vsnp_genbank.loc Wed May 06 10:58:29 2020 -0400 @@ -0,0 +1,1 @@ +AF2122 NC_002945v4.gbk /private/var/folders/zf/cf4s7k3530j87w9fkhh475800000gn/T/tmpFxGvOE/tmp3MdYaQ/tmp5lwt8e/database/data_manager_tool-datanmleBG/vsnp/AF2122/genbank/NC_002945v4.gbk Genbank file for AF2122
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/all_fasta.loc.sample Wed May 06 10:58:29 2020 -0400 @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +#
--- a/tool-data/vsnp_genbank.loc.sample Mon Feb 10 10:27:08 2020 -0500 +++ b/tool-data/vsnp_genbank.loc.sample Wed May 06 10:58:29 2020 -0400 @@ -1,4 +1,4 @@ ## vSNP Genbank files #Value Name Path Description #AF2122 Mycobacterium_AF2122/NC_002945v4.gbk vsnp/AF2122/Mycobacterium_AF2122/NC_002945v4.gbk Genbank file for Mycobacterium bovis AF2122/97 -#NC_006932 Brucella_abortus1/NC_006932-NC_006933.gbk /vsnp/NC_006932/Brucella_abortus1/NC_006932-NC_006933.gbk Genbank file for Brucella abortus bv. 1 str. 9-941 +#NC_006932 Brucella_abortus1/NC_006932-NC_006933.gbk vsnp/NC_006932/Brucella_abortus1/NC_006932-NC_006933.gbk Genbank file for Brucella abortus bv. 1 str. 9-941
--- a/tool_data_table_conf.xml.sample Mon Feb 10 10:27:08 2020 -0500 +++ b/tool_data_table_conf.xml.sample Wed May 06 10:58:29 2020 -0400 @@ -1,4 +1,10 @@ <tables> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/all_fasta.loc" /> + </table> + <!-- Location of genbank files for vsnp_genbank version 0.0.8 --> <table name="vsnp_genbank" comment_char="#"> <columns>value, name, path, description</columns> <file path="tool-data/vsnp_genbank.loc" />
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Wed May 06 10:58:29 2020 -0400 @@ -0,0 +1,12 @@ +<tables> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="${__HERE__}/test-data/all_fasta.loc" /> + </table> + <!-- Location of genbank files for vsnp_genbank version 0.0.8 --> + <table name="vsnp_genbank" comment_char="#"> + <columns>value, name, path, description</columns> + <file path="${__HERE__}/test-data/vsnp_genbank.loc" /> + </table> +</tables>