Previous changeset 3:db04e12b8779 (2015-02-21) Next changeset 5:e159dbecdad6 (2015-02-21) |
Commit message:
EMBL testcase added |
modified:
gbk2rdf/gbktordf.xml |
added:
.project .pydevproject gbk2rdf/test-data/CP009049.embl |
b |
diff -r db04e12b8779 -r 47d1b27466ee .project --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.project Sat Feb 21 13:49:11 2015 +0100 |
b |
@@ -0,0 +1,17 @@ +<?xml version="1.0" encoding="UTF-8"?> +<projectDescription> + <name>sapp</name> + <comment></comment> + <projects> + </projects> + <buildSpec> + <buildCommand> + <name>org.python.pydev.PyDevBuilder</name> + <arguments> + </arguments> + </buildCommand> + </buildSpec> + <natures> + <nature>org.python.pydev.pythonNature</nature> + </natures> +</projectDescription> |
b |
diff -r db04e12b8779 -r 47d1b27466ee .pydevproject --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.pydevproject Sat Feb 21 13:49:11 2015 +0100 |
b |
@@ -0,0 +1,5 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<?eclipse-pydev version="1.0"?><pydev_project> +<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property> +<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property> +</pydev_project> |
b |
diff -r db04e12b8779 -r 47d1b27466ee gbk2rdf/gbktordf.xml --- a/gbk2rdf/gbktordf.xml Sat Feb 21 07:28:39 2015 -0500 +++ b/gbk2rdf/gbktordf.xml Sat Feb 21 13:49:11 2015 +0100 |
b |
@@ -6,7 +6,7 @@ <description>Genbank to RDF conversion</description> <command interpreter="python3.4">gbktordf.py '-input' '$input' -output '$output' -sourcedb "$format" -format "$format"</command> <inputs> - <param name="input" type="data" format="gbk,gb,genbank" label="Genbank file"/> + <param name="input" type="data" format="gbk,gb,genbank,embl" label="Genbank file"/> <param name="format" type="select" label="EMBL/GBK"> <option value="genbank" selected="true"> Genbank</option> <option value="embl"> EMBL </option> @@ -24,6 +24,12 @@ <output name="$format" value="genbank"/> <output name="$sourcedb" value="genbank"/> </test> + <test> + <param name="input" value="test-data/CP009049.embl"/> + <output name="$output" file="CP009049.rdf"/> + <output name="$format" value="embl"/> + <output name="$sourcedb" value="embl"/> + </test> </tests> <help> |
b |
diff -r db04e12b8779 -r 47d1b27466ee gbk2rdf/test-data/CP009049.embl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gbk2rdf/test-data/CP009049.embl Sat Feb 21 13:49:11 2015 +0100 |
[ |
b'@@ -0,0 +1,157312 @@\n+ID CP009049; SV 1; circular; genomic DNA; STD; PRO; 4599018 BP.\n+XX\n+AC CP009049;\n+XX\n+PR Project:PRJNA255737;\n+XX\n+DT 13-FEB-2015 (Rel. 123, Created)\n+DT 13-FEB-2015 (Rel. 123, Last updated, Version 1)\n+XX\n+DE Salmonella enterica subsp. enterica serovar Paratyphi A strain CMCC 50973,\n+DE complete genome.\n+XX\n+KW .\n+XX\n+OS Salmonella enterica subsp. enterica serovar Paratyphi A\n+OC Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacteriales;\n+OC Enterobacteriaceae; Salmonella.\n+XX\n+RN [1]\n+RP 1-4599018\n+RA Wang B., Liang H., Liu X., Zhu L., Wang H., Zeng M.;\n+RT "Whole Genome Sequences of two Salmonella paratyphi A strains";\n+RL Unpublished.\n+XX\n+RN [2]\n+RP 1-4599018\n+RA Wang B., Liang H., Liu X., Zhu L., Wang H., Zeng M.;\n+RT ;\n+RL Submitted (24-JUL-2014) to the INSDC.\n+RL State Key Laboratory of Pathogen and Biosecurity, Beijing Institute of\n+RL Biotechnology, 20 Dongdajie, Fengtai District, Beijing, Beijing 100071,\n+RL China\n+XX\n+DR MD5; e41a6215bf412b701febd8d4b182ec0c.\n+DR BioSample; SAMN02909989.\n+XX\n+CC Source DNA/bacteria are available from National Center for Medical\n+CC Culture Collection (CMCC) in China.\n+CC Annotation was added by the NCBI Prokaryotic Genome Annotation\n+CC Pipeline (released 2013). Information about the Pipeline can be\n+CC found here: http://www.ncbi.nlm.nih.gov/genome/annotation_prok/\n+CC ##Genome-Assembly-Data-START##\n+CC Assembly Method :: SOAPdenovo v. 2011.04\n+CC Assembly Name :: CMCC(B) 50973\n+CC Genome Coverage :: 133x\n+CC Sequencing Technology :: Illumina\n+CC ##Genome-Assembly-Data-END##\n+CC ##Genome-Annotation-Data-START##\n+CC Annotation Provider :: NCBI\n+CC Annotation Date :: 07/25/2014 13:43:31\n+CC Annotation Pipeline :: NCBI Prokaryotic Genome Annotation\n+CC Pipeline\n+CC Annotation Method :: Best-placed reference protein set;\n+CC GeneMarkS+\n+CC Annotation Software revision :: 2.6 (rev. 440435)\n+CC Features Annotated :: Gene; CDS; rRNA; tRNA; ncRNA;\n+CC repeat_region\n+CC Genes :: 4,309\n+CC CDS :: 4,016\n+CC Pseudo Genes :: 166\n+CC CRISPR Arrays :: 2\n+CC rRNAs :: 20 ( 5S, 16S, 23S )\n+CC tRNAs :: 100\n+CC ncRNA :: 7\n+CC Frameshifted Genes :: 106\n+CC ##Genome-Annotation-Data-END##\n+XX\n+FH Key Location/Qualifiers\n+FH\n+FT source 1..4599018\n+FT /organism="Salmonella enterica subsp. enterica serovar\n+FT Paratyphi A"\n+FT /host="Homo sapiens"\n+FT /sub_species="enterica"\n+FT /strain="CMCC 50973"\n+FT /mol_type="genomic DNA"\n+FT /country="China:Jiangsu"\n+FT /lat_lon="32.04 N 118.78 E"\n+FT /collection_date="2003-06-01"\n+FT /serovar="Paratyphi A"\n+FT /db_xref="taxon:54388"\n+FT /culture_collection="CMCC:50973"\n+FT gene complement(129..713)\n+FT /gene="mobA"\n+FT /locus_tag="IT63_00010"\n+FT CDS complement(129..713)\n+FT /codon_start=1\n+FT /transl_table=11\n+FT /gene="mobA"\n+FT /locus_tag="IT63_00010"\n+FT /product="molybdopterin-guanine dinucleotide biosynthesis\n+FT protein MobA"\n+FT /note="in Escherichia coli MobA links a guanosine\n+FT 5\'-phosphate to molydopterin to form molybdopterin guanine\n+FT dinucleotide during molybdenum cofactor biosynthesis;\n+FT Derived by automated c'..b'cgag cgaacgggga ggagcccaga gcctgaatca gcatgtgtgt 4596180\n+ tagtggaagc gtctggaaag gcgcgcgata cagggtgaca gccccgtaca caaaagcgca 4596240\n+ tgtgctgtga gctcgatgag tagggcggga cacgtggtat cctgtctgaa tatgggggga 4596300\n+ ccatcctcca aggctaaata ctaattttgc tctttaaaaa tctggatcaa gctgaaaatt 4596360\n+ gaaacacaga acaacgaaag ttgttcgtga gtctctcaaa ttttcgcaac acgatgatga 4596420\n+ atcgtaagaa acatcttcgg gttgtgaggt taagcgacta agcgtacacg gtggatgccc 4596480\n+ tggcagtcag aggcgatgaa ggacgtgcta atctgcgata agcgccggta aggtgatatg 4596540\n+ aaccgttata accggcgatt tccgaatggg gaaacccagt gtgattcgtc acactatcat 4596600\n+ taactgaatc cataggttaa tgaggcgaac cgggggaact gaaacatcta agtaccccga 4596660\n+ ggaaaagaaa tcaaccgaga ttcccccagt agcggcgagc gaacggggag gagcccagag 4596720\n+ cctgaatcag catgtgtgtt agtggaagcg tctggaaagg cgcgcgatac agggtgacag 4596780\n+ ccccgtacac aaaagcgcat gtgctgtgag ctcgatgagt agggcgggac acgtggtatc 4596840\n+ ctgtctgaat atggggggac catcctccaa ggctaaatac tcctgactga ccgatagtga 4596900\n+ accagtaccg tgagggaaag gcgaaaagaa ccccggcgag gggagtgaaa aagaacctga 4596960\n+ aaccgtgtac gtacaagcag tgggagcaca ggtttacctg tgtgactgcg taccttttgt 4597020\n+ ataatgggtc agcgacttat attctgtagc aaggttaacc gtatagggga gccggaggga 4597080\n+ aaccgagtct taaccgggcg ttaagttgca gggtatagac ccgaaacccg gtgatctagc 4597140\n+ catgggcagg ttgaaggttg ggtaacacta actggaggac cgaaccgact aatgttgaaa 4597200\n+ aattagcgga tgacctgtgg ctgggggtga aaggccaatc aaaccgggag atagctggtt 4597260\n+ ctccccgaaa gctatttagg tagcgcctcg tgaattcatc tccgggggta gagcactgtt 4597320\n+ tcggctaggg ggccatcccg gcttaccaac ccgatgcaaa ctgcgaatac cggagaatgt 4597380\n+ tatcacggga gacacacggc gggtgctaac gtccgtcgtg aagagggaaa caacccagac 4597440\n+ cgccagctaa ggtcccaaag tcatggttaa gtgggaaacg atgtgggaag gcccagacag 4597500\n+ ccaggatgtt ggcttagaag cagccatcat ttaaagaaag cgtaatagct cactggtcga 4597560\n+ gtcggcctgc gcggaagatg taacggggct aaaccatgca ccgaagctgc ggcagcgaca 4597620\n+ ctcaggtgtt gttgggtagg ggagcgttct gtaagcctgt gaaggtggcc tgtgagggtt 4597680\n+ gctggaggta tcagaagtgc gaatgctgac ataagtaacg ataaagcggg tgaaaagccc 4597740\n+ gctcgccgga agaccaaggg ttcctgtcca acgttaatcg gggcagggtg agtcgacccc 4597800\n+ taaggcgagg ccgaaaggcg tagtcgatgg gaaacgggtt aatattcccg tacttggtgt 4597860\n+ tactgcgaag ggggggacgg agaaggctat gttggccggg cgacggttgt cccggtttaa 4597920\n+ gcgtgtaggt gtgtgttcca ggtaaatccg gttcacttta acactgaggc gtgacgacga 4597980\n+ ggcactacgg tgctgaagca acaaatgccc tgcttccagg aaaagcctct aagcatcagg 4598040\n+ taacatcaaa tcgtacccca aaccgacaca ggtggtcagg tagagaatac caaggcgctt 4598100\n+ gagagaactc gggtgaagga actaggcaaa atggtgccgt aacttcggga gaaggcacgc 4598160\n+ tgacacgtag gtgaagtgat ttactcatgg agctgaagtc agtcgaagat accagctggc 4598220\n+ tgcaactgtt tattaaaaac acagcactgt gcaaacacga aagtggacgt atacggtgtg 4598280\n+ acgcctgccc ggtgccggaa ggttaattga tggggtcagc gcaagcgaag ctcctgatcg 4598340\n+ aagccccggt aaacggcggc cgtaactata acggtcctaa ggtagcgaaa ttccttgtcg 4598400\n+ ggtaagttcc gacctgcacg aatggcgtaa tgatggccag gctgtctcca cccgagactc 4598460\n+ agtgaaattg aactcgctgt gaagatgcag tgtacccgcg gcaagacgga aagaccccgt 4598520\n+ gaacctttac tatagcttga cactgaacat tgagccttga tgtgtaggat aggtgggagg 4598580\n+ ctttgaagtg tggacgccag tctgcatgga gccgaccttg aaataccacc ctttaatgtt 4598640\n+ tgatgttcta acgtggaccc gttacccggg ttgcggacag tgtctggtgg gtagtttgac 4598700\n+ tggggcggtc tcctcctaaa gagtaacgga ggagcacgaa ggttggctaa tcctggtcgg 4598760\n+ acatcaggag gttagtgcaa tggcataagc cagcttgact gcgagcgtga cggcgcgagc 4598820\n+ aggtgcgaaa gcaggtcata gtgatccggt ggttctgaat ggaagggcca tcgctcaacg 4598880\n+ gataaaaggt actccgggga taacaggctg ataccgccca agagttcata tcgacggcgg 4598940\n+ tgtttggcac ctcgatgtcg gctcatccca tcccggggct gaagtaggtc ccaagggtat 4599000\n+ ggctgttcgc catttaaa 4599018\n+//\n' |