Next changeset 1:e34fcd410816 (2022-01-31) |
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit 800d16f3bd40266d8734f4572988cb2b306b4fd3" |
added:
datasets_genome.xml macros.xml test-data/GCF_000007445.1.genomic.gtf test-data/accessions.txt test-data/genome.2.GCF_000007445.1.genomic.cds test-data/genome.2.GCF_000007445.1.seq.rpt.jsonl test-data/genome.2.GCF_000013305.1.genomic.cds test-data/genome.2.GCF_000013305.1.genomic.gtf test-data/genome.2.GCF_000013305.1.seq.rpt.jsonl test-data/genome.3.GCF_000007445.1.genomic.gbff test-data/genome.3.GCF_000007445.1.genomic.gff test-data/genome.3.GCF_000007445.1.seq.rpt.jsonl test-data/genome.3.GCF_000013305.1.genomic.gbff test-data/genome.3.GCF_000013305.1.genomic.gff test-data/genome.3.GCF_000013305.1.seq.rpt.jsonl |
b |
diff -r 000000000000 -r 1a7773882d2c datasets_genome.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datasets_genome.xml Thu Jan 27 08:20:47 2022 +0000 |
[ |
b'@@ -0,0 +1,215 @@\n+<tool id="datasets_download_genome" name="NCBI Datasets Genomes" profile="@PROFILE@" license="@LICENSE" version="@TOOL_VERSION@">\n+ <description>download genome sequence, annotation and metadata</description>\n+ <macros>\n+ <import>macros.xml</import>\n+ </macros>\n+ <expand macro="requirements"></expand>\n+ <command><![CDATA[\n+@SETUP_CERTIFICATES@\n+datasets download genome $query.subcommand.download_by\n+#if $query.subcommand.download_by == \'accession\':\n+ #if $query.subcommand.text_or_file.text_or_file == \'text\':\n+ #echo " ".join(f"\'{x}\'" for x in $query.subcommand.text_or_file.accession.split(\' \') if x)\n+ #else\n+ --inputfile \'$query.subcommand.text_or_file.inputfile\'\n+ #end if\n+#else:\n+ \'$query.subcommand.taxon\'\n+#end if\n+$filters.reference\n+$filters.annotated\n+#if $filters.assembly_level:\n+--assembly_level $filters.assembly_level\n+#end if\n+#if $filters.assembly_source:\n+--assembly_source $filters.assembly_source\n+#end if\n+#if $filters.chromosomes:\n+--chromosomes \'$filters.chromosomes\'\n+#end if\n+@EXCLUDES_GENOME@\n+@INCLUDES_GENOME@\n+@RELEASED_BEFORE@\n+@RELEASED_SINCE@\n+#for search_term in $filters.search:\n+ --search \'$filters.search_term\'\n+#end for\n+#if $uncompressed\n+&& unzip ncbi_dataset.zip\n+#else\n+&& unzip -l ncbi_dataset.zip > ncbi_dataset.txt\n+#end if\n+]]></command>\n+ <inputs>\n+ <section name="query" title="Query" expanded="true">\n+ <conditional name="subcommand">\n+ <param name="download_by" type="select" label="Choose how to find genomes to download">\n+ <option value="accession">Download by NCBI assembly or BioProject accession</option>\n+ <option value="taxon">Download by taxon</option>\n+ </param>\n+ <when value="accession">\n+ <expand macro="text_or_file"/>\n+ </when>\n+ <when value="taxon">\n+ <param name="taxon" type="text" label="Enter taxon" help="e.g. human, mouse, bos taurus, etc."></param>\n+ </when>\n+ </conditional>\n+ </section>\n+ <section name="filters" title="Filters and Limit">\n+ <param argument="--reference" type="boolean" truevalue="--reference" falsevalue="" label="Limit to reference and representative (GCF_ and GCA_) assemblies"/> \n+ <expand macro="annotation"></expand>\n+ <expand macro="assembly_level"></expand>\n+ <expand macro="assembly_source"></expand>\n+ <expand macro="chromosomes"></expand>\n+ <expand macro="released_options"></expand>\n+ <expand macro="released_options" before_or_after="since"></expand>\n+\n+ <repeat name="search" title="Add search terms">\n+ <param argument="--search" type="text" label="Only include genomes that have the specified text in the searchable fields" help="Searchable fields are species and infraspecies, assembly name and submitter"/>\n+ </repeat>\n+ </section>\n+ <section name="file_choices" title="File Choices">\n+ <expand macro="excludes_genome"></expand>\n+ <expand macro="includes_genome"></expand>\n+ </section>\n+ <param name="uncompressed" type="boolean" label="Uncompress the dataset archive" checked="true"/>\n+ </inputs>\n+ <outputs>\n+ <data name="compressed_archive" format="zip" label="Compressed Archive" from_work_dir="ncbi_dataset.zip">\n+ <filter>not uncompressed</filter>\n+ </data>\n+ <data name="archive_contents" format="txt" label="Archive Contents" from_work_dir="ncbi_dataset.txt">\n+ <filter>not uncompressed</filter>\n+ </data>\n+ <data name="genome_data_report" format="json" label="NCBI Genome Datasets: Data Report" from_work_dir="ncbi_dataset/data/assembly_data_report.jsonl">\n+ <filter>uncompressed</filter>\n+ </data>\n+ <collection name="sequence_re'..b'before" value="01/01/2007"></param>\n+ <param name="exclude_genomic_cds" value="true"/>\n+ <param name="include_gtf" value="true"/>\n+ <output name="genome_data_report">\n+ <assert_contents>\n+ <has_text text="GCF_000013305.1"/>\n+ </assert_contents>\n+ </output>\n+ <output_collection name="sequence_report" type="list">\n+ <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.seq.rpt.jsonl" compare="contains"/>\n+ <element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.seq.rpt.jsonl" compare="contains"/>\n+ </output_collection>\n+ <output_collection name="genomic_gtf" type="list">\n+ <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.gtf" compare="contains"/>\n+ <element name="GCF_000007445.1" file="GCF_000007445.1.genomic.gtf" compare="contains"/>\n+ </output_collection>\n+ <output_collection name="genomic_cds" type="list">\n+ <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.cds" compare="contains"/>\n+ <element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.genomic.cds" compare="contains"/>\n+ </output_collection>\n+ </test>\n+ <test expect_num_outputs="4">\n+ <conditional name="query|subcommand">\n+ <param name="download_by" value="accession"></param>\n+ <conditional name="text_or_file">\n+ <param name="text_or_file" value="file"></param>\n+ <param name="inputfile" value="accessions.txt"></param>\n+ </conditional>\n+ </conditional>\n+ <param name="include_gbff" value="true"/>\n+ <param name="exclude_seq" value="false"/>\n+ <param name="exclude_gff3" value="true"/>\n+ <param name="uncompressed" value="true"/>\n+ <param name="released_before" value="01/02/2007"></param>\n+ <output name="genome_data_report">\n+ <assert_contents>\n+ <has_text text="SAMN02604181"/>\n+ </assert_contents>\n+ </output>\n+ <output_collection name="sequence_report" type="list">\n+ <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.seq.rpt.jsonl" compare="contains"/>\n+ <element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.seq.rpt.jsonl" compare="contains"/>\n+ </output_collection>\n+ <output_collection name="genomic_gff" type="list">\n+ <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gff" compare="contains"/>\n+ <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gff" compare="contains"/>\n+ </output_collection>\n+ <output_collection name="genomic_gbff" type="list">\n+ <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gbff" compare="contains"/>\n+ <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gbff" compare="contains"/>\n+ </output_collection>\n+ </test>\n+ </tests>\n+ <help>\n+<![CDATA[\n+**Download Genome Datasets from NCBI**\n+\n+Download a genome dataset including genome, transcript and protein sequence, annotation and a detailed data report.\n+Genome datasets can be specified by NCBI Assembly or BioProject accession or taxon. Datasets are downloaded as a zip file.\n+\n+Tthe default genome dataset includes the following files (if available):\n+ * genomic.fna (genomic sequences)\n+ * rna.fna (transcript sequences)\n+ * protein.faa (protein sequences)\n+ * genomic.gff (genome annotation in gff3 format)\n+ * data_report.jsonl (data report with genome assembly and annotation metadata)\n+ * dataset_catalog.json (a list of files and file types included in the dataset)\n+]]>\n+ </help>\n+\n+</tool>\n' |
b |
diff -r 000000000000 -r 1a7773882d2c macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Jan 27 08:20:47 2022 +0000 |
[ |
@@ -0,0 +1,121 @@ +<macros> + <token name="@TOOL_VERSION@">12.27.1</token> + <token name="@PROFILE@">20.01</token> + <token name="@LICENSE@">MIT</token> + <token name="@PROFILE_AND_LICENSE@">profile="@PROFILE@" license="@LICENSE@"</token> + <token name="@SETUP_CERTIFICATES@"><![CDATA[ +## If running in container use certificate from ca-certificates instead of outdated / missing container certificates +[ -f /usr/local/ssl/cacert.pem ] && export SSL_CERT_FILE="/usr/local/ssl/cacert.pem"; + ]]></token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">ncbi-datasets-cli</requirement> + <requirement type="package" version="2021.5.30">ca-certificates</requirement> + <requirement type="package" version="16.02">p7zip</requirement> + </requirements> + </xml> + <xml name="annotation"> + <param argument="--annotated" type="boolean" truevalue="--annotated" falsevalue="" label="Only include genomes with annotation ?"/> + </xml> + <xml name="dehydrated"> + <param argument="--dehydrated" type="boolean" truevalue="--dehydrated" falsevalue="" label="Download a dehydrated zip archive including the data report and locations of data files ?" help="Use the rehydrate tools to retrieve data files"/> + </xml> + <xml name="assembly_level"> + <param argument="--assembly-level" type="select" label="Restrict assemblies to a comma-separated list of one or more of these" multiple="true" optional="true"> + <option value="chromosome">Chromosome</option> + <option value="complete_genome">Complete Genome</option> + <option value="contig">Contig</option> + <option value="scaffold">Scaffold</option> + </param> + </xml> + <xml name="assembly_source"> + <param argument="--assembly-source" type="select" optional="true"> + <option value="refseq">RefSeq</option> + <option value="genabnk">GenBank</option> + </param> + </xml> + <xml name="text_or_file" token_what="accession" token_what_extended="NCBI Assembly accession" token_help="Can be NCBI Assembly or BioProject accession"> + <conditional name="text_or_file" label="How do you want to specify the @WHAT@(s) to download"> + <param name="text_or_file" type="select" label="Enter @WHAT@ or read from file ?"> + <option value="text">Enter @WHAT@s</option> + <option value="file">Read a list of @WHAT_EXTENDED@s from a dataset</option> + </param> + <when value="text"> + <param name="accession" type="text" label="Enter space separated list of @WHAT@s" help="@HELP@"> + <yield/> + </param> + </when> + <when value="file"> + <param argument="--inputfile" type="data" format="txt" label="Select dataset with list of @WHAT_EXTENDED@s" help="@HELP@"/> + </when> + </conditional> + </xml> + <xml name="chromosomes"> + <param argument="--chromosomes" type="text" label="Limit chromosomes to a comma-delimited list of chromosomes"> + <sanitizer invalid_char=""> + <valid initial="string.letters,string.digits"> + <add value="_" /> + <add value="." /> + <add value="," /> + </valid> + </sanitizer> + </param> + </xml> + <xml name="include" token_include_what="gbff" token_include_label="Include GenBank flat file sequence and annotation, if available"> + <param argument="--include-@INCLUDE_WHAT@" type="boolean" truevalue="--include-@INCLUDE_WHAT@" falsevalue="" label="@INCLUDE_LABEL@" /> + </xml> + <xml name="includes_genome"> + <expand macro="include" include_what="gbff" include_label="Include GenBank flat file sequence and annotation, if available"/> + <expand macro="include" include_what="gtf" include_label="Include gtf annotation file, if available"/> + </xml> + <xml name="exclude" token_exclude_what="gff3" token_exclude_label="Exclude gff3 annotation file"> + <param argument="--exclude-@EXCLUDE_WHAT@" type="boolean" truevalue="--exclude-@EXCLUDE_WHAT@" falsevalue="" label="@EXCLUDE_LABEL@" /> + </xml> + <xml name="anti-exclude" token_exclude_what="gff3" token_exclude_label="Include gff3 annotation file" token_checked="false"> + <param argument="--exclude-@EXCLUDE_WHAT@" type="boolean" falsevalue="--exclude-@EXCLUDE_WHAT@" truevalue="" label="@EXCLUDE_LABEL@" checked="@CHECKED@"/> + </xml> + <xml name="excludes_genome"> + <expand macro="anti-exclude" exclude_what="seq" exclude_label="Include genomic sequence file" checked="true"/> + <expand macro="anti-exclude" exclude_what="gff3" exclude_label="Include gff3 annotation file"/> + <expand macro="anti-exclude" exclude_what="genomic-cds" exclude_label="Include cds from genomic sequence file"/> + <expand macro="anti-exclude" exclude_what="protein" exclude_label="Include protein sequence file"/> + <expand macro="anti-exclude" exclude_what="rna" exclude_label="Include transcript sequence file"/> + </xml> + <xml name="excludes_gene"> + <expand macro="exclude" exclude_what="gene" exclude_label="Exclude gene sequence file"/> + <expand macro="exclude" exclude_what="protein" exclude_label="Exclude protein sequence file"/> + <expand macro="exclude" exclude_what="rna" exclude_label="Exclude transcript sequence file"/> + </xml> + <xml name="excludes_virus_protein"> + <yield/> + <expand macro="exclude" exclude_what="protein" exclude_label="Exclude protein sequence file"/> + <expand macro="exclude" exclude_what="pdb" exclude_label="Exclude protein structure files (pdb)"/> + <expand macro="exclude" exclude_what="gpff" exclude_label="Exclude protein sequence and annotation in GenPept flat file"/> + <expand macro="exclude" exclude_what="cds" exclude_label="Exclude CDS sequence file"/> + </xml> + <xml name="excludes_virus_genome"> + <expand macro="excludes_virus_protein"> + <expand macro="exclude" exclude_what="seq" exclude_label="Exclude genomic sequence file"/> + </expand> + </xml> + <token name="@EXCLUDES_GENOME@">$file_choices.exclude_gff3 $file_choices.exclude_genomic_cds $file_choices.exclude_protein $file_choices.exclude_rna $file_choices.exclude_seq</token> + <token name="@EXCLUDES_GENE@">$exclude_gene $exclude_protein $exclude_rna</token> + <token name="@EXCLUDES_VIRUS_PROTEIN@">$exclude_protein $exclude_pdb $exclude_gpff $exclude_cds</token> + <token name="@EXCLUDES_VIRUS_GENOME@">$exclude_seq @EXCLUDES_VIRUS_PROTEIN@</token> + <xml name="includes_virus_genome"> + <expand macro="include" include_what="gbff" include_label="Include GenBank flat file sequence and annotation"/> + </xml> + <token name="@INCLUDES_GENOME@">$file_choices.include_gbff $file_choices.include_gtf</token> + <token name="@INCLUDES_VIRUS_GENOME@">$include_gbff</token> + <xml name="released_options" token_released_what="genomes" token_before_or_after="before"> + <param argument="--released-@BEFORE_OR_AFTER@" type="text" optional="true" label="Only include @RELEASED_WHAT@ that have been released @BEFORE_OR_AFTER@ a specified date (MM/DD/YYYY)"></param> + </xml> + <token name="@RELEASED_BEFORE@">#if $filters.released_before: +--released-before '$filters.released_before' +#end if + </token> + <token name="@RELEASED_SINCE@">#if $filters.released_since: +--released-since '$filters.released_since' +#end if + </token> +</macros> |
b |
diff -r 000000000000 -r 1a7773882d2c test-data/GCF_000007445.1.genomic.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/GCF_000007445.1.genomic.gtf Thu Jan 27 08:20:47 2022 +0000 |
b |
@@ -0,0 +1,10 @@ +#gtf-version 2.2 +#!genome-build ASM744v1 +#!genome-build-accession NCBI_Assembly:GCF_000007445.1 +#!annotation-date 05/06/2021 17:43:00 +#!annotation-source NCBI RefSeq +NC_004431.1 RefSeq gene 190 255 . + . gene_id "C_RS00005"; transcript_id ""; gbkey "Gene"; gene "thrL"; gene_biotype "protein_coding"; locus_tag "C_RS00005"; old_locus_tag "c5491"; +NC_004431.1 Protein Homology CDS 190 252 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; +NC_004431.1 Protein Homology start_codon 190 192 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; +NC_004431.1 Protein Homology stop_codon 253 255 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; +NC_004431.1 RefSeq gene 453 911 . + . gene_id "C_RS00010"; transcript_id ""; gbkey "Gene"; gene "tnpA"; gene_biotype "protein_coding"; locus_tag "C_RS00010"; old_locus_tag "c0002"; |
b |
diff -r 000000000000 -r 1a7773882d2c test-data/accessions.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/accessions.txt Thu Jan 27 08:20:47 2022 +0000 |
b |
@@ -0,0 +1,2 @@ +GCF_000013305.1 +GCF_000007445.1 |
b |
diff -r 000000000000 -r 1a7773882d2c test-data/genome.2.GCF_000007445.1.genomic.cds --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.2.GCF_000007445.1.genomic.cds Thu Jan 27 08:20:47 2022 +0000 |
[ |
@@ -0,0 +1,10 @@ +>lcl|NC_004431.1_cds_WP_001386572.1_1 [gene=thrL] [locus_tag=C_RS00005] [protein=thr operon leader peptide] [protein_id=WP_001386572.1] [location=190..255] [gbkey=CDS] +ATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGA +>lcl|NC_004431.1_cds_WP_000526115.1_2 [gene=tnpA] [locus_tag=C_RS00010] [protein=IS200/IS605-like element IS200C family transposase] [protein_id=WP_000526115.1] [location=453..911] [gbkey=CDS] +ATGGGGAACGAAAAGAGCTTAGCGCACACCCGATGGAACTGTAAATATCACATAGTATTTGCGCCAAAATACCGAAGACA +GGTGTTCTACAGAGAGAAGCGTAGAGCAATAGGCTGTATTTTGAGAAAGCTGTGTGAGTGGAAAAGTGTACGGATTCTGG +AAGCTGAATGCTGTGCAGATCATATCCATATGCTTGTGGAGATCCCGCCCAAAATGAGCGTATCAGGCTTTATGGGATAT +CTGAAAGGGAAAAGCAGTCTGATGCCTTACGAGCAGTTTGGTGATTTGAAATTCAAATACAGGAACAGGGAGTTCTGGTG +CAGAGGGTATTACGTCGATACGGTGGGTAAGAACACGGCGAAGATACAGGATTACATAAAGCACCAGCTTGAAGAGGATA +AAATGGGAGAGCAGTTATCGATTCCCTATCCGGGCAGCCCGTTTACGGGCCGTAAGTAA +>lcl|NC_004431.1_cds_WP_001264710.1_3 [gene=thrA] [locus_tag=C_RS00015] [protein=bifunctional aspartate kinase/homoserine dehydrogenase I] [protein_id=WP_001264710.1] [location=1048..3510] [gbkey=CDS] |
b |
diff -r 000000000000 -r 1a7773882d2c test-data/genome.2.GCF_000007445.1.seq.rpt.jsonl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.2.GCF_000007445.1.seq.rpt.jsonl Thu Jan 27 08:20:47 2022 +0000 |
b |
@@ -0,0 +1,1 @@ +{"assemblyUnit":"GCF_000007455.1","assignedMoleculeLocationType":"Chromosome","chrName":"ANONYMOUS","gcCount":"2640553","genbankAccession":"AE014075.1","length":5231428,"refseqAccession":"NC_004431.1","sortOrder":1} |
b |
diff -r 000000000000 -r 1a7773882d2c test-data/genome.2.GCF_000013305.1.genomic.cds --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.2.GCF_000013305.1.genomic.cds Thu Jan 27 08:20:47 2022 +0000 |
[ |
@@ -0,0 +1,10 @@ +>lcl|NC_008253.1_cds_WP_001386572.1_1 [gene=thrL] [locus_tag=ECP_RS00005] [protein=thr operon leader peptide] [protein_id=WP_001386572.1] [location=190..255] [gbkey=CDS] +ATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGA +>lcl|NC_008253.1_cds_WP_001264707.1_2 [gene=thrA] [locus_tag=ECP_RS00010] [protein=bifunctional aspartate kinase/homoserine dehydrogenase I] [protein_id=WP_001264707.1] [location=336..2798] [gbkey=CDS] +ATGCGAGTGTTGAAGTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGGGTTGCCGATATTCTGGAAAGCAA +TGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCATCTGGTAGCGATGATTGAAAAAA +CCATTAGCGGTCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTCTGACGGGACTCGCCGCC +GCCCAGCCGGGATTTCCGCTGGCACAATTGAAAACTTTCGTCGACCAGGAATTTGCCCAAATAAAACATGTCCTGCATGG +CATCAGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGCTGATTTGCCGTGGCGAGAAAATGTCGATCGCCATTA +TGGCCGGCGTGTTAGAAGCGCGTGGTCACAACGTTACCGTTATCGATCCGGTCGAAAAACTGCTGGCAGTGGGTCATTAC +CTCGAATCTACCGTTGATATTGCTGAATCCACCCGCCGTATTGCGGCAAGCCGCATTCCGGCTGACCACATGGTGCTGAT |
b |
diff -r 000000000000 -r 1a7773882d2c test-data/genome.2.GCF_000013305.1.genomic.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.2.GCF_000013305.1.genomic.gtf Thu Jan 27 08:20:47 2022 +0000 |
b |
@@ -0,0 +1,10 @@ +#gtf-version 2.2 +#!genome-build ASM1330v1 +#!genome-build-accession NCBI_Assembly:GCF_000013305.1 +#!annotation-date 05/06/2021 17:31:48 +#!annotation-source NCBI RefSeq +NC_008253.1 RefSeq gene 190 255 . + . gene_id "ECP_RS00005"; transcript_id ""; gbkey "Gene"; gene "thrL"; gene_biotype "protein_coding"; locus_tag "ECP_RS00005"; old_locus_tag "ECP_0001"; +NC_008253.1 Protein Homology CDS 190 252 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; +NC_008253.1 Protein Homology start_codon 190 192 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; +NC_008253.1 Protein Homology stop_codon 253 255 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; +NC_008253.1 RefSeq gene 336 2798 . + . gene_id "ECP_RS00010"; transcript_id ""; gbkey "Gene"; gene "thrA"; gene_biotype "protein_coding"; locus_tag "ECP_RS00010"; old_locus_tag "ECP_0002"; |
b |
diff -r 000000000000 -r 1a7773882d2c test-data/genome.2.GCF_000013305.1.seq.rpt.jsonl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.2.GCF_000013305.1.seq.rpt.jsonl Thu Jan 27 08:20:47 2022 +0000 |
b |
@@ -0,0 +1,1 @@ +{"assemblyUnit":"GCF_000013315.1","assignedMoleculeLocationType":"Chromosome","chrName":"ANONYMOUS","gcCount":"2495020","genbankAccession":"CP000247.1","length":4938920,"refseqAccession":"NC_008253.1","sortOrder":1} |
b |
diff -r 000000000000 -r 1a7773882d2c test-data/genome.3.GCF_000007445.1.genomic.gbff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.3.GCF_000007445.1.genomic.gbff Thu Jan 27 08:20:47 2022 +0000 |
b |
@@ -0,0 +1,10 @@ +LOCUS NC_004431 5231428 bp DNA circular CON 13-MAY-2021 +DEFINITION Escherichia coli CFT073, complete sequence. +ACCESSION NC_004431 NZ_AE016755 NZ_AE016756 NZ_AE016757 NZ_AE016758 + NZ_AE016759 NZ_AE016760 NZ_AE016761 NZ_AE016762 NZ_AE016763 + NZ_AE016764 NZ_AE016765 NZ_AE016766 NZ_AE016767 NZ_AE016768 + NZ_AE016769 NZ_AE016770 NZ_AE016771 NZ_AE016772 +VERSION NC_004431.1 +DBLINK BioProject: PRJNA224116 + BioSample: SAMN02604094 + Assembly: GCF_000007445.1 |
b |
diff -r 000000000000 -r 1a7773882d2c test-data/genome.3.GCF_000007445.1.genomic.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.3.GCF_000007445.1.genomic.gff Thu Jan 27 08:20:47 2022 +0000 |
b |
@@ -0,0 +1,10 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +#!genome-build ASM744v1 +#!genome-build-accession NCBI_Assembly:GCF_000007445.1 +#!annotation-date 05/06/2021 17:43:00 +#!annotation-source NCBI RefSeq +##sequence-region NC_004431.1 1 5231428 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=199310 +NC_004431.1 RefSeq region 1 5231428 . + . ID=NC_004431.1:1..5231428;Dbxref=taxon:199310;Is_circular=true;Name=ANONYMOUS;gbkey=Src;genome=chromosome;mol_type=genomic DNA;strain=CFT073 |
b |
diff -r 000000000000 -r 1a7773882d2c test-data/genome.3.GCF_000007445.1.seq.rpt.jsonl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.3.GCF_000007445.1.seq.rpt.jsonl Thu Jan 27 08:20:47 2022 +0000 |
b |
@@ -0,0 +1,1 @@ +{"assemblyUnit":"GCF_000007455.1","assignedMoleculeLocationType":"Chromosome","chrName":"ANONYMOUS","gcCount":"2640553","genbankAccession":"AE014075.1","length":5231428,"refseqAccession":"NC_004431.1","sortOrder":1} |
b |
diff -r 000000000000 -r 1a7773882d2c test-data/genome.3.GCF_000013305.1.genomic.gbff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.3.GCF_000013305.1.genomic.gbff Thu Jan 27 08:20:47 2022 +0000 |
b |
@@ -0,0 +1,10 @@ +LOCUS NC_008253 4938920 bp DNA circular CON 13-MAY-2021 +DEFINITION Escherichia coli 536, complete sequence. +ACCESSION NC_008253 +VERSION NC_008253.1 +DBLINK BioProject: PRJNA224116 + BioSample: SAMN02604181 + Assembly: GCF_000013305.1 +KEYWORDS RefSeq. +SOURCE Escherichia coli 536 + ORGANISM Escherichia coli 536 |
b |
diff -r 000000000000 -r 1a7773882d2c test-data/genome.3.GCF_000013305.1.genomic.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.3.GCF_000013305.1.genomic.gff Thu Jan 27 08:20:47 2022 +0000 |
b |
@@ -0,0 +1,10 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +#!genome-build ASM1330v1 +#!genome-build-accession NCBI_Assembly:GCF_000013305.1 +#!annotation-date 05/06/2021 17:31:48 +#!annotation-source NCBI RefSeq +##sequence-region NC_008253.1 1 4938920 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=362663 +NC_008253.1 RefSeq region 1 4938920 . + . ID=NC_008253.1:1..4938920;Dbxref=taxon:362663;Is_circular=true;Name=ANONYMOUS;gbkey=Src;genome=chromosome;mol_type=genomic DNA;serogroup=O6:K15:H31;strain=536 |
b |
diff -r 000000000000 -r 1a7773882d2c test-data/genome.3.GCF_000013305.1.seq.rpt.jsonl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.3.GCF_000013305.1.seq.rpt.jsonl Thu Jan 27 08:20:47 2022 +0000 |
b |
@@ -0,0 +1,1 @@ +{"assemblyUnit":"GCF_000013315.1","assignedMoleculeLocationType":"Chromosome","chrName":"ANONYMOUS","gcCount":"2495020","genbankAccession":"CP000247.1","length":4938920,"refseqAccession":"NC_008253.1","sortOrder":1} |