Mercurial > repos > bgruening > agat
changeset 2:e009d8260be2 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/agat commit 8eb20f601bc1d2a50c8877b7d0ade057e8f86eae
author | bgruening |
---|---|
date | Thu, 07 Sep 2023 05:29:24 +0000 |
parents | 6d8444408ff1 |
children | a6318f87f2cd |
files | agat.xml macros.xml test-data/region.bed test-data/test01_plot1.pdf test-data/test01_plot2.pdf test-data/test01_stats.txt test-data/test03.txt test-data/test07.gff test-data/test07.tabular test-data/test09.txt test-data/test13.gff |
diffstat | 11 files changed, 402 insertions(+), 210 deletions(-) [+] |
line wrap: on
line diff
--- a/agat.xml Tue May 23 18:05:26 2023 +0000 +++ b/agat.xml Thu Sep 07 05:29:24 2023 +0000 @@ -9,19 +9,30 @@ <command detect_errors="exit_code"><![CDATA[ #if $tool.selector == 'fix' @input_annotation_single@ - agat_convert_sp_gxf2gxf.pl -gff $input_annotation --output 'output.gff' && - cat 'output.gff' > '${annotation_gff}' + agat_convert_sp_gxf2gxf.pl + --gxf $input_annotation + --config $agat_configfile + --output 'output' && + cat 'output' > '${annotation}' #else if $tool.selector == 'convert_GFF2GTF' @input_annotation_single@ - agat_convert_sp_gff2gtf.pl --gff $input_annotation --gtf_version $tool.gtf_version --output 'output.gtf' && + agat_convert_sp_gff2gtf.pl + --gff $input_annotation + --gtf_version $tool.gtf_version + --output 'output.gtf' && cat 'output.gtf' > '${annotation_gtf}' #else if $tool.selector == 'convert_GTF2GFF' @input_annotation_single@ - agat_convert_sp_gxf2gxf.pl --gff $input_annotation --output 'output.gff' && + agat_convert_sp_gxf2gxf.pl + --gff $input_annotation + --output 'output.gff' && cat 'output.gff' > '${annotation_gff}' #else if $tool.selector == 'compare' @input_annotation_double@ - agat_sp_compare_two_annotations.pl --gff1 $input1 --gff2 $input2 --output 'temp_output' && + agat_sp_compare_two_annotations.pl + --gff1 $input1 + --gff2 $input2 + --output 'temp_output' && cat 'temp_output' > '${stats_output}' #else if $tool.selector == 'extract' @input_annotation_single@ @@ -56,35 +67,100 @@ @input_annotation_single@ @input_reference@ mkdir -p './statistics' && - agat_sp_statistics.pl + agat_sp_functional_statistics.pl --gff $input_annotation --gs $ref_genome --output 'temp_output' && - cat 'temp_output' > '$stats_output' + cat 'temp_output/gene@transcript/table_per_feature_type.txt' > '$stats_output' + #else if $tool.selector == 'merge_annotations' @input_annotation_double@ - agat_sp_merge_annotations.pl -gff $input1 --gff $input2 --output 'temp_output' && - cat 'temp_output' > '${annotation_gff}' + agat_sp_merge_annotations.pl + --gff $input1 + --gff $input2 + --config $agat_configfile + --output 'output' && + cat 'output' > '${annotation}' #else if $tool.selector == 'annotation_statistics' @input_annotation_single@ @input_reference@ - agat_sp_statistics.pl --gff $input_annotation --gs $ref_genome -d --output 'temp_output' && + agat_sp_statistics.pl + --gff $input_annotation + --gs $ref_genome + -d + --output 'temp_output' && cat 'temp_output' > '$stats_output' #else if $tool.selector == 'filter_feature_fasta' @input_annotation_single@ @input_reference@ - agat_sq_filter_feature_from_fasta.pl --gff $input_annotation --fasta $ref_genome --output 'temp_output' && - cat 'temp_output' > '${features_filtered}' + agat_sq_filter_feature_from_fasta.pl + --gff $input_annotation + --fasta $ref_genome + --config $agat_configfile + --output 'output' && + cat 'output' > '${annotation}' #else if $tool.selector == 'complement' @input_annotation_double@ - agat_sp_complement_annotations.pl --ref $input1 --add $input2 --size_min $tool.size_min --output 'temp_output' && - cat 'temp_output' > '${annotation_gff}' + agat_sp_complement_annotations.pl + --ref $input1 + --add $input2 + --size_min $tool.size_min + --config $agat_configfile + --output 'temp_output' && + cat 'temp_output' > '${annotation}' + #else if $tool.selector == 'splice_sites' + @input_annotation_single@ + agat_sp_add_splice_sites.pl + --gff $input_annotation + --config $agat_configfile + --output 'output' && + cat 'output' > '${annotation}' #end if ]]> </command> + <configfiles> + <configfile name="agat_configfile"><![CDATA[ +#if $tool.selector in ['fix','merge_annotations','complement','splice_sites','filter_feature_fasta'] +--- +output_format: $tool.output_format.selector +#if $tool.output_format.selector == "GFF" +gff_output_version: $tool.output_format.version +gtf_output_version: relax +#else +gff_output_version: 3 +gtf_output_version: $tool.output_format.version +#end if +verbose: 1 +progress_bar: true +log: true +debug: false +tabix: false +merge_loci: $tool.merge_loci +throw_fasta: false +force_gff_input_version: 0 +create_l3_for_l2_orphan: $tool.create_exon +locus_tag: +- locus_tag +- gene_id +prefix_new_id: nbis +check_sequential: true +check_l2_linked_to_l3: true +check_l1_linked_to_l2: true +remove_orphan_l1: true +check_all_level3_locations: true +check_cds: true +check_exons: true +check_utrs: true +check_all_level2_locations: true +check_all_level1_locations: true +check_identical_isoforms: true +#end if + ]]></configfile> + </configfiles> <inputs> <conditional name="tool"> <param name="selector" type="select" label="AGAT tool selector" help="As AGAT is a toolkit, it contains a lot of tools. If any of them is missing, please contact the server admin."> + <option value="splice_sites">Add splice sites</option> <option value="annotation_statistics">Annotation statistics (agat_sp_statistics.pl)</option> <option value="compare">Compare annotation files (agat_sp_compare_two_annotations.pl)</option> <option value="complement">Complement annotation file (agat_sp_complement_annotations.pl)</option> @@ -113,8 +189,8 @@ <option value="exon">Exon</option> <option value="cds">CDS</option> <option value="trna">tRNA</option> - <option value="three_prime_utr">3' UTR</option> - <option value="five_prime_utr">5' UTR</option> + <option value="three_prime_utr">3 UTR</option> + <option value="five_prime_utr">5 UTR</option> </param> <param argument="--mrna" type="boolean" truevalue="--mrna" falsevalue="" checked="false" label="Extract mRNA sequences" help=" This extract the mrna sequence (i.e transcribed sequence (devoid of introns, but containing untranslated exons))." /> @@ -127,7 +203,7 @@ <param argument="--clean_internal_stop" type="boolean" truevalue="--clean_internal_stop" falsevalue="" checked="false" label="Clean internal stop codons" help="The Clean Internal Stop option allows replacing the translation of the stop codons present among the sequence that is represented by the '*' character by . This character can be disturbing for many programs (e.g interproscan)" /> - <param argument="--upstream" type="integer" min="0" value="" optional="true" label="Upstream nucleotides" help="It will take that number of nucleotide in more at the 5' extremity." /> + <param argument="--upstream" type="integer" min="0" value="" optional="true" label="Upstream nucleotides" help="It will take that number of nucleotide in more at the 5 extremity." /> <param argument="--downstream" type="integer" min="0" value="" optional="true" label="Downstream nucleotides" help="It will take that number of downstream nucleotides." /> <param argument="--full" type="boolean" truevalue="--full" falsevalue="" checked="false" label="Full" help="This option allows dealing with feature that may span over several locations like CDS or exon, in order to extract the full sequence from the start extremity @@ -171,9 +247,11 @@ <when value="filter_feature_fasta"> <expand macro="ANNOTATION_INPUT" /> <expand macro="REFERENCE_FASTA"/> + <expand macro="AGAT_CONFIG"/> </when> <when value="fix"> <expand macro="ANNOTATION_INPUT" format="gff,gff3,gff3.gz"/> + <expand macro="AGAT_CONFIG"/> </when> <when value="functional_analysis"> <expand macro="ANNOTATION_INPUT" format="gff,gtf,gff3,gff3.gz"/> @@ -182,24 +260,33 @@ <when value="merge_annotations"> <param argument="--gff1" name="input_annotation1" type="data" format="gff,gtf,gff3,gff3.gz" label="Annotation file 1" help="Input GTF/GFF file" /> <param argument="--gff2" name="input_annotation2" type="data" format="gff,gtf,gff3,gff3.gz" label="Annotation file 2" help="Input GTF/GFF file" /> + <expand macro="AGAT_CONFIG"/> </when> <when value="complement"> <param argument="--ref" name="input_annotation1" type="data" format="gff,gtf,gff3,gff3.gz" label="Reference annotaiton" help="Reference GTF/GFF file" /> <param argument="--add" name="input_annotation2" type="data" format="gff,gtf,gff3,gff3.gz" label="Annotation to complement" help="Annotation file you would like to use to complement the reference annotation." /> <param argument="--size_min" type="integer" min="0" value="0" label="Minimun CDS size" help="Option to keep the non-overlping gene only if the CDS size (in nucleotide) is over the minimum size defined. Default = 0 that means all of them are kept." /> + <expand macro="AGAT_CONFIG"/> + </when> + <when value="splice_sites"> + <expand macro="ANNOTATION_INPUT" format="gff,gff3,gff3.gz"/> + <expand macro="AGAT_CONFIG"/> </when> </conditional> </inputs> <outputs> <data name="annotation_gff" format="gff" label="${tool.name} on ${on_string}: annotation file (GFF)"> - <filter>tool['selector'] not in ['annotation_statistics','extract','functional_analysis','compare','convert_GFF2GTF','filter_feature_fasta']</filter> + <filter>tool['selector'] == 'convert_GTF2GFF'</filter> </data> <data name="annotation_gtf" format="gtf" label="${tool.name} on ${on_string}: annotation file (GTF)"> <filter>tool['selector'] == 'convert_GFF2GTF'</filter> </data> - <data name="features_filtered" format="tabular" label="${tool.name} on ${on_string}: filtered results"> - <filter>tool['selector'] == 'filter_feature_fasta'</filter> + <data name="annotation" format="gff" label="${tool.name} on ${on_string}: annotation file"> + <filter>tool['selector'] in ['fix','merge_annotations','complement','filter_feature_fasta','splice_sites','bam2gff']</filter> + <change_format> + <when input="output_format.selector" value="GTF" format="gtf" /> + </change_format> </data> <data name="sequence_output" format="fasta" label="${tool.name} on ${on_string}: FASTA file"> <filter>tool['selector'] =='extract'</filter> @@ -228,9 +315,6 @@ </conditional> </conditional> <output name="stats_output" file="test01_stats.txt" ftype="txt"/> - <output_collection name="distribution_plots_woiso" type="list" count="4"> - <element name="transcriptClass_cds" file="test01_plot2.pdf" ftype="pdf" compare="sim_size" delta="100"/> - </output_collection> <output_collection name="distribution_plots_wiso" type="list" count="4"> <element name="transcriptClass_cds" file="test01_plot1.pdf" ftype="pdf" compare="sim_size" delta="100"/> </output_collection> @@ -259,13 +343,17 @@ </conditional> <output name="stats_output" file="test03.txt" ftype="txt" lines_diff="2"/> </test> - <!-- Test 04: comlement annotation --> + <!-- Test 04: complement annotation --> <test expect_num_outputs="1"> <conditional name="tool"> <param name="selector" value="complement"/> <param name="input_annotation1" value="annotation_small.gtf" ftype="gtf"/> <param name="input_annotation2" value="annotation_unique.gtf" ftype="gtf"/> <param name="size_min" value="10"/> + <conditional name="output_format"> + <param name="selector" value="gff"/> + <param name="version" value="3"/> + </conditional> </conditional> <output name="annotation_gff" file="test04.gff" ftype="gff"/> </test> @@ -296,7 +384,7 @@ <param name="history_item" value="genome.fasta.gz"/> </conditional> </conditional> - <output name="features_filtered" file="test07.tabular" ftype="tabular"/> + <output name="annotation" file="test07.gff" ftype="gff"/> </test> <!-- Test 08: Fix annotation file --> <test expect_num_outputs="1"> @@ -328,6 +416,10 @@ <param name="input_annotation1" value="annotation_small.gtf"/> <param name="input_annotation2" value="annotation_unique.gtf"/> </conditional> + <conditional name="output_format"> + <param name="selector" value="gff"/> + <param name="version" value="3"/> + </conditional> <output name="annotation_gff" file="test10.gff" ftype="gff"/> </test> <!-- Test 11: Test compressed files --> @@ -356,6 +448,14 @@ <has_text text="Job done" /> </assert_stdout> </test> + <!-- Test 13: Add splicing sites --> + <test expect_num_outputs="1"> + <conditional name="tool"> + <param name="selector" value="splice_sites"/> + <param name="gff" value="test04.gff" ftype="gff"/> + </conditional> + <output name="annotation" file="test13.gff" ftype="gff"/> + </test> </tests> <help><![CDATA[
--- a/macros.xml Tue May 23 18:05:26 2023 +0000 +++ b/macros.xml Thu Sep 07 05:29:24 2023 +0000 @@ -1,6 +1,6 @@ <macros> - <token name="@TOOL_VERSION@">1.1.0</token> - <token name="@VERSION_SUFFIX@">1</token> + <token name="@TOOL_VERSION@">1.2.0</token> + <token name="@VERSION_SUFFIX@">0</token> <xml name="requirements"> <requirements> <requirement type="package" version="@TOOL_VERSION@">agat</requirement> @@ -19,7 +19,36 @@ <xml name="ANNOTATION_INPUT" token_format="gff,gtf,gff3,gff3.gz"> <param argument="--gff" type="data" format="@FORMAT@" label="Annotation file" help="Input GTF/GFF file" /> </xml> - + <xml name="AGAT_CONFIG"> + <conditional name="output_format"> + <param name="selector" type="select" label="Output format"> + <option value="GFF">GFF</option> + <option value="GTF">GTF</option> + </param> + <when value="GFF"> + <param name="version" type="select" label="Format version"> + <option value="1">1</option> + <option value="2">2</option> + <option value="2.5">2.5</option> + <option value="3" selected="true">3</option> + </param> + </when> + <when value="GTF"> + <param name="version" type="select" label="Format version"> + <option value="1">1 = ("CDS", "start_codon", "stop_codon", "exon", "intron")</option> + <option value="2">2 = ("CDS", "start_codon", "stop_codon", "exon")</option> + <option value="2.1">2.1 = ("CDS", "start_codon", "stop_codon", "exon", "5UTR", "3UTR")</option> + <option value="2.2">2.2 = ("CDS", "start_codon", "stop_codon", "5UTR", "3UTR", "inter", "inter_CNS", "intron_CNS", "exon")</option> + <option value="2.5">2.5 = ("gene", "transcript", "exon", "CDS", "UTR", "start_codon", "stop_codon", "Selenocysteine")</option> + <option value="3">3 = ("gene", "transcript", "exon", "CDS", "Selenocysteine", "start_codon", "stop_codon", "three_prime_utr", "five_prime_utr")</option> + <option value="relax" selected="true">Relax = All feature types will be accepted</option> + </param> + </when> + </conditional> + <param name="merge_loci" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Merge loci" help="Should overlapping loci (at CDS level) be merged in a single locus. Only one gene is kept, and the mRNA features become isoforms." /> + <param name="create_exon" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Create exon when l2 do not have children"/> + </xml> + <xml name="REFERENCE_FASTA"> <conditional name="reference_genome"> <param name="source" type="select" label="Source for the reference genome" help="Built-in references were created using default options.">
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/region.bed Thu Sep 07 05:29:24 2023 +0000 @@ -0,0 +1,1 @@ +K03455 1 2669
--- a/test-data/test01_stats.txt Tue May 23 18:05:26 2023 +0000 +++ b/test-data/test01_stats.txt Thu Sep 07 05:29:24 2023 +0000 @@ -1,7 +1,6 @@ -------------------------------------------------------------------------------- -Compute transcript with isoforms if any - +---------------------------------- transcript ---------------------------------- Number of gene 379 Number of transcript 379 Number of cds 376 @@ -14,67 +13,25 @@ mean cdss per transcript 1.0 mean exons per transcript 1.0 mean exons per cds 1.0 -Total gene length 342644 -Total transcript length 342644 -Total cds length 342338 -Total exon length 342644 -mean gene length 904 -mean transcript length 904 -mean cds length 910 -mean exon length 904 -mean cds piece length 910 +Total gene length (bp) 342644 +Total transcript length (bp) 342644 +Total cds length (bp) 342338 +Total exon length (bp) 342644 +mean gene length (bp) 904 +mean transcript length (bp) 904 +mean cds length (bp) 910 +mean exon length (bp) 904 +mean cds piece length (bp) 910 % of genome covered by gene 33.1 % of genome covered by transcript 33.1 % of genome covered by cds 33.1 % of genome covered by exon 33.1 -Longest gene 9499 -Longest transcript 9499 -Longest cds 9499 -Longest exon 9499 -Longest cds piece 9499 -Shortest gene 54 -Shortest transcript 54 -Shortest cds 54 -Shortest exon 54 -Shortest cds piece 54 - -Re-compute transcript without isoforms asked. We remove shortest isoforms if any +Longest gene (bp) 9499 +Longest transcript (bp) 9499 +Longest cds (bp) 9499 +Longest exon (bp) 9499 +Longest cds piece (bp) 9499 +Shortest gene (bp) 54 +Shortest transcript (bp) 54 +Shortest cds piece (bp) 54 -Number of gene 379 -Number of transcript 379 -Number of cds 376 -Number of exon 379 -Number of exon in cds 376 -Number gene overlapping 62 -Number of single exon gene 379 -Number of single exon transcript 379 -mean transcripts per gene 1.0 -mean cdss per transcript 1.0 -mean exons per transcript 1.0 -mean exons per cds 1.0 -Total gene length 342644 -Total transcript length 342644 -Total cds length 342338 -Total exon length 342644 -mean gene length 904 -mean transcript length 904 -mean cds length 910 -mean exon length 904 -mean cds piece length 910 -% of genome covered by gene 33.1 -% of genome covered by transcript 33.1 -% of genome covered by cds 33.1 -% of genome covered by exon 33.1 -Longest gene 9499 -Longest transcript 9499 -Longest cds 9499 -Longest exon 9499 -Longest cds piece 9499 -Shortest gene 54 -Shortest transcript 54 -Shortest cds 54 -Shortest exon 54 -Shortest cds piece 54 - --------------------------------------------------------------------------------- -
--- a/test-data/test03.txt Tue May 23 18:05:26 2023 +0000 +++ b/test-data/test03.txt Thu Sep 07 05:29:24 2023 +0000 @@ -1,4 +1,4 @@ -usage: /home/laptop/miniconda3/envs/mulled-v1-d5d9956f5cc87a70e05e5aa3970eaf3637ef7e96fa1e50da0f6646fabcdc59e1/bin/agat_sp_compare_two_annotations.pl --gff1 annotation1.gtf --gff2 annotation2.gtf --output temp_output +usage: /home/laptop/miniconda3/envs/__agat@1.2.0/bin/agat_sp_compare_two_annotations.pl --gff1 annotation1.gtf --gff2 annotation2.gtf --output temp_output Results of number of genes from file1 that overlap genes from file2: ----------------------------------------------------------------------------------------------
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test07.gff Thu Sep 07 05:29:24 2023 +0000 @@ -0,0 +1,41 @@ +##gff-version 3 +NZ_CP027599.1 RefSeq gene 1052 2152 . + . ID=nbis-gene-2;Name=dnaN;gbkey=Gene;gene=dnaN;gene_biotype=protein_coding;gene_id=nbis-gene-2;locus_tag=C7A06_RS00010 +NZ_CP027599.1 RefSeq transcript 1052 2152 . + . ID=gene-C7A06_RS00010;Parent=nbis-gene-2;Name=dnaN;gbkey=Gene;gene=dnaN;gene_biotype=protein_coding;gene_id=nbis-gene-2;locus_tag=C7A06_RS00010;original_biotype=mrna;transcript_id=gene-C7A06_RS00010 +NZ_CP027599.1 Protein Homology exon 1052 2152 . + . ID=nbis-exon-2;Parent=gene-C7A06_RS00010;Dbxref=Genbank:WP_000673464.1;Name=WP_000673464.1;Ontology_term=GO:0006260,GO:0003887,GO:0009360;gbkey=CDS;gene=dnaN;gene_id=nbis-gene-2;go_component=DNA polymerase III complex|0009360||IEA;go_function=DNA-directed DNA polymerase activity|0003887||IEA;go_process=DNA replication|0006260||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:WP_006177590.1;locus_tag=C7A06_RS00010;product=DNA polymerase III subunit beta;protein_id=WP_000673464.1;transcript_id=gene-C7A06_RS00010;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 1052 2152 . + 0 ID=cds-WP_000673464.1;Parent=gene-C7A06_RS00010;Dbxref=Genbank:WP_000673464.1;Name=WP_000673464.1;Ontology_term=GO:0006260,GO:0003887,GO:0009360;gbkey=CDS;gene=dnaN;gene_id=nbis-gene-2;go_component=DNA polymerase III complex|0009360||IEA;go_function=DNA-directed DNA polymerase activity|0003887||IEA;go_process=DNA replication|0006260||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:WP_006177590.1;locus_tag=C7A06_RS00010;product=DNA polymerase III subunit beta;protein_id=WP_000673464.1;transcript_id=gene-C7A06_RS00010;transl_table=11 +NZ_CP027599.1 RefSeq gene 2152 3225 . + . ID=nbis-gene-3;Name=recF;gbkey=Gene;gene=recF;gene_biotype=protein_coding;gene_id=nbis-gene-3;locus_tag=C7A06_RS00015 +NZ_CP027599.1 RefSeq transcript 2152 3225 . + . ID=gene-C7A06_RS00015;Parent=nbis-gene-3;Name=recF;gbkey=Gene;gene=recF;gene_biotype=protein_coding;gene_id=nbis-gene-3;locus_tag=C7A06_RS00015;original_biotype=mrna;transcript_id=gene-C7A06_RS00015 +NZ_CP027599.1 Protein Homology exon 2152 3225 . + . ID=nbis-exon-3;Parent=gene-C7A06_RS00015;Dbxref=Genbank:WP_000060112.1;Name=WP_000060112.1;Ontology_term=GO:0006281,GO:0003697,GO:0005524;gbkey=CDS;gene=recF;gene_id=nbis-gene-3;go_function=single-stranded DNA binding|0003697||IEA,ATP binding|0005524||IEA;go_process=DNA repair|0006281||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:WP_005121479.1;locus_tag=C7A06_RS00015;product=DNA replication/repair protein RecF;protein_id=WP_000060112.1;transcript_id=gene-C7A06_RS00015;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 2152 3225 . + 0 ID=cds-WP_000060112.1;Parent=gene-C7A06_RS00015;Dbxref=Genbank:WP_000060112.1;Name=WP_000060112.1;Ontology_term=GO:0006281,GO:0003697,GO:0005524;gbkey=CDS;gene=recF;gene_id=nbis-gene-3;go_function=single-stranded DNA binding|0003697||IEA,ATP binding|0005524||IEA;go_process=DNA repair|0006281||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:WP_005121479.1;locus_tag=C7A06_RS00015;product=DNA replication/repair protein RecF;protein_id=WP_000060112.1;transcript_id=gene-C7A06_RS00015;transl_table=11 +NZ_CP027599.1 RefSeq gene 3254 5668 . + . ID=nbis-gene-4;Name=gyrB;gbkey=Gene;gene=gyrB;gene_biotype=protein_coding;gene_id=nbis-gene-4;locus_tag=C7A06_RS00020 +NZ_CP027599.1 RefSeq transcript 3254 5668 . + . ID=gene-C7A06_RS00020;Parent=nbis-gene-4;Name=gyrB;gbkey=Gene;gene=gyrB;gene_biotype=protein_coding;gene_id=nbis-gene-4;locus_tag=C7A06_RS00020;original_biotype=mrna;transcript_id=gene-C7A06_RS00020 +NZ_CP027599.1 Protein Homology exon 3254 5668 . + . ID=nbis-exon-4;Parent=gene-C7A06_RS00020;Dbxref=Genbank:WP_000072067.1;Name=WP_000072067.1;Ontology_term=GO:0006265,GO:0003918,GO:0009330;gbkey=CDS;gene=gyrB;gene_id=nbis-gene-4;go_component=DNA topoisomerase type II (double strand cut%2C ATP-hydrolyzing) complex|0009330||IEA;go_function=DNA topoisomerase type II (double strand cut%2C ATP-hydrolyzing) activity|0003918||IEA;go_process=DNA topological change|0006265||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:WP_005121480.1;locus_tag=C7A06_RS00020;product=DNA topoisomerase (ATP-hydrolyzing) subunit B;protein_id=WP_000072067.1;transcript_id=gene-C7A06_RS00020;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 3254 5668 . + 0 ID=cds-WP_000072067.1;Parent=gene-C7A06_RS00020;Dbxref=Genbank:WP_000072067.1;Name=WP_000072067.1;Ontology_term=GO:0006265,GO:0003918,GO:0009330;gbkey=CDS;gene=gyrB;gene_id=nbis-gene-4;go_component=DNA topoisomerase type II (double strand cut%2C ATP-hydrolyzing) complex|0009330||IEA;go_function=DNA topoisomerase type II (double strand cut%2C ATP-hydrolyzing) activity|0003918||IEA;go_process=DNA topological change|0006265||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:WP_005121480.1;locus_tag=C7A06_RS00020;product=DNA topoisomerase (ATP-hydrolyzing) subunit B;protein_id=WP_000072067.1;transcript_id=gene-C7A06_RS00020;transl_table=11 +NZ_CP027599.1 RefSeq gene 5908 6306 . + . ID=nbis-gene-5;Name=yidB;gbkey=Gene;gene=yidB;gene_biotype=protein_coding;gene_id=nbis-gene-5;locus_tag=C7A06_RS00025 +NZ_CP027599.1 RefSeq transcript 5908 6306 . + . ID=gene-C7A06_RS00025;Parent=nbis-gene-5;Name=yidB;gbkey=Gene;gene=yidB;gene_biotype=protein_coding;gene_id=nbis-gene-5;locus_tag=C7A06_RS00025;original_biotype=mrna;transcript_id=gene-C7A06_RS00025 +NZ_CP027599.1 Protein Homology exon 5908 6306 . + . ID=nbis-exon-5;Parent=gene-C7A06_RS00025;Dbxref=Genbank:WP_000522208.1;Name=WP_000522208.1;gbkey=CDS;gene=yidB;gene_id=nbis-gene-5;inference=COORDINATES: similar to AA sequence:RefSeq:NP_418153.4;locus_tag=C7A06_RS00025;product=YidB family protein;protein_id=WP_000522208.1;transcript_id=gene-C7A06_RS00025;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 5908 6306 . + 0 ID=cds-WP_000522208.1;Parent=gene-C7A06_RS00025;Dbxref=Genbank:WP_000522208.1;Name=WP_000522208.1;gbkey=CDS;gene=yidB;gene_id=nbis-gene-5;inference=COORDINATES: similar to AA sequence:RefSeq:NP_418153.4;locus_tag=C7A06_RS00025;product=YidB family protein;protein_id=WP_000522208.1;transcript_id=gene-C7A06_RS00025;transl_table=11 +NZ_CP027599.1 RefSeq gene 6421 7233 . + . ID=nbis-gene-6;Name=yidA;gbkey=Gene;gene=yidA;gene_biotype=protein_coding;gene_id=nbis-gene-6;locus_tag=C7A06_RS00030 +NZ_CP027599.1 RefSeq transcript 6421 7233 . + . ID=gene-C7A06_RS00030;Parent=nbis-gene-6;Name=yidA;gbkey=Gene;gene=yidA;gene_biotype=protein_coding;gene_id=nbis-gene-6;locus_tag=C7A06_RS00030;original_biotype=mrna;transcript_id=gene-C7A06_RS00030 +NZ_CP027599.1 Protein Homology exon 6421 7233 . + . ID=nbis-exon-6;Parent=gene-C7A06_RS00030;Dbxref=Genbank:WP_000985541.1;Name=WP_000985541.1;Ontology_term=GO:0016787;gbkey=CDS;gene=yidA;gene_id=nbis-gene-6;go_function=hydrolase activity|0016787||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:NP_418152.1;locus_tag=C7A06_RS00030;product=sugar-phosphatase;protein_id=WP_000985541.1;transcript_id=gene-C7A06_RS00030;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 6421 7233 . + 0 ID=cds-WP_000985541.1;Parent=gene-C7A06_RS00030;Dbxref=Genbank:WP_000985541.1;Name=WP_000985541.1;Ontology_term=GO:0016787;gbkey=CDS;gene=yidA;gene_id=nbis-gene-6;go_function=hydrolase activity|0016787||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:NP_418152.1;locus_tag=C7A06_RS00030;product=sugar-phosphatase;protein_id=WP_000985541.1;transcript_id=gene-C7A06_RS00030;transl_table=11 +NZ_CP027599.1 RefSeq gene 7279 7935 . - . ID=nbis-gene-7;Name=C7A06_RS00035;gbkey=Gene;gene_biotype=protein_coding;gene_id=nbis-gene-7;locus_tag=C7A06_RS00035 +NZ_CP027599.1 RefSeq transcript 7279 7935 . - . ID=gene-C7A06_RS00035;Parent=nbis-gene-7;Name=C7A06_RS00035;gbkey=Gene;gene_biotype=protein_coding;gene_id=nbis-gene-7;locus_tag=C7A06_RS00035;original_biotype=mrna;transcript_id=gene-C7A06_RS00035 +NZ_CP027599.1 Protein Homology exon 7279 7935 . - . ID=nbis-exon-7;Parent=gene-C7A06_RS00035;Dbxref=Genbank:WP_000772931.1;Name=WP_000772931.1;gbkey=CDS;gene_id=nbis-gene-7;inference=COORDINATES: similar to AA sequence:RefSeq:NP_709504.1;locus_tag=C7A06_RS00035;product=hypothetical protein;protein_id=WP_000772931.1;transcript_id=gene-C7A06_RS00035;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 7279 7935 . - 0 ID=cds-WP_000772931.1;Parent=gene-C7A06_RS00035;Dbxref=Genbank:WP_000772931.1;Name=WP_000772931.1;gbkey=CDS;gene_id=nbis-gene-7;inference=COORDINATES: similar to AA sequence:RefSeq:NP_709504.1;locus_tag=C7A06_RS00035;product=hypothetical protein;protein_id=WP_000772931.1;transcript_id=gene-C7A06_RS00035;transl_table=11 +NZ_CP027599.1 RefSeq gene 8213 8902 . + . ID=nbis-gene-8;Name=dgoR;gbkey=Gene;gene=dgoR;gene_biotype=protein_coding;gene_id=nbis-gene-8;locus_tag=C7A06_RS00040 +NZ_CP027599.1 RefSeq transcript 8213 8902 . + . ID=gene-C7A06_RS00040;Parent=nbis-gene-8;Name=dgoR;gbkey=Gene;gene=dgoR;gene_biotype=protein_coding;gene_id=nbis-gene-8;locus_tag=C7A06_RS00040;original_biotype=mrna;transcript_id=gene-C7A06_RS00040 +NZ_CP027599.1 Protein Homology exon 8213 8902 . + . ID=nbis-exon-8;Parent=gene-C7A06_RS00040;Dbxref=Genbank:WP_000174305.1;Name=WP_000174305.1;gbkey=CDS;gene=dgoR;gene_id=nbis-gene-8;inference=COORDINATES: similar to AA sequence:RefSeq:NP_709505.1;locus_tag=C7A06_RS00040;product=D-galactonate utilization transcriptional regulator DgoR;protein_id=WP_000174305.1;transcript_id=gene-C7A06_RS00040;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 8213 8902 . + 0 ID=cds-WP_000174305.1;Parent=gene-C7A06_RS00040;Dbxref=Genbank:WP_000174305.1;Name=WP_000174305.1;gbkey=CDS;gene=dgoR;gene_id=nbis-gene-8;inference=COORDINATES: similar to AA sequence:RefSeq:NP_709505.1;locus_tag=C7A06_RS00040;product=D-galactonate utilization transcriptional regulator DgoR;protein_id=WP_000174305.1;transcript_id=gene-C7A06_RS00040;transl_table=11 +NZ_CP027599.1 RefSeq gene 8899 9777 . + . ID=nbis-gene-9;Name=dgoK;gbkey=Gene;gene=dgoK;gene_biotype=protein_coding;gene_id=nbis-gene-9;locus_tag=C7A06_RS00045 +NZ_CP027599.1 RefSeq transcript 8899 9777 . + . ID=gene-C7A06_RS00045;Parent=nbis-gene-9;Name=dgoK;gbkey=Gene;gene=dgoK;gene_biotype=protein_coding;gene_id=nbis-gene-9;locus_tag=C7A06_RS00045;original_biotype=mrna;transcript_id=gene-C7A06_RS00045 +NZ_CP027599.1 Protein Homology exon 8899 9777 . + . ID=nbis-exon-9;Parent=gene-C7A06_RS00045;Dbxref=Genbank:WP_000127112.1;Name=WP_000127112.1;gbkey=CDS;gene=dgoK;gene_id=nbis-gene-9;inference=COORDINATES: similar to AA sequence:RefSeq:NP_709506.1;locus_tag=C7A06_RS00045;product=2-dehydro-3-deoxygalactonokinase;protein_id=WP_000127112.1;transcript_id=gene-C7A06_RS00045;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 8899 9777 . + 0 ID=cds-WP_000127112.1;Parent=gene-C7A06_RS00045;Dbxref=Genbank:WP_000127112.1;Name=WP_000127112.1;gbkey=CDS;gene=dgoK;gene_id=nbis-gene-9;inference=COORDINATES: similar to AA sequence:RefSeq:NP_709506.1;locus_tag=C7A06_RS00045;product=2-dehydro-3-deoxygalactonokinase;protein_id=WP_000127112.1;transcript_id=gene-C7A06_RS00045;transl_table=11 +NZ_CP027599.1 RefSeq gene 9761 10378 . + . ID=nbis-gene-10;Name=dgoA;gbkey=Gene;gene=dgoA;gene_biotype=protein_coding;gene_id=nbis-gene-10;locus_tag=C7A06_RS00050 +NZ_CP027599.1 RefSeq transcript 9761 10378 . + . ID=gene-C7A06_RS00050;Parent=nbis-gene-10;Name=dgoA;gbkey=Gene;gene=dgoA;gene_biotype=protein_coding;gene_id=nbis-gene-10;locus_tag=C7A06_RS00050;original_biotype=mrna;transcript_id=gene-C7A06_RS00050 +NZ_CP027599.1 Protein Homology exon 9761 10378 . + . ID=nbis-exon-10;Parent=gene-C7A06_RS00050;Dbxref=Genbank:WP_001198699.1;Name=WP_001198699.1;gbkey=CDS;gene=dgoA;gene_id=nbis-gene-10;inference=COORDINATES: similar to AA sequence:RefSeq:YP_026238.1;locus_tag=C7A06_RS00050;product=2-dehydro-3-deoxy-6-phosphogalactonate aldolase;protein_id=WP_001198699.1;transcript_id=gene-C7A06_RS00050;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 9761 10378 . + 0 ID=cds-WP_001198699.1;Parent=gene-C7A06_RS00050;Dbxref=Genbank:WP_001198699.1;Name=WP_001198699.1;gbkey=CDS;gene=dgoA;gene_id=nbis-gene-10;inference=COORDINATES: similar to AA sequence:RefSeq:YP_026238.1;locus_tag=C7A06_RS00050;product=2-dehydro-3-deoxy-6-phosphogalactonate aldolase;protein_id=WP_001198699.1;transcript_id=gene-C7A06_RS00050;transl_table=11 +NZ_CP027599.1 RefSeq gene 10375 11523 . + . ID=nbis-gene-11;Name=dgoD;gbkey=Gene;gene=dgoD;gene_biotype=protein_coding;gene_id=nbis-gene-11;locus_tag=C7A06_RS00055 +NZ_CP027599.1 RefSeq transcript 10375 11523 . + . ID=gene-C7A06_RS00055;Parent=nbis-gene-11;Name=dgoD;gbkey=Gene;gene=dgoD;gene_biotype=protein_coding;gene_id=nbis-gene-11;locus_tag=C7A06_RS00055;original_biotype=mrna;transcript_id=gene-C7A06_RS00055 +NZ_CP027599.1 Protein Homology exon 10375 11523 . + . ID=nbis-exon-11;Parent=gene-C7A06_RS00055;Dbxref=Genbank:WP_000705001.1;Name=WP_000705001.1;Ontology_term=GO:0009063,GO:0008869;gbkey=CDS;gene=dgoD;gene_id=nbis-gene-11;go_function=galactonate dehydratase activity|0008869||IEA;go_process=cellular amino acid catabolic process|0009063||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:WP_020077623.1;locus_tag=C7A06_RS00055;product=galactonate dehydratase;protein_id=WP_000705001.1;transcript_id=gene-C7A06_RS00055;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 10375 11523 . + 0 ID=cds-WP_000705001.1;Parent=gene-C7A06_RS00055;Dbxref=Genbank:WP_000705001.1;Name=WP_000705001.1;Ontology_term=GO:0009063,GO:0008869;gbkey=CDS;gene=dgoD;gene_id=nbis-gene-11;go_function=galactonate dehydratase activity|0008869||IEA;go_process=cellular amino acid catabolic process|0009063||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:WP_020077623.1;locus_tag=C7A06_RS00055;product=galactonate dehydratase;protein_id=WP_000705001.1;transcript_id=gene-C7A06_RS00055;transl_table=11
--- a/test-data/test07.tabular Tue May 23 18:05:26 2023 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -##gff-version 3 -NZ_CP027599.1 RefSeq gene 1052 2152 . + . -NZ_CP027599.1 RefSeq transcript 1052 2152 . + . -NZ_CP027599.1 Protein Homology exon 1052 2152 . + . -NZ_CP027599.1 Protein Homology CDS 1052 2152 . + 0 -NZ_CP027599.1 RefSeq gene 2152 3225 . + . -NZ_CP027599.1 RefSeq transcript 2152 3225 . + . -NZ_CP027599.1 Protein Homology exon 2152 3225 . + . -NZ_CP027599.1 Protein Homology CDS 2152 3225 . + 0 -NZ_CP027599.1 RefSeq gene 3254 5668 . + . -NZ_CP027599.1 RefSeq transcript 3254 5668 . + . -NZ_CP027599.1 Protein Homology exon 3254 5668 . + . -NZ_CP027599.1 Protein Homology CDS 3254 5668 . + 0 -NZ_CP027599.1 RefSeq gene 5908 6306 . + . -NZ_CP027599.1 RefSeq transcript 5908 6306 . + . -NZ_CP027599.1 Protein Homology exon 5908 6306 . + . -NZ_CP027599.1 Protein Homology CDS 5908 6306 . + 0 -NZ_CP027599.1 RefSeq gene 6421 7233 . + . -NZ_CP027599.1 RefSeq transcript 6421 7233 . + . -NZ_CP027599.1 Protein Homology exon 6421 7233 . + . -NZ_CP027599.1 Protein Homology CDS 6421 7233 . + 0 -NZ_CP027599.1 RefSeq gene 7279 7935 . - . -NZ_CP027599.1 RefSeq transcript 7279 7935 . - . -NZ_CP027599.1 Protein Homology exon 7279 7935 . - . -NZ_CP027599.1 Protein Homology CDS 7279 7935 . - 0 -NZ_CP027599.1 RefSeq gene 8213 8902 . + . -NZ_CP027599.1 RefSeq transcript 8213 8902 . + . -NZ_CP027599.1 Protein Homology exon 8213 8902 . + . -NZ_CP027599.1 Protein Homology CDS 8213 8902 . + 0 -NZ_CP027599.1 RefSeq gene 8899 9777 . + . -NZ_CP027599.1 RefSeq transcript 8899 9777 . + . -NZ_CP027599.1 Protein Homology exon 8899 9777 . + . -NZ_CP027599.1 Protein Homology CDS 8899 9777 . + 0 -NZ_CP027599.1 RefSeq gene 9761 10378 . + . -NZ_CP027599.1 RefSeq transcript 9761 10378 . + . -NZ_CP027599.1 Protein Homology exon 9761 10378 . + . -NZ_CP027599.1 Protein Homology CDS 9761 10378 . + 0 -NZ_CP027599.1 RefSeq gene 10375 11523 . + . -NZ_CP027599.1 RefSeq transcript 10375 11523 . + . -NZ_CP027599.1 Protein Homology exon 10375 11523 . + . -NZ_CP027599.1 Protein Homology CDS 10375 11523 . + 0
--- a/test-data/test09.txt Tue May 23 18:05:26 2023 +0000 +++ b/test-data/test09.txt Thu Sep 07 05:29:24 2023 +0000 @@ -1,80 +1,131 @@ --------------------------------------------------------------------------------- -Compute transcript with isoforms if any - -Number of gene 10 -Number of transcript 10 -Number of cds 10 -Number of exon 10 -Number of exon in cds 10 -Number gene overlapping 4 -Number of single exon gene 10 -Number of single exon transcript 10 -mean transcripts per gene 1.0 -mean cdss per transcript 1.0 -mean exons per transcript 1.0 -mean exons per cds 1.0 -Total gene length 9795 -Total transcript length 9795 -Total cds length 9795 -Total exon length 9795 -mean gene length 979 -mean transcript length 979 -mean cds length 979 -mean exon length 979 -mean cds piece length 979 -% of genome covered by gene 0.9 -% of genome covered by transcript 0.9 -% of genome covered by cds 0.9 -% of genome covered by exon 0.9 -Longest gene 2415 -Longest transcript 2415 -Longest cds 2415 -Longest exon 2415 -Longest cds piece 2415 -Shortest gene 399 -Shortest transcript 399 -Shortest cds 399 -Shortest exon 399 -Shortest cds piece 399 - -Re-compute transcript without isoforms asked. We remove shortest isoforms if any - -Number of gene 10 -Number of transcript 10 -Number of cds 10 -Number of exon 10 -Number of exon in cds 10 -Number gene overlapping 4 -Number of single exon gene 10 -Number of single exon transcript 10 -mean transcripts per gene 1.0 -mean cdss per transcript 1.0 -mean exons per transcript 1.0 -mean exons per cds 1.0 -Total gene length 9795 -Total transcript length 9795 -Total cds length 9795 -Total exon length 9795 -mean gene length 979 -mean transcript length 979 -mean cds length 979 -mean exon length 979 -mean cds piece length 979 -% of genome covered by gene 0.9 -% of genome covered by transcript 0.9 -% of genome covered by cds 0.9 -% of genome covered by exon 0.9 -Longest gene 2415 -Longest transcript 2415 -Longest cds 2415 -Longest exon 2415 -Longest cds piece 2415 -Shortest gene 399 -Shortest transcript 399 -Shortest cds 399 -Shortest exon 399 -Shortest cds piece 399 - --------------------------------------------------------------------------------- - +Functional info gene@transcript records: + _____________________________________________________________________________ +| | Nb holded by | Nb gene | +| | gene | holding it | +|_____________________________________________________________________________| +| name | 10 | 10 | +|_____________________________________________________________________________| +| product | 0 | 0 | +|_____________________________________________________________________________| +| description | 0 | 0 | +|_____________________________________________________________________________| +| ontology_term | 0 | 0 | +|_____________________________________________________________________________| +| dbxref | 0 | 0 | +|_____________________________________________________________________________| +| dbxref:Genbank | 0 | 0 | +|_____________________________________________________________________________| +Nb gene = 10 +Nb gene with <name> attribute = 10 +Nb gene without <name> attribute = 0 +Nb gene with <product> attribute = 0 +Nb gene without <product> attribute = 10 +Nb gene with <description> attribute = 0 +Nb gene without <description> attribute = 10 +Nb gene with <ontology_term> attribute = 0 +Nb gene without <ontology_term> attribute = 10 +Nb gene with <dbxref> attribute = 0 +Nb gene without <dbxref> attribute = 10 +Nb gene with <Genbank> dbxref = 0 +Nb gene without <Genbank> dbxref = 10 + _______________________________________________________________________________________________________ +| | Nb holded by | Nb transcript | Nb gene with | +| | transcript | holding it | transcript holding it | +|_______________________________________________________________________________________________________| +| name | 10 | 10 | 10 | +|_______________________________________________________________________________________________________| +| product | 0 | 0 | 0 | +|_______________________________________________________________________________________________________| +| description | 0 | 0 | 0 | +|_______________________________________________________________________________________________________| +| ontology_term | 0 | 0 | 0 | +|_______________________________________________________________________________________________________| +| dbxref | 0 | 0 | 0 | +|_______________________________________________________________________________________________________| +| dbxref:Genbank | 0 | 0 | 0 | +|_______________________________________________________________________________________________________| +Nb gene = 10 +Nb transcript = 10 +Nb gene with <name> attribute = 10 +Nb gene without <name> attribute = 0 +Nb transcript with <name> attribute = 10 +Nb transcript without <name> attribute = 0 +Nb gene with <product> attribute = 0 +Nb gene without <product> attribute = 10 +Nb transcript with <product> attribute = 0 +Nb transcript without <product> attribute = 10 +Nb gene with <description> attribute = 0 +Nb gene without <description> attribute = 10 +Nb transcript with <description> attribute = 0 +Nb transcript without <description> attribute = 10 +Nb gene with <ontology_term> attribute = 0 +Nb gene without <ontology_term> attribute = 10 +Nb transcript with <ontology_term> attribute = 0 +Nb transcript without <ontology_term> attribute = 10 +Nb gene with <dbxref> attribute = 0 +Nb gene without <dbxref> attribute = 10 +Nb transcript with <dbxref> attribute = 0 +Nb transcript without <dbxref> attribute = 10 +Nb gene with Genbank dbxref = 0 +Nb gene without Genbank dbxref = 10 +Nb transcript with Genbank dbxref = 0 +Nb transcript without Genbank dbxref = 10 + _____________________________________________________________________________ +| | Nb holded by | Nb cds | +| | cds | holding it | +|_____________________________________________________________________________| +| name | 10 | 10 | +|_____________________________________________________________________________| +| product | 10 | 10 | +|_____________________________________________________________________________| +| description | 0 | 0 | +|_____________________________________________________________________________| +| ontology_term | 12 | 5 | +|_____________________________________________________________________________| +| dbxref | 10 | 10 | +|_____________________________________________________________________________| +| dbxref:Genbank | 10 | 10 | +|_____________________________________________________________________________| +Nb cds = 10 +Nb cds with <name> attribute = 10 +Nb cds without <name> attribute = 0 +Nb cds with <product> attribute = 10 +Nb cds without <product> attribute = 0 +Nb cds with <description> attribute = 0 +Nb cds without <description> attribute = 10 +Nb cds with <ontology_term> attribute = 5 +Nb cds without <ontology_term> attribute = 5 +Nb cds with <dbxref> attribute = 10 +Nb cds without <dbxref> attribute = 0 +Nb cds with <Genbank> dbxref = 10 +Nb cds without <Genbank> dbxref = 0 + _____________________________________________________________________________ +| | Nb holded by | Nb exon | +| | exon | holding it | +|_____________________________________________________________________________| +| name | 10 | 10 | +|_____________________________________________________________________________| +| product | 10 | 10 | +|_____________________________________________________________________________| +| description | 0 | 0 | +|_____________________________________________________________________________| +| ontology_term | 12 | 5 | +|_____________________________________________________________________________| +| dbxref | 10 | 10 | +|_____________________________________________________________________________| +| dbxref:Genbank | 10 | 10 | +|_____________________________________________________________________________| +Nb exon = 10 +Nb exon with <name> attribute = 10 +Nb exon without <name> attribute = 0 +Nb exon with <product> attribute = 10 +Nb exon without <product> attribute = 0 +Nb exon with <description> attribute = 0 +Nb exon without <description> attribute = 10 +Nb exon with <ontology_term> attribute = 5 +Nb exon without <ontology_term> attribute = 5 +Nb exon with <dbxref> attribute = 10 +Nb exon without <dbxref> attribute = 0 +Nb exon with <Genbank> dbxref = 10 +Nb exon without <Genbank> dbxref = 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test13.gff Thu Sep 07 05:29:24 2023 +0000 @@ -0,0 +1,54 @@ +##gff-version 3 +##gtf-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +#!genome-build ASM301845v1 +#!genome-build-accession NCBI_Assembly:GCF_003018455.1 +#!annotation-date 05/25/2022 04:54:31 +#!annotation-source NCBI RefSeq +##sequence-region NZ_CP027599.1 1 5942969 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=562 +NZ_CP027599.1 RefSeq gene 1052 2152 . + . ID=nbis-gene-2;Name=dnaN;gbkey=Gene;gene=dnaN;gene_biotype=protein_coding;gene_id=nbis-gene-2;locus_tag=C7A06_RS00010 +NZ_CP027599.1 RefSeq transcript 1052 2152 . + . ID=gene-C7A06_RS00010;Parent=nbis-gene-2;Name=dnaN;gbkey=Gene;gene=dnaN;gene_biotype=protein_coding;gene_id=nbis-gene-2;locus_tag=C7A06_RS00010;original_biotype=mrna;transcript_id=gene-C7A06_RS00010 +NZ_CP027599.1 Protein Homology exon 1052 2152 . + . ID=nbis-exon-2;Parent=gene-C7A06_RS00010;Dbxref=Genbank:WP_000673464.1;Name=WP_000673464.1;Ontology_term=GO:0006260,GO:0003887,GO:0009360;gbkey=CDS;gene=dnaN;gene_id=nbis-gene-2;go_component=DNA polymerase III complex|0009360||IEA;go_function=DNA-directed DNA polymerase activity|0003887||IEA;go_process=DNA replication|0006260||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:WP_006177590.1;locus_tag=C7A06_RS00010;product=DNA polymerase III subunit beta;protein_id=WP_000673464.1;transcript_id=gene-C7A06_RS00010;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 1052 2152 . + 0 ID=cds-WP_000673464.1;Parent=gene-C7A06_RS00010;Dbxref=Genbank:WP_000673464.1;Name=WP_000673464.1;Ontology_term=GO:0006260,GO:0003887,GO:0009360;gbkey=CDS;gene=dnaN;gene_id=nbis-gene-2;go_component=DNA polymerase III complex|0009360||IEA;go_function=DNA-directed DNA polymerase activity|0003887||IEA;go_process=DNA replication|0006260||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:WP_006177590.1;locus_tag=C7A06_RS00010;product=DNA polymerase III subunit beta;protein_id=WP_000673464.1;transcript_id=gene-C7A06_RS00010;transl_table=11 +NZ_CP027599.1 RefSeq gene 2152 3225 . + . ID=nbis-gene-3;Name=recF;gbkey=Gene;gene=recF;gene_biotype=protein_coding;gene_id=nbis-gene-3;locus_tag=C7A06_RS00015 +NZ_CP027599.1 RefSeq transcript 2152 3225 . + . ID=gene-C7A06_RS00015;Parent=nbis-gene-3;Name=recF;gbkey=Gene;gene=recF;gene_biotype=protein_coding;gene_id=nbis-gene-3;locus_tag=C7A06_RS00015;original_biotype=mrna;transcript_id=gene-C7A06_RS00015 +NZ_CP027599.1 Protein Homology exon 2152 3225 . + . ID=nbis-exon-3;Parent=gene-C7A06_RS00015;Dbxref=Genbank:WP_000060112.1;Name=WP_000060112.1;Ontology_term=GO:0006281,GO:0003697,GO:0005524;gbkey=CDS;gene=recF;gene_id=nbis-gene-3;go_function=single-stranded DNA binding|0003697||IEA,ATP binding|0005524||IEA;go_process=DNA repair|0006281||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:WP_005121479.1;locus_tag=C7A06_RS00015;product=DNA replication/repair protein RecF;protein_id=WP_000060112.1;transcript_id=gene-C7A06_RS00015;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 2152 3225 . + 0 ID=cds-WP_000060112.1;Parent=gene-C7A06_RS00015;Dbxref=Genbank:WP_000060112.1;Name=WP_000060112.1;Ontology_term=GO:0006281,GO:0003697,GO:0005524;gbkey=CDS;gene=recF;gene_id=nbis-gene-3;go_function=single-stranded DNA binding|0003697||IEA,ATP binding|0005524||IEA;go_process=DNA repair|0006281||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:WP_005121479.1;locus_tag=C7A06_RS00015;product=DNA replication/repair protein RecF;protein_id=WP_000060112.1;transcript_id=gene-C7A06_RS00015;transl_table=11 +NZ_CP027599.1 RefSeq gene 3254 5668 . + . ID=nbis-gene-4;Name=gyrB;gbkey=Gene;gene=gyrB;gene_biotype=protein_coding;gene_id=nbis-gene-4;locus_tag=C7A06_RS00020 +NZ_CP027599.1 RefSeq transcript 3254 5668 . + . ID=gene-C7A06_RS00020;Parent=nbis-gene-4;Name=gyrB;gbkey=Gene;gene=gyrB;gene_biotype=protein_coding;gene_id=nbis-gene-4;locus_tag=C7A06_RS00020;original_biotype=mrna;transcript_id=gene-C7A06_RS00020 +NZ_CP027599.1 Protein Homology exon 3254 5668 . + . ID=nbis-exon-4;Parent=gene-C7A06_RS00020;Dbxref=Genbank:WP_000072067.1;Name=WP_000072067.1;Ontology_term=GO:0006265,GO:0003918,GO:0009330;gbkey=CDS;gene=gyrB;gene_id=nbis-gene-4;go_component=DNA topoisomerase type II (double strand cut%2C ATP-hydrolyzing) complex|0009330||IEA;go_function=DNA topoisomerase type II (double strand cut%2C ATP-hydrolyzing) activity|0003918||IEA;go_process=DNA topological change|0006265||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:WP_005121480.1;locus_tag=C7A06_RS00020;product=DNA topoisomerase (ATP-hydrolyzing) subunit B;protein_id=WP_000072067.1;transcript_id=gene-C7A06_RS00020;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 3254 5668 . + 0 ID=cds-WP_000072067.1;Parent=gene-C7A06_RS00020;Dbxref=Genbank:WP_000072067.1;Name=WP_000072067.1;Ontology_term=GO:0006265,GO:0003918,GO:0009330;gbkey=CDS;gene=gyrB;gene_id=nbis-gene-4;go_component=DNA topoisomerase type II (double strand cut%2C ATP-hydrolyzing) complex|0009330||IEA;go_function=DNA topoisomerase type II (double strand cut%2C ATP-hydrolyzing) activity|0003918||IEA;go_process=DNA topological change|0006265||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:WP_005121480.1;locus_tag=C7A06_RS00020;product=DNA topoisomerase (ATP-hydrolyzing) subunit B;protein_id=WP_000072067.1;transcript_id=gene-C7A06_RS00020;transl_table=11 +NZ_CP027599.1 RefSeq gene 5908 6306 . + . ID=nbis-gene-5;Name=yidB;gbkey=Gene;gene=yidB;gene_biotype=protein_coding;gene_id=nbis-gene-5;locus_tag=C7A06_RS00025 +NZ_CP027599.1 RefSeq transcript 5908 6306 . + . ID=gene-C7A06_RS00025;Parent=nbis-gene-5;Name=yidB;gbkey=Gene;gene=yidB;gene_biotype=protein_coding;gene_id=nbis-gene-5;locus_tag=C7A06_RS00025;original_biotype=mrna;transcript_id=gene-C7A06_RS00025 +NZ_CP027599.1 Protein Homology exon 5908 6306 . + . ID=nbis-exon-5;Parent=gene-C7A06_RS00025;Dbxref=Genbank:WP_000522208.1;Name=WP_000522208.1;gbkey=CDS;gene=yidB;gene_id=nbis-gene-5;inference=COORDINATES: similar to AA sequence:RefSeq:NP_418153.4;locus_tag=C7A06_RS00025;product=YidB family protein;protein_id=WP_000522208.1;transcript_id=gene-C7A06_RS00025;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 5908 6306 . + 0 ID=cds-WP_000522208.1;Parent=gene-C7A06_RS00025;Dbxref=Genbank:WP_000522208.1;Name=WP_000522208.1;gbkey=CDS;gene=yidB;gene_id=nbis-gene-5;inference=COORDINATES: similar to AA sequence:RefSeq:NP_418153.4;locus_tag=C7A06_RS00025;product=YidB family protein;protein_id=WP_000522208.1;transcript_id=gene-C7A06_RS00025;transl_table=11 +NZ_CP027599.1 RefSeq gene 6421 7233 . + . ID=nbis-gene-6;Name=yidA;gbkey=Gene;gene=yidA;gene_biotype=protein_coding;gene_id=nbis-gene-6;locus_tag=C7A06_RS00030 +NZ_CP027599.1 RefSeq transcript 6421 7233 . + . ID=gene-C7A06_RS00030;Parent=nbis-gene-6;Name=yidA;gbkey=Gene;gene=yidA;gene_biotype=protein_coding;gene_id=nbis-gene-6;locus_tag=C7A06_RS00030;original_biotype=mrna;transcript_id=gene-C7A06_RS00030 +NZ_CP027599.1 Protein Homology exon 6421 7233 . + . ID=nbis-exon-6;Parent=gene-C7A06_RS00030;Dbxref=Genbank:WP_000985541.1;Name=WP_000985541.1;Ontology_term=GO:0016787;gbkey=CDS;gene=yidA;gene_id=nbis-gene-6;go_function=hydrolase activity|0016787||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:NP_418152.1;locus_tag=C7A06_RS00030;product=sugar-phosphatase;protein_id=WP_000985541.1;transcript_id=gene-C7A06_RS00030;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 6421 7233 . + 0 ID=cds-WP_000985541.1;Parent=gene-C7A06_RS00030;Dbxref=Genbank:WP_000985541.1;Name=WP_000985541.1;Ontology_term=GO:0016787;gbkey=CDS;gene=yidA;gene_id=nbis-gene-6;go_function=hydrolase activity|0016787||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:NP_418152.1;locus_tag=C7A06_RS00030;product=sugar-phosphatase;protein_id=WP_000985541.1;transcript_id=gene-C7A06_RS00030;transl_table=11 +NZ_CP027599.1 RefSeq gene 7279 7935 . - . ID=nbis-gene-7;Name=C7A06_RS00035;gbkey=Gene;gene_biotype=protein_coding;gene_id=nbis-gene-7;locus_tag=C7A06_RS00035 +NZ_CP027599.1 RefSeq transcript 7279 7935 . - . ID=gene-C7A06_RS00035;Parent=nbis-gene-7;Name=C7A06_RS00035;gbkey=Gene;gene_biotype=protein_coding;gene_id=nbis-gene-7;locus_tag=C7A06_RS00035;original_biotype=mrna;transcript_id=gene-C7A06_RS00035 +NZ_CP027599.1 Protein Homology exon 7279 7935 . - . ID=nbis-exon-7;Parent=gene-C7A06_RS00035;Dbxref=Genbank:WP_000772931.1;Name=WP_000772931.1;gbkey=CDS;gene_id=nbis-gene-7;inference=COORDINATES: similar to AA sequence:RefSeq:NP_709504.1;locus_tag=C7A06_RS00035;product=hypothetical protein;protein_id=WP_000772931.1;transcript_id=gene-C7A06_RS00035;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 7279 7935 . - 0 ID=cds-WP_000772931.1;Parent=gene-C7A06_RS00035;Dbxref=Genbank:WP_000772931.1;Name=WP_000772931.1;gbkey=CDS;gene_id=nbis-gene-7;inference=COORDINATES: similar to AA sequence:RefSeq:NP_709504.1;locus_tag=C7A06_RS00035;product=hypothetical protein;protein_id=WP_000772931.1;transcript_id=gene-C7A06_RS00035;transl_table=11 +NZ_CP027599.1 RefSeq gene 8213 8902 . + . ID=nbis-gene-8;Name=dgoR;gbkey=Gene;gene=dgoR;gene_biotype=protein_coding;gene_id=nbis-gene-8;locus_tag=C7A06_RS00040 +NZ_CP027599.1 RefSeq transcript 8213 8902 . + . ID=gene-C7A06_RS00040;Parent=nbis-gene-8;Name=dgoR;gbkey=Gene;gene=dgoR;gene_biotype=protein_coding;gene_id=nbis-gene-8;locus_tag=C7A06_RS00040;original_biotype=mrna;transcript_id=gene-C7A06_RS00040 +NZ_CP027599.1 Protein Homology exon 8213 8902 . + . ID=nbis-exon-8;Parent=gene-C7A06_RS00040;Dbxref=Genbank:WP_000174305.1;Name=WP_000174305.1;gbkey=CDS;gene=dgoR;gene_id=nbis-gene-8;inference=COORDINATES: similar to AA sequence:RefSeq:NP_709505.1;locus_tag=C7A06_RS00040;product=D-galactonate utilization transcriptional regulator DgoR;protein_id=WP_000174305.1;transcript_id=gene-C7A06_RS00040;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 8213 8902 . + 0 ID=cds-WP_000174305.1;Parent=gene-C7A06_RS00040;Dbxref=Genbank:WP_000174305.1;Name=WP_000174305.1;gbkey=CDS;gene=dgoR;gene_id=nbis-gene-8;inference=COORDINATES: similar to AA sequence:RefSeq:NP_709505.1;locus_tag=C7A06_RS00040;product=D-galactonate utilization transcriptional regulator DgoR;protein_id=WP_000174305.1;transcript_id=gene-C7A06_RS00040;transl_table=11 +NZ_CP027599.1 RefSeq gene 8899 9777 . + . ID=nbis-gene-9;Name=dgoK;gbkey=Gene;gene=dgoK;gene_biotype=protein_coding;gene_id=nbis-gene-9;locus_tag=C7A06_RS00045 +NZ_CP027599.1 RefSeq transcript 8899 9777 . + . ID=gene-C7A06_RS00045;Parent=nbis-gene-9;Name=dgoK;gbkey=Gene;gene=dgoK;gene_biotype=protein_coding;gene_id=nbis-gene-9;locus_tag=C7A06_RS00045;original_biotype=mrna;transcript_id=gene-C7A06_RS00045 +NZ_CP027599.1 Protein Homology exon 8899 9777 . + . ID=nbis-exon-9;Parent=gene-C7A06_RS00045;Dbxref=Genbank:WP_000127112.1;Name=WP_000127112.1;gbkey=CDS;gene=dgoK;gene_id=nbis-gene-9;inference=COORDINATES: similar to AA sequence:RefSeq:NP_709506.1;locus_tag=C7A06_RS00045;product=2-dehydro-3-deoxygalactonokinase;protein_id=WP_000127112.1;transcript_id=gene-C7A06_RS00045;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 8899 9777 . + 0 ID=cds-WP_000127112.1;Parent=gene-C7A06_RS00045;Dbxref=Genbank:WP_000127112.1;Name=WP_000127112.1;gbkey=CDS;gene=dgoK;gene_id=nbis-gene-9;inference=COORDINATES: similar to AA sequence:RefSeq:NP_709506.1;locus_tag=C7A06_RS00045;product=2-dehydro-3-deoxygalactonokinase;protein_id=WP_000127112.1;transcript_id=gene-C7A06_RS00045;transl_table=11 +NZ_CP027599.1 RefSeq gene 9761 10378 . + . ID=nbis-gene-10;Name=dgoA;gbkey=Gene;gene=dgoA;gene_biotype=protein_coding;gene_id=nbis-gene-10;locus_tag=C7A06_RS00050 +NZ_CP027599.1 RefSeq transcript 9761 10378 . + . ID=gene-C7A06_RS00050;Parent=nbis-gene-10;Name=dgoA;gbkey=Gene;gene=dgoA;gene_biotype=protein_coding;gene_id=nbis-gene-10;locus_tag=C7A06_RS00050;original_biotype=mrna;transcript_id=gene-C7A06_RS00050 +NZ_CP027599.1 Protein Homology exon 9761 10378 . + . ID=nbis-exon-10;Parent=gene-C7A06_RS00050;Dbxref=Genbank:WP_001198699.1;Name=WP_001198699.1;gbkey=CDS;gene=dgoA;gene_id=nbis-gene-10;inference=COORDINATES: similar to AA sequence:RefSeq:YP_026238.1;locus_tag=C7A06_RS00050;product=2-dehydro-3-deoxy-6-phosphogalactonate aldolase;protein_id=WP_001198699.1;transcript_id=gene-C7A06_RS00050;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 9761 10378 . + 0 ID=cds-WP_001198699.1;Parent=gene-C7A06_RS00050;Dbxref=Genbank:WP_001198699.1;Name=WP_001198699.1;gbkey=CDS;gene=dgoA;gene_id=nbis-gene-10;inference=COORDINATES: similar to AA sequence:RefSeq:YP_026238.1;locus_tag=C7A06_RS00050;product=2-dehydro-3-deoxy-6-phosphogalactonate aldolase;protein_id=WP_001198699.1;transcript_id=gene-C7A06_RS00050;transl_table=11 +NZ_CP027599.1 RefSeq gene 10375 11523 . + . ID=nbis-gene-11;Name=dgoD;gbkey=Gene;gene=dgoD;gene_biotype=protein_coding;gene_id=nbis-gene-11;locus_tag=C7A06_RS00055 +NZ_CP027599.1 RefSeq transcript 10375 11523 . + . ID=gene-C7A06_RS00055;Parent=nbis-gene-11;Name=dgoD;gbkey=Gene;gene=dgoD;gene_biotype=protein_coding;gene_id=nbis-gene-11;locus_tag=C7A06_RS00055;original_biotype=mrna;transcript_id=gene-C7A06_RS00055 +NZ_CP027599.1 Protein Homology exon 10375 11523 . + . ID=nbis-exon-11;Parent=gene-C7A06_RS00055;Dbxref=Genbank:WP_000705001.1;Name=WP_000705001.1;Ontology_term=GO:0009063,GO:0008869;gbkey=CDS;gene=dgoD;gene_id=nbis-gene-11;go_function=galactonate dehydratase activity|0008869||IEA;go_process=cellular amino acid catabolic process|0009063||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:WP_020077623.1;locus_tag=C7A06_RS00055;product=galactonate dehydratase;protein_id=WP_000705001.1;transcript_id=gene-C7A06_RS00055;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 10375 11523 . + 0 ID=cds-WP_000705001.1;Parent=gene-C7A06_RS00055;Dbxref=Genbank:WP_000705001.1;Name=WP_000705001.1;Ontology_term=GO:0009063,GO:0008869;gbkey=CDS;gene=dgoD;gene_id=nbis-gene-11;go_function=galactonate dehydratase activity|0008869||IEA;go_process=cellular amino acid catabolic process|0009063||IEA;inference=COORDINATES: similar to AA sequence:RefSeq:WP_020077623.1;locus_tag=C7A06_RS00055;product=galactonate dehydratase;protein_id=WP_000705001.1;transcript_id=gene-C7A06_RS00055;transl_table=11 +NZ_CP027599.1 RefSeq gene 11598 12935 . + . ID=nbis-gene-12;Name=dgoT;gbkey=Gene;gene=dgoT;gene_biotype=protein_coding;gene_id=nbis-gene-12;locus_tag=C7A06_RS00060 +NZ_CP027599.1 RefSeq transcript 11598 12935 . + . ID=gene-C7A06_RS00060;Parent=nbis-gene-12;Name=dgoT;gbkey=Gene;gene=dgoT;gene_biotype=protein_coding;gene_id=nbis-gene-12;locus_tag=C7A06_RS00060;original_biotype=mrna;transcript_id=gene-C7A06_RS00060 +NZ_CP027599.1 Protein Homology exon 11598 12935 . + . ID=nbis-exon-12;Parent=gene-C7A06_RS00060;Dbxref=Genbank:WP_000253455.1;Name=WP_000253455.1;gbkey=CDS;gene=dgoT;gene_id=nbis-gene-12;inference=COORDINATES: similar to AA sequence:RefSeq:NP_709507.1;locus_tag=C7A06_RS00060;product=MFS transporter;protein_id=WP_000253455.1;transcript_id=gene-C7A06_RS00060;transl_table=11 +NZ_CP027599.1 Protein Homology CDS 11598 12935 . + 0 ID=cds-WP_000253455.1;Parent=gene-C7A06_RS00060;Dbxref=Genbank:WP_000253455.1;Name=WP_000253455.1;gbkey=CDS;gene=dgoT;gene_id=nbis-gene-12;inference=COORDINATES: similar to AA sequence:RefSeq:NP_709507.1;locus_tag=C7A06_RS00060;product=MFS transporter;protein_id=WP_000253455.1;transcript_id=gene-C7A06_RS00060;transl_table=11