Mercurial > repos > iuc > jcvi_gff_stats
changeset 0:2defe48e4c7a draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jcvi_gff_stats commit f1e2c895bec4b8bf960d54f634c69fe18411846a
author | iuc |
---|---|
date | Thu, 02 Aug 2018 09:57:30 -0400 |
parents | |
children | 8cffbd184762 |
files | jcvi_gff_stats.xml macros.xml test-data/all.pdf test-data/all_fasta.loc test-data/annot.gff3 test-data/genome.fasta test-data/summary.txt tool-data/all_fasta.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 10 files changed, 428 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jcvi_gff_stats.xml Thu Aug 02 09:57:30 2018 -0400
@@ -0,0 +1,91 @@
+<?xml version="1.0"?>
+<tool id="jcvi_gff_stats" name="Genome annotation statistics" profile="16.04" version="@VERSION@">
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command><![CDATA[
+        ## Link the annotation under a fixed name; the PDFs are collected further down with the *.input.pdf glob
+        ln -s '${gff}' 'input.gff'
+
+        &&
+
+        python -m jcvi.annotation.stats genestats 'input.gff' > '${summary}'
+
+        &&
+        ## Append the summary table; tail -n +3 drops the first two lines of the merged stdout/stderr stream
+        #if str($ref_genome.genome_type_select) == "indexed":
+            python -m jcvi.annotation.stats summary 'input.gff' '${ref_genome.genome.fields.path}' 2>&1 | tail -n +3 >> '${summary}'
+        #else
+            python -m jcvi.annotation.stats summary 'input.gff' '${ref_genome.genome}' 2>&1 | tail -n +3 >> '${summary}'
+        #end if
+
+        &&
+        ## Append only the lines containing "Mean" from the merged stdout/stderr of the stats subcommand
+        python -m jcvi.annotation.stats stats 'input.gff' 2>&1 | grep Mean >> '${summary}'
+
+        &&
+
+        python -m jcvi.annotation.stats histogram 'input.gff'
+
+        &&
+        ## Merge every *.input.pdf found in the working directory into the single PDF output
+        pdfunite *.input.pdf '${graphs}'
+    ]]></command>
+    <inputs>
+        <param name="gff" type="data" format="gff" label="Annotation to analyse"/>
+        <conditional name="ref_genome">
+            <param label="Reference genome" name="genome_type_select" type="select">
+                <option selected="True" value="indexed">Use a built-in genome</option>
+                <option value="history">Use a genome from history</option>
+            </param>
+            <when value="indexed">
+                <param
+                    help="If your genome of interest is not listed, contact the Galaxy server administrators"
+                    label="Select a reference genome"
+                    name="genome"
+                    type="select"
+                >
+                    <options from_data_table="all_fasta">
+                        <filter column="2" type="sort_by" />
+                        <validator message="No genomes are available" type="no_options" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome" type="data" format="fasta" label="Corresponding genome sequence"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format="txt" name="summary" label="${tool.name} on ${on_string}: summary"/>
+        <data format="pdf" name="graphs" label="${tool.name} on ${on_string}: gene length"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="gff" value="annot.gff3"/>
+            <conditional name="ref_genome">
+                <param name="genome_type_select" value="history"/>
+                <param name="genome" value="genome.fasta"/>
+            </conditional>
+            <output name="summary" file="summary.txt"/>
+            <output name="graphs" file="all.pdf" compare="sim_size"/>
+        </test>
+        <test>
+            <param name="gff" value="annot.gff3"/>
+            <conditional name="ref_genome">
+                <param name="genome_type_select" value="indexed"/>
+                <param name="genome" value="merlin"/>
+            </conditional>
+            <output name="summary" file="summary.txt"/>
+            <output name="graphs" file="all.pdf" compare="sim_size"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+        Based on the `JCVI Python utilities`_ for genome assembly, annotation and comparative genomics.
+
+        .. _JCVI Python utilities: https://github.com/tanghaibao/jcvi
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Aug 02 09:57:30 2018 -0400
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<macros>
+    <!-- Version token: substituted into the jcvi requirement below and the tool's version attribute -->
+    <token name="@VERSION@">0.8.4</token>
+    <!-- Package requirements (jcvi and poppler); the yield slot lets an expanding tool append its own -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@VERSION@">jcvi</requirement>
+            <requirement type="package" version="0.67.0">poppler</requirement>
+            <yield />
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.5281/zenodo.31631</citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/all_fasta.loc Thu Aug 02 09:57:30 2018 -0400 @@ -0,0 +1,19 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# +merlin merlin Merlin ${__HERE__}/genome.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annot.gff3 Thu Aug 02 09:57:30 2018 -0400 @@ -0,0 +1,41 @@ +##gff-version 3 +HS08198 maker gene 352 1848 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0;Name=maker-HS08198-exonerate_est2genome-gene-0.0 +HS08198 maker mRNA 352 1848 2869 + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1;Parent=maker-HS08198-exonerate_est2genome-gene-0.0;Name=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1;_AED=0.00;_eAED=0.00;_QI=70|1|1|1|0|0|7|0|192 +HS08198 maker exon 352 397 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:9;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker exon 421 582 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:10;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker exon 812 894 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:11;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker exon 1053 1123 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:12;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker exon 1208 1315 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:13;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker exon 1587 1688 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:14;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker exon 1772 1848 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:15;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker five_prime_UTR 352 397 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:five_prime_utr;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker five_prime_UTR 421 444 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:five_prime_utr;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker CDS 445 582 . 
+ 0 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker CDS 812 894 . + 0 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker CDS 1053 1123 . + 1 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker CDS 1208 1315 . + 2 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker CDS 1587 1688 . + 2 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +HS08198 maker CDS 1772 1848 . + 2 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1 +### +HS04636 maker gene 1813 6903 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0;Name=maker-HS04636-exonerate_est2genome-gene-0.0 +HS04636 maker mRNA 1813 6903 8728 + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1;Parent=maker-HS04636-exonerate_est2genome-gene-0.0;Name=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1;_AED=0.00;_eAED=0.00;_QI=49|1|1|1|0|0|9|0|572 +HS04636 maker exon 1813 1934 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:0;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker exon 2055 2198 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:1;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker exon 2852 2995 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:2;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker exon 3426 3607 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:3;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker exon 4340 4423 . + . 
ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:4;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker exon 4543 4789 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:5;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker exon 5072 5358 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:6;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker exon 5860 6007 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:7;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker exon 6494 6903 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:8;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker five_prime_UTR 1813 1861 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:five_prime_utr;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker CDS 1862 1934 . + 0 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker CDS 2055 2198 . + 2 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker CDS 2852 2995 . + 2 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker CDS 3426 3607 . + 2 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker CDS 4340 4423 . + 0 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker CDS 4543 4789 . + 0 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker CDS 5072 5358 . + 2 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker CDS 5860 6007 . 
+ 0 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +HS04636 maker CDS 6494 6903 . + 2 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1 +###
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.fasta Thu Aug 02 09:57:30 2018 -0400 @@ -0,0 +1,200 @@ +>HS04636 +gagctcacattaactatttacagggtaactgcttaggaccagtattatgaggagaattta +cctttcccgcctctctttccaagaaacaaggagggggtgaaggtacggagaacagtattt +cttctgttgaaagcaacttagctacaaagataaattacagctatgtacactgaaggtagc +tatttcattccacaaaataagagttttttaaaaagctatgtatgtatgtgctgcatatag +agcagatatacagcctattaagcgtcgtcactaaaacataaaacatgtcagcctttctta +accttactcgccccagtctgtcccgacgtgacttcctcgaccctctaaagacgtacagac +cagacacggcggcggcggcgggagaggggattccctgcgcccccggacctcagggccgct +cagattcctggagaggaagccaagtgtccttctgccctcccccggtatcccatccaaggc +gatcagtccagaactggctctcggaagcgctcgggcaaagactgcgaagaagaaaagaca +tctggcggaaacctgtgcgcctggggcggtggaactcggggaggagagggagggatcaga +caggagagtggggactaccccctctgctcccaaattggggcagcttcctgggtttccgat +tttctcatttccgtgggtaaaaaaccctgcccccaccgggcttacgcaatttttttaagg +ggagaggagggaaaaaatttgtgggggggtacgaaaaggcggaaagaaacagtcattcac +atgggcttggttttcagtcttataaaaaggaaggttctctcggttagcgaccaattgtca +tacgacttgcagtgagcgtcaggagcacgtccaggaactcctcagcagcgcctccttcag +ctccacagccagacgccctcagacagcaaagcctacccccgcgccgcgccctgcccgccg +ctcggatgctcgcccgcgccctgctgctgtgcgcggtcctggcgctcagccatacaggtg +agtacctggcgccgcgcaccggggactccggttccacgcacccgggcagagtttccgctc +tgacctcctgggtctatcccagtactccgacttctctccgaatagagaagctacgtgact +tgggaaagagcttggaccgctagagtccgaaagaactccgtggatattccagctttccca +caagcactgatcattatgagccagttacttaaccgatctgagacactctcacctcctaaa +tagggatagatgatactaatttgcaggttgtcattatgataagacaggatctgatcaata +tatgtgaattgtttatatttggaacctttttattgagtggaagaagttgttttaaatatt +ctagtcagttctttcctgctcccaggaaagcccggattatgttttaagataagcaaaatg +tcttaaaagtaagctgttttactttgaatttttccctaaatgttgattagtgtactagat +ccattttaatttggaaagtgaagtgctacttatttgaacttcttaaaaatgctaatttta +acatctaaagagttaactaagaaaagcttagtaacatgatgtaccaagttgaatatgctg +ttatccttatttagaatagaaaattggtatttctacgttttatccattctaaggcaggtt +aaaaaattgtatttccatgactacctatatatttcttgaatttattattgtaaagttgat +tcatagtcaaacaattaaatgtttaaattaagattaagacactagagaatgatttatttg 
+ctgtcctttaattgcagcaaatccttgctgttcccacccatgtcaaaaccgaggtgtatg +tatgagtgtgggatttgaccagtataagtgcgattgtacccggacaggattctatggaga +aaactgctcaacacgtaagtttgtcctttggttgcctcattaggagtggggctggataca +gttatcattgtatagatttgtgtcttataatgagtcccattaatttctccctccctttct +tcgtcttcttgcagcggaatttttgacaagaataaaattatttctgaaacccactccaaa +cacagtgcactacatacttacccacttcaagggattttggaacgttgtgaataacattcc +cttccttcgaaatgcaattatgagttatgtcttgacatgtaagtacaagtgtctttctaa +ggtttttagccttctcaaagaaaaatatgctttataatactgtaagcctaatctaaaaac +atatttccaagcttatcaaaaagactttaagatagcttttaagtttgccttccatcttaa +tcgccaaaaatattgacatttagtcccatccagtttatacagtctgctcacaactctgta +tacctcttctaacctttactgtttggtcagtttgtggaggtagcatggtccagctgttta +ttgaatgcccatgggccacagaattgttctgaacatgtagcacccattaaaataaatttg +gatttggatcagcaagaaaataactttccatgattctaaagtgggtgccatactcagcca +ttcctttcataggcctcttggatagtgagcagatggctacctgaaaaatcaatattgcca +gattataatgtgcagagtatatgtattttattaaagatgtatttcaagtggccattagac +tataaagtgtagttgtttaaaaatagattttttttattttggagttacattcaacctcag +gtgccactttccacattttacaataaaaataatggttgatttacttaacaaatgagaata +aataaaacatttttttctttgaaaatttcagccagatcacatttgattgacagtccacca +acttacaatgctgactatggctacaaaagctgggaagccttctctaacctctcctattat +actagagcccttcctcctgtgcctgatgattgcccgactcccttgggtgtcaaaggtgag +taagaagaatccattagagatgtattaactataagacgggctgcattgctgccaaaaaaa +aaaattgaccttagactaccatttatttattaacaaaagcagtttttacttttagcatgg +ttatctatgggtattttttaaagtatgagtctatataaactattatgtaaaagcaaatga +gcgtcttggtataatgtcttaatattttcaaattatttctttagaaatgaaataattcta +attaaaatagataaaatcattcagtaagaagttgttccaccatatcttagaactgttgtt +tatattatgatcctattcacaattgtaattctcatataaatgaagaattcttggtagatt +gacagtcaccatctcctttcttgaatacatagatggattcttaccttagctttctcattt +ttcaggtaaaaagcagcttcctgattcaaatgagattgtggaaaaattgcttctaagaag +aaagttcatccctgatccccagggctcaaacatgatgtttgcattctttgcccagcactt +cacgcatcagtttttcaagacagatcataagcgagggccagctttcaccaacgggctggg +ccatggggtaagatagagttaatatcttagagttagtaaaattataccaaatcatagtca +agggctaacattaaaggagatatacagatagatagatccaaataacttatccactttttt 
+taaaaagaagtcttatctataaaaaccttaaaggaattttccatttacttcactggtcta +gtaaaattatacacacacacagacatgcacacacatatataaacattcacacacatacat +atgtacaggtattgttatttgtaatttgacccttgtattttttagtttaaaatgttagta +ctgcaaaatgttatgtcctcaaaaacacattgtaccatgattatgccgctttcaatattg +taaagtgaggtttttgccgcattattattttttggatttcaatagcatagcttcaagtta +ttcgtaagaattttttataaataatacatttttatacttttttataattaccatatcatc +atagtgaagtatataatatatatgatataagctcaatatagtatattaattccgttaaac +acaaagacatatcagtttgtagctttggtggataaacaaattaatttagcaattcatggc +tatgaaaaatgtatattttatttaaaaattttaaagaaagctaaatgatcaaattattta +atgatgaattatatgatagacactttatataagaaaaacttcaacagcaacaaattaaaa +ttttttcatcattttctaggtggacttaaatcatatttacggtgaaactctggctagaca +gcgtaaactgcgccttttcaaggatggaaaaatgaaatatcaggtatgcttcctttgact +attaagacttagttattaccgcttatacccatattttaaaatccctaaaaatgtgttcct +taactttttaactgatgtttatttatttatttatttttttagataattgatggagagatg +tatcctcccacagtcaaagatactcaggcagagatgatctaccctcctcaagtccctgag +catctacggtttgctgtggggcaggaggtctttggtctggtgcctggtctgatgatgtat +gccacaatctggctgcgggaacacaacagagtatgcgatgtgcttaaacaggagcatcct +gaatggggtgatgagcagttgttccagacaagcaggctaatactgataggtaaacaagaa +aatgatttatataaaaccctcttccccagggaaaattagtgtgctatctttgttatgttt +tgagtaaatgacaagatgtggtaaatgaaaactcacacattctatatacattaaatatgt +aagcatgactgataaaatagctatcttttgatactgacaaggaagaaaacagaaatgaag +gaatagcaaattttaaaaattgcattccagttgcttgaaagcttgtgatcagatgcaata +aatgtttttattatttattttgtgcaaataggagagactattaagattgtgattgaagat +tatgtgcaacacttgagtggctatcacttcaaactgaaatttgacccagaactacttttc +aacaaacaattccagtaccaaaatcgtattgctgctgaatttaacaccctctatcactgg +catccccttctgcctgacacctttcaaattcatgaccagaaatacaactatcaacagttt +atctacaacaactctatattgctggaacatggaattacccagtttgttgaatcattcacc +aggcaaattgctggcagggtaagcattattattgaaaaccaaaacaaaagactagtcagt +aactttagaatttctgccacggaaattatttttcttaaacttactaaaagagtagttagt +tatattgctagtaaaattattttattgatataagaagcctaactttgtttgaaaagtcta +aacttttagtctagtctacagttgtcagacaaatagcaaattgtacccctaccttaaaaa +tattttcaaaaagtatctataatcttataggaataaatattttaggcttgaatactagtg 
+ttatttttgaaatgtaaaaaggcaaattagttctaggctggtgtcccattgaattttaag +cagagctcctgttgaaatgtaggtaagcatctttccagcaaataaaaattgtctccgctg +ggagtttcagttttacctgatttgtacctaaggcaagctgaatacaaacagtaaatatgc +ctaaaattcttgttttacaactaattttactttccacaggttgctggtggtaggaatgtt +ccacccgcagtacagaaagtatcacaggcttccattgaccagagcaggcagatgaaatac +cagtcttttaatgagtaccgcaaacgctttatgctgaagccctatgaatcatttgaagaa +cttacaggtaagaaacagtttctaaacttcttcgttttttgtttgtttgtttgtttttgt +tgtttttggttttcttttcgagatggagccgccctctgtcacccaggctggagtgcagtg +gcgccatctcggctcactgcaacctccgcctcctgggttcaagcaattctcctgcctcaa +cttcctgagtagctgggactacaggctcacgtcgcacgcatggataattttttgtatttt +cagtatagacggggtttcaccgtgttagccaggctggtctcaaactcctgacctagtgat +ccgccggcttcggcctcccgaagtgctgggattacaggcgtgagccaccgcgcctggccc +ctaaacttcttaaaagaatcaggggtcaaatggaaacagagaagttggcagcaaattgag +caaaagaatcaaactgttttttattttgtgaagtttgacattggttgtatctctgtcttc +atcgccttcacaggagaaaaggaaatgtctgcagagttggaagcactctatggtgacatc +gatgctgtggagctgtatcctgcccttctggtagaaaagcctcggccagatgccatcttt +ggtgaaaccatggtagaagttggagcaccattctccttgaaaggacttatgggtaatgtt +atatgttctcctgcctactggaagccaagcacttttggtggagaagtgggttttcaaatc +atcaacactgcctcaattcagtctctcatctgcaataacgtgaagggctgtccctttact +tcattcagtgttccagatccagagctcattaaaacagtcaccatcaatgcaagttcttcc +cgctccggactagatgatatcaatcccacagtactactaaaagaacgttcgactgaactg +tagaagtctaatgatcatatttatttatttatatgaaccatgtctattaatttaattatt +taataatatttatattaaactccttatgttacttaacatcttctgtaacagaagtcagta +ctcctgttgcggagaaaggagtcatacttgtgaagacttttatgtcactactctaaagat +tttgctgttgctgttaagtttggaaaacagtttttattctgttttataaaccagagagaa +atgagttttgacgtctttttacttgaatttcaacttatattataagaacgaaagtaaaga +tgtttgaatacttaaacactatcacaagatggcaaaatgctgaaagtttttacactgtcg +atgtttccaatgcatcttccatgatgcattagaagtaactaatgtttgaaattttaaagt +acttttggttatttttctgtcatcaaacaaaaacaggtatcagtgcattattaaatgaat +atttaaattagacattaccagtaatttcatgtctactttttaaaatcagcaatgaaacaa +taatttgaaatttctaaattcatagggtagaatcacctgtaaaagcttgtttgatttctt +aaagttattaaacttgtacatataccaaaaagaagctgtcttggatttaaatctgtaaaa 
+tcagatgaaattttactacaattgcttgttaaaatattttataagtgatgttcctttttc +accaagagtataaacctttttagtgtgactgttaaaacttccttttaaatcaaaatgcca +aatttattaaggtggtggagccactgcagtgttatctcaaaataagaatattttgttgag +atattccagaatttgtttatatggctggtaacatgtaaaatctatatcagcaaaagggtc +tacctttaaaataagcaataacaaagaagaaaaccaaattattgttcaaatttaggttta +aacttttgaagcaaacttttttttatccttgtgcactgcaggcctggtactcagattttg +ctatgaggttaatgaagtaccaagctgtgcttgaataacgatatgttttctcagattttc +tgttgtacagtttaatttagcagtccatatcacattgcaaaagtagcaatgacctcataa +aatacctcttcaaaatgcttaaattcatttcacacattaattttatctcagtcttgaagc +caattcagtaggtgcattggaatcaagcctggctacctgcatgctgttccttttcttttc +ttcttttagccattttgctaagagacacagtcttctcatcacttcgtttctcctattttg +ttttactagttttaagatcagagttcactttctttggactctgcctatattttcttacct +gaacttttgcaagttttcaggtaaacctcagctcaggactgctatttagctcctcttaag +aagattaaaagagaaaaaaaaaggcccttttaaaaatagtatacacttattttaagtgaa +aagcagagaattttatttatagctaattttagctatctgtaaccaagatggatgcaaaga +ggctagtgcctcagagagaactgtacggggtttgtgactggaaaaagttacgttcccatt +ctaattaatgccctttcttatttaaaaacaaaaccaaatgatatctaagtagttctcagc +aataataataatgacgataatacttcttttccacatctcattgtcactgacatttaatgg +tactgtatattacttaatttattgaagattattatttatgtcttattaggacactatggt +tataaactgtgtttaagcctacaatcattgatttttttttgttatgtcacaatcagtata +ttttctttggggttacctctctgaatattatgtaaacaatccaaagaaatgattgtatta +agatttgtgaataaatttttagaaatctgattggcatattgagatatttaaggttgaatg +tttgtccttaggataggcctatgtgctagcccacaaagaatattgtctcattagcctgaa +tgtgccataagactgaccttttaaaatgttttgagggatctgtggatgcttcgttaattt +gttcagccacaatttattgagaaaatattctgtgtcaagcactgtgggttttaatatttt +taaatcaaacgctgattacagataatagtatttatataaataattgaaaaaaattttctt +ttgggaagagggagaaaatgaaataaatatcattaaagataactcaggagaatcttcttt +acaattttacgtttagaatgtttaaggttaagaaagaaatagtcaatatgcttgtataaa +acactgttcactgttttttttaaaaaaaaaacttgatttgttattaacattgatctgctg +acaaaacctgggaatttgggttgtgtatgcgaatgtttcagtgcctcagacaaatgtgta +tttaacttatgtaaaagataagtctggaaataaatgtctgtttatttttgtactatttaa +aaaaaaaaaaaaaaatcgatgtcgactcgagtc +>HS08198 
+agcgggcggcggtcgtgggcggggttgcaggcgaggctcaacgaacgctggtctgaccgt +cggcgctccctgttgccgggccctgagcaagtggcttcatgaaccccgtgacgttggcca +tggagataagaccactgggtgatggtttaaggaagataacgtgtaaagggctaaggactg +tcggtggaaatcaggggtgcaggagaaatggataaacagccagaggtcaactcggacttt +gtacataggacatggtgccaggccctgccaggaagtgcagatcgaagctaggctcacgag +gaggctggaggtggggggtggggaggcaacggatggacatggacttcctgggctgggctc +tgtgacagcagagtagactctgtcctgggacttggtggtgctacccttggcctcccacag +tcctgccaccctgctgccgccaccatgctgccccctgggactgcgaccctcttgactctg +ctcctggcagctggctcgctgggccagaagcctcagaggccacgccggcccgcatccccc +atcagcaccatccagcccaaggccaattttgatgcgcagcaggtagaagttggggggggt +agagggaggcaggtagaagttgtgggaggggtagagggagacaggtagaagttgttgcgg +gggagagggaagcaggtgaagttgtggggggtgtagagggaagcaggtgaggggccctcc +cacagtgccctcgagttctcccatggtctgcccccagtttgcagggacctggctccttgt +ggctgtgggctccgcttgccgtttcctgcaggagcagggccaccgggccgaggccaccac +actgcatgtggctccccagggcacagccatggctgtcagtaccttccgaaagctgtgagt +cccagagcagccctgcaccctaaccccaaccctcctctcagcccccggacttcagccctg +ctctggcccctgaccccaccccggctgtggcctggactaggattcctggttggggtctcc +cagcctgtggtgcctcctccccgcccccccagggatgggatctgctggcaggtgcgccag +ctctatggagacacaggggtcctcggccgcttcctgcttcaaggtgaggcaggggctgca +ggtcatgtgggtgggggatgacgcagccactgtggctctctgacatggctactgtggctc +tgcccagcccgaggcgcccgaggggctgtgcacgtggttgtcgctgagaccgactaccag +agtttcgctgtcctgtacctggagcgggcggggcagctgtcagtgaagctctacggtatg +tgggggccagcctctgtgaccaggcaggcgctcaagctctgcacactcactgggccaccc +cgaggggctgggtgagccatggggacacacttcctttctcccatcctgatcctcctgcta +agcaggggcccagggagtagtgacagacaggcctggtgtgggagcagggaggagggcccc +gaggggcaggggacacacagaccccgttcccagagccctccacgccgcctggtgccagga +ccccaggaaccctgtctgccctgcagcccgctcgctccctgtgagcgactcggtcctgag +tgggtttgagcagcgggtccaggaggcccacctgactgaggaccagatcttctacttccc +caagtacggtgagtgtccccagcaggtccccagctcagccacccccactctctggctgat +gtccagcctgacccctgccttggcgccccaggcttctgcgaggctgcagaccagttccac +gtcctggacggtgagtgcacagcgggggcaagcatggcggcgtggtgaggggggccactc +gcaccggctgagtctcgtctctgctgcagaagtgaggaggtgaggccggcacacagctcc 
+agtgctgagaagtcagtgccccgagagacgaccccaccagtggggtgcccgctgcctgtc +ctccgtgaaaccagcctcagatcagggccctgccacccagggcaggggatcttctgccgg +ctgccccagaggacagtgggtggagtggtacctacttattaaatgtctcagacccctctc +tgactcttctgtccactctggaccggcgccagtaccaccaaggccctctctgcccccacc +ccgcctctttaaaagcccggcgctccctgttggctggagtccacgcagggtcactgggcc +gatttcggctcttgggatttgggaggggagatcctctctggcatatgccatcttgtgccc +tgctggacctgggggcgtccacgtcactccaaggctgctcttgcctgggccatgcctgca +gccc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/summary.txt Thu Aug 02 09:57:30 2018 -0400 @@ -0,0 +1,27 @@ +================================================================== + o all +------------------------------------------------------------------ + Max number of transcripts per gene 1 + Mean exon size 151.1 + Mean gene locus size (first to last exon) 3294.0 + Mean number of distinct exons per gene 8.0 + Mean number of transcripts per gene 1.0 + Mean transcript size (UTR, CDS) 1208.5 + Number of distinct exons 16 + Number of genes 2 +Number of genes with alternative transcript variants 0 (0.0%) + Number of multi-exon genes 2 (100.0%) + Number of predicted transcripts 2 + Number of single-exon genes 0 (0.0%) +------------------------------------------------------------------ +===================================================================================================== + o % GC % of genome Average size (bp) Median size (bp) Number Total length (Mb) +----------------------------------------------------------------------------------------------------- + Exon 50% 20% 151 133 16 0Mb + Gene 43% 56% 3,294 3,294 2 0Mb +Intron 39% 35% 297 250 14 0Mb +----------------------------------------------------------------------------------------------------- +Exon_Length: Min=71 Max=410 N=15 Mean=153.2 SD=92.1160138087 Median=138.0 Sum=2298 +Intron_Length: Min=83 Max=732 N=13 Mean=319.076923077 SD=211.891649212 Median=271.0 Sum=4148 +Gene_Length: Min=579 Max=1719 N=2 Mean=1149.0 SD=570.0 Median=1149.0 Sum=2298 +Exon_Count: Min=6 Max=9 N=2 Mean=7.5 SD=1.5 Median=7.5 Sum=15
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/all_fasta.loc.sample Thu Aug 02 09:57:30 2018 -0400 @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Thu Aug 02 09:57:30 2018 -0400
@@ -0,0 +1,7 @@
+<tables>
+    <!-- all_fasta data table: one entry per FASTA file, loaded from tool-data/all_fasta.loc -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Thu Aug 02 09:57:30 2018 -0400
@@ -0,0 +1,7 @@
+<tables>
+    <!-- all_fasta data table for the tool tests: entries come from the bundled test-data/all_fasta.loc -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc" />
+    </table>
+</tables>