Repository 'jcvi_gff_stats'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/jcvi_gff_stats

Changeset 0:2defe48e4c7a (2018-08-02)
Next changeset 1:8cffbd184762 (2018-08-02)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jcvi_gff_stats commit f1e2c895bec4b8bf960d54f634c69fe18411846a
added:
jcvi_gff_stats.xml
macros.xml
test-data/all.pdf
test-data/all_fasta.loc
test-data/annot.gff3
test-data/genome.fasta
test-data/summary.txt
tool-data/all_fasta.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r 2defe48e4c7a jcvi_gff_stats.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jcvi_gff_stats.xml Thu Aug 02 09:57:30 2018 -0400
[
@@ -0,0 +1,91 @@
+<?xml version="1.0"?>
+<tool id="jcvi_gff_stats" name="Genome annotation statistics" profile="16.04" version="@VERSION@">
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command><![CDATA[
+        ## Resolve the reference FASTA once: data table path for a built-in genome, history dataset otherwise.
+        #if str($ref_genome.genome_type_select) == "indexed":
+            #set $genome_fasta = $ref_genome.genome.fields.path
+        #else
+            #set $genome_fasta = $ref_genome.genome
+        #end if
+        ## Expose the annotation under a fixed file name for the jcvi calls below.
+        ln -s '${gff}' 'input.gff'
+        &&
+
+        ## Gene statistics create the summary file; the following steps append to it.
+        python -m jcvi.annotation.stats genestats 'input.gff' > '${summary}'
+        &&
+
+        ## Feature summary against the genome: stderr is merged in and the first two lines are dropped.
+        python -m jcvi.annotation.stats summary 'input.gff' '${genome_fasta}' 2>&1 | tail -n +3 >> '${summary}'
+        &&
+
+        ## Keep only the lines of the stats report that contain the word Mean.
+        python -m jcvi.annotation.stats stats 'input.gff' 2>&1 | grep Mean >> '${summary}'
+        &&
+
+        ## Run the histogram step, then merge every file matching *.input.pdf into the graphs output.
+        python -m jcvi.annotation.stats histogram 'input.gff'
+        &&
+        pdfunite *.input.pdf '${graphs}'
+    ]]></command>
+    <inputs>
+        <param name="gff" type="data" format="gff" label="Annotation to analyse"/>
+        <!-- The genome is either an entry of the all_fasta data table or a FASTA dataset from the history. -->
+        <conditional name="ref_genome">
+            <param name="genome_type_select" type="select" label="Reference genome">
+                <option value="indexed" selected="True">Use a built-in genome</option>
+                <option value="history">Use a genome from history</option>
+            </param>
+            <when value="indexed">
+                <!-- Choices are read from the all_fasta data table and sorted on column 2. -->
+                <param name="genome" type="select"
+                       label="Select a reference genome"
+                       help="If your genome of interest is not listed, contact the Galaxy server administrators">
+                    <options from_data_table="all_fasta">
+                        <filter type="sort_by" column="2"/>
+                        <validator type="no_options" message="No genomes are available"/>
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <!-- Same parameter name as the built-in branch, bound to a FASTA dataset instead. -->
+                <param name="genome" type="data" format="fasta" label="Corresponding genome sequence"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="summary" format="txt" label="${tool.name} on ${on_string}: summary"/> <!-- text report written and appended to by the command -->
+        <data name="graphs" format="pdf" label="${tool.name} on ${on_string}: gene length"/> <!-- merged PDF written by pdfunite -->
+    </outputs>
+    <tests>
+        <test> <!-- reference genome supplied as a history dataset -->
+            <param name="gff" value="annot.gff3"/>
+            <conditional name="ref_genome">
+                <param name="genome_type_select" value="history"/>
+                <param name="genome" value="genome.fasta"/>
+            </conditional>
+            <output name="summary" file="summary.txt"/>
+            <output name="graphs" file="all.pdf" compare="sim_size"/> <!-- PDF is compared by size only -->
+        </test>
+        <test> <!-- reference genome selected from the data table (entry "merlin"); same expected outputs -->
+            <param name="gff" value="annot.gff3"/>
+            <conditional name="ref_genome">
+                <param name="genome_type_select" value="indexed"/>
+                <param name="genome" value="merlin"/>
+            </conditional>
+            <output name="summary" file="summary.txt"/>
+            <output name="graphs" file="all.pdf" compare="sim_size"/> <!-- PDF is compared by size only -->
+        </test>
+    </tests>
+    <help><![CDATA[
+        Based on the `JCVI Python utilities`_, a collection of libraries for genome assembly, annotation and comparative genomics.
+
+        .. _JCVI Python utilities: https://github.com/tanghaibao/jcvi
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 2defe48e4c7a macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Aug 02 09:57:30 2018 -0400
b
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<macros>
+    <!-- Single version string, used for the jcvi package below and for the tool version. -->
+    <token name="@VERSION@">0.8.4</token>
+    <!-- Package dependencies; the yield lets an expanding tool append further requirements. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@VERSION@">jcvi</requirement>
+            <requirement type="package" version="0.67.0">poppler</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.5281/zenodo.31631</citation>
+        </citations>
+    </xml>
+</macros>
b
diff -r 000000000000 -r 2defe48e4c7a test-data/all.pdf
b
Binary file test-data/all.pdf has changed
b
diff -r 000000000000 -r 2defe48e4c7a test-data/all_fasta.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc Thu Aug 02 09:57:30 2018 -0400
b
@@ -0,0 +1,19 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
+merlin merlin Merlin ${__HERE__}/genome.fasta
b
diff -r 000000000000 -r 2defe48e4c7a test-data/annot.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annot.gff3 Thu Aug 02 09:57:30 2018 -0400
b
@@ -0,0 +1,41 @@
+##gff-version 3
+HS08198 maker gene 352 1848 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0;Name=maker-HS08198-exonerate_est2genome-gene-0.0
+HS08198 maker mRNA 352 1848 2869 + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1;Parent=maker-HS08198-exonerate_est2genome-gene-0.0;Name=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1;_AED=0.00;_eAED=0.00;_QI=70|1|1|1|0|0|7|0|192
+HS08198 maker exon 352 397 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:9;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1
+HS08198 maker exon 421 582 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:10;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1
+HS08198 maker exon 812 894 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:11;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1
+HS08198 maker exon 1053 1123 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:12;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1
+HS08198 maker exon 1208 1315 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:13;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1
+HS08198 maker exon 1587 1688 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:14;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1
+HS08198 maker exon 1772 1848 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:exon:15;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1
+HS08198 maker five_prime_UTR 352 397 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:five_prime_utr;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1
+HS08198 maker five_prime_UTR 421 444 . + . ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:five_prime_utr;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1
+HS08198 maker CDS 445 582 . + 0 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1
+HS08198 maker CDS 812 894 . + 0 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1
+HS08198 maker CDS 1053 1123 . + 1 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1
+HS08198 maker CDS 1208 1315 . + 2 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1
+HS08198 maker CDS 1587 1688 . + 2 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1
+HS08198 maker CDS 1772 1848 . + 2 ID=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS08198-exonerate_est2genome-gene-0.0-mRNA-1
+###
+HS04636 maker gene 1813 6903 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0;Name=maker-HS04636-exonerate_est2genome-gene-0.0
+HS04636 maker mRNA 1813 6903 8728 + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1;Parent=maker-HS04636-exonerate_est2genome-gene-0.0;Name=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1;_AED=0.00;_eAED=0.00;_QI=49|1|1|1|0|0|9|0|572
+HS04636 maker exon 1813 1934 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:0;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+HS04636 maker exon 2055 2198 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:1;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+HS04636 maker exon 2852 2995 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:2;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+HS04636 maker exon 3426 3607 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:3;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+HS04636 maker exon 4340 4423 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:4;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+HS04636 maker exon 4543 4789 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:5;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+HS04636 maker exon 5072 5358 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:6;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+HS04636 maker exon 5860 6007 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:7;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+HS04636 maker exon 6494 6903 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:exon:8;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+HS04636 maker five_prime_UTR 1813 1861 . + . ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:five_prime_utr;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+HS04636 maker CDS 1862 1934 . + 0 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+HS04636 maker CDS 2055 2198 . + 2 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+HS04636 maker CDS 2852 2995 . + 2 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+HS04636 maker CDS 3426 3607 . + 2 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+HS04636 maker CDS 4340 4423 . + 0 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+HS04636 maker CDS 4543 4789 . + 0 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+HS04636 maker CDS 5072 5358 . + 2 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+HS04636 maker CDS 5860 6007 . + 0 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+HS04636 maker CDS 6494 6903 . + 2 ID=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1:cds;Parent=maker-HS04636-exonerate_est2genome-gene-0.0-mRNA-1
+###
b
diff -r 000000000000 -r 2defe48e4c7a test-data/genome.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome.fasta Thu Aug 02 09:57:30 2018 -0400
b
b'@@ -0,0 +1,200 @@\n+>HS04636\n+gagctcacattaactatttacagggtaactgcttaggaccagtattatgaggagaattta\n+cctttcccgcctctctttccaagaaacaaggagggggtgaaggtacggagaacagtattt\n+cttctgttgaaagcaacttagctacaaagataaattacagctatgtacactgaaggtagc\n+tatttcattccacaaaataagagttttttaaaaagctatgtatgtatgtgctgcatatag\n+agcagatatacagcctattaagcgtcgtcactaaaacataaaacatgtcagcctttctta\n+accttactcgccccagtctgtcccgacgtgacttcctcgaccctctaaagacgtacagac\n+cagacacggcggcggcggcgggagaggggattccctgcgcccccggacctcagggccgct\n+cagattcctggagaggaagccaagtgtccttctgccctcccccggtatcccatccaaggc\n+gatcagtccagaactggctctcggaagcgctcgggcaaagactgcgaagaagaaaagaca\n+tctggcggaaacctgtgcgcctggggcggtggaactcggggaggagagggagggatcaga\n+caggagagtggggactaccccctctgctcccaaattggggcagcttcctgggtttccgat\n+tttctcatttccgtgggtaaaaaaccctgcccccaccgggcttacgcaatttttttaagg\n+ggagaggagggaaaaaatttgtgggggggtacgaaaaggcggaaagaaacagtcattcac\n+atgggcttggttttcagtcttataaaaaggaaggttctctcggttagcgaccaattgtca\n+tacgacttgcagtgagcgtcaggagcacgtccaggaactcctcagcagcgcctccttcag\n+ctccacagccagacgccctcagacagcaaagcctacccccgcgccgcgccctgcccgccg\n+ctcggatgctcgcccgcgccctgctgctgtgcgcggtcctggcgctcagccatacaggtg\n+agtacctggcgccgcgcaccggggactccggttccacgcacccgggcagagtttccgctc\n+tgacctcctgggtctatcccagtactccgacttctctccgaatagagaagctacgtgact\n+tgggaaagagcttggaccgctagagtccgaaagaactccgtggatattccagctttccca\n+caagcactgatcattatgagccagttacttaaccgatctgagacactctcacctcctaaa\n+tagggatagatgatactaatttgcaggttgtcattatgataagacaggatctgatcaata\n+tatgtgaattgtttatatttggaacctttttattgagtggaagaagttgttttaaatatt\n+ctagtcagttctttcctgctcccaggaaagcccggattatgttttaagataagcaaaatg\n+tcttaaaagtaagctgttttactttgaatttttccctaaatgttgattagtgtactagat\n+ccattttaatttggaaagtgaagtgctacttatttgaacttcttaaaaatgctaatttta\n+acatctaaagagttaactaagaaaagcttagtaacatgatgtaccaagttgaatatgctg\n+ttatccttatttagaatagaaaattggtatttctacgttttatccattctaaggcaggtt\n+aaaaaattgtatttccatgactacctatatatttcttgaatttattattgtaaagttgat\n+tcatagtcaaacaattaaatgtttaaattaagattaagacactagagaatgatttatttg\n+ctgtcctttaattgcagcaaatccttgctgttcccacccatgtcaaaaccgaggtgtatg\n+tatgagtgtgggat
ttgaccagtataagtgcgattgtacccggacaggattctatggaga\n+aaactgctcaacacgtaagtttgtcctttggttgcctcattaggagtggggctggataca\n+gttatcattgtatagatttgtgtcttataatgagtcccattaatttctccctccctttct\n+tcgtcttcttgcagcggaatttttgacaagaataaaattatttctgaaacccactccaaa\n+cacagtgcactacatacttacccacttcaagggattttggaacgttgtgaataacattcc\n+cttccttcgaaatgcaattatgagttatgtcttgacatgtaagtacaagtgtctttctaa\n+ggtttttagccttctcaaagaaaaatatgctttataatactgtaagcctaatctaaaaac\n+atatttccaagcttatcaaaaagactttaagatagcttttaagtttgccttccatcttaa\n+tcgccaaaaatattgacatttagtcccatccagtttatacagtctgctcacaactctgta\n+tacctcttctaacctttactgtttggtcagtttgtggaggtagcatggtccagctgttta\n+ttgaatgcccatgggccacagaattgttctgaacatgtagcacccattaaaataaatttg\n+gatttggatcagcaagaaaataactttccatgattctaaagtgggtgccatactcagcca\n+ttcctttcataggcctcttggatagtgagcagatggctacctgaaaaatcaatattgcca\n+gattataatgtgcagagtatatgtattttattaaagatgtatttcaagtggccattagac\n+tataaagtgtagttgtttaaaaatagattttttttattttggagttacattcaacctcag\n+gtgccactttccacattttacaataaaaataatggttgatttacttaacaaatgagaata\n+aataaaacatttttttctttgaaaatttcagccagatcacatttgattgacagtccacca\n+acttacaatgctgactatggctacaaaagctgggaagccttctctaacctctcctattat\n+actagagcccttcctcctgtgcctgatgattgcccgactcccttgggtgtcaaaggtgag\n+taagaagaatccattagagatgtattaactataagacgggctgcattgctgccaaaaaaa\n+aaaattgaccttagactaccatttatttattaacaaaagcagtttttacttttagcatgg\n+ttatctatgggtattttttaaagtatgagtctatataaactattatgtaaaagcaaatga\n+gcgtcttggtataatgtcttaatattttcaaattatttctttagaaatgaaataattcta\n+attaaaatagataaaatcattcagtaagaagttgttccaccatatcttagaactgttgtt\n+tatattatgatcctattcacaattgtaattctcatataaatgaagaattcttggtagatt\n+gacagtcaccatctcctttcttgaatacatagatggattcttaccttagctttctcattt\n+ttcaggtaaaaagcagcttcctgattcaaatgagattgtggaaaaattgcttctaagaag\n+aaagttcatccctgatccccagggctcaaacatgatgtttgcattctttgcccagcactt\n+cacgcatcagtttttcaagacagatcataagcgagggccagctttcaccaacgggctggg\n+ccatggggtaagatagagttaatatcttagagttagtaaaattataccaaatcatagtca\n+agggctaacattaaaggagatatacagatagatagatccaaataacttatccactttttt\n+taaaaagaagtcttatctataaaaaccttaaaggaattttccatttacttcactggtcta\
n+gtaaaattatacacacacacagacatgcacacacatatataaacattcacacacatacat\n+at'..b'ccaagctgtgcttgaataacgatatgttttctcagattttc\n+tgttgtacagtttaatttagcagtccatatcacattgcaaaagtagcaatgacctcataa\n+aatacctcttcaaaatgcttaaattcatttcacacattaattttatctcagtcttgaagc\n+caattcagtaggtgcattggaatcaagcctggctacctgcatgctgttccttttcttttc\n+ttcttttagccattttgctaagagacacagtcttctcatcacttcgtttctcctattttg\n+ttttactagttttaagatcagagttcactttctttggactctgcctatattttcttacct\n+gaacttttgcaagttttcaggtaaacctcagctcaggactgctatttagctcctcttaag\n+aagattaaaagagaaaaaaaaaggcccttttaaaaatagtatacacttattttaagtgaa\n+aagcagagaattttatttatagctaattttagctatctgtaaccaagatggatgcaaaga\n+ggctagtgcctcagagagaactgtacggggtttgtgactggaaaaagttacgttcccatt\n+ctaattaatgccctttcttatttaaaaacaaaaccaaatgatatctaagtagttctcagc\n+aataataataatgacgataatacttcttttccacatctcattgtcactgacatttaatgg\n+tactgtatattacttaatttattgaagattattatttatgtcttattaggacactatggt\n+tataaactgtgtttaagcctacaatcattgatttttttttgttatgtcacaatcagtata\n+ttttctttggggttacctctctgaatattatgtaaacaatccaaagaaatgattgtatta\n+agatttgtgaataaatttttagaaatctgattggcatattgagatatttaaggttgaatg\n+tttgtccttaggataggcctatgtgctagcccacaaagaatattgtctcattagcctgaa\n+tgtgccataagactgaccttttaaaatgttttgagggatctgtggatgcttcgttaattt\n+gttcagccacaatttattgagaaaatattctgtgtcaagcactgtgggttttaatatttt\n+taaatcaaacgctgattacagataatagtatttatataaataattgaaaaaaattttctt\n+ttgggaagagggagaaaatgaaataaatatcattaaagataactcaggagaatcttcttt\n+acaattttacgtttagaatgtttaaggttaagaaagaaatagtcaatatgcttgtataaa\n+acactgttcactgttttttttaaaaaaaaaacttgatttgttattaacattgatctgctg\n+acaaaacctgggaatttgggttgtgtatgcgaatgtttcagtgcctcagacaaatgtgta\n+tttaacttatgtaaaagataagtctggaaataaatgtctgtttatttttgtactatttaa\n+aaaaaaaaaaaaaaatcgatgtcgactcgagtc\n+>HS08198\n+agcgggcggcggtcgtgggcggggttgcaggcgaggctcaacgaacgctggtctgaccgt\n+cggcgctccctgttgccgggccctgagcaagtggcttcatgaaccccgtgacgttggcca\n+tggagataagaccactgggtgatggtttaaggaagataacgtgtaaagggctaaggactg\n+tcggtggaaatcaggggtgcaggagaaatggataaacagccagaggtcaactcggacttt\n+gtacataggacatggtgccaggccctgccaggaagtgcagatcgaagctaggctcacgag\n+gaggctggag
gtggggggtggggaggcaacggatggacatggacttcctgggctgggctc\n+tgtgacagcagagtagactctgtcctgggacttggtggtgctacccttggcctcccacag\n+tcctgccaccctgctgccgccaccatgctgccccctgggactgcgaccctcttgactctg\n+ctcctggcagctggctcgctgggccagaagcctcagaggccacgccggcccgcatccccc\n+atcagcaccatccagcccaaggccaattttgatgcgcagcaggtagaagttggggggggt\n+agagggaggcaggtagaagttgtgggaggggtagagggagacaggtagaagttgttgcgg\n+gggagagggaagcaggtgaagttgtggggggtgtagagggaagcaggtgaggggccctcc\n+cacagtgccctcgagttctcccatggtctgcccccagtttgcagggacctggctccttgt\n+ggctgtgggctccgcttgccgtttcctgcaggagcagggccaccgggccgaggccaccac\n+actgcatgtggctccccagggcacagccatggctgtcagtaccttccgaaagctgtgagt\n+cccagagcagccctgcaccctaaccccaaccctcctctcagcccccggacttcagccctg\n+ctctggcccctgaccccaccccggctgtggcctggactaggattcctggttggggtctcc\n+cagcctgtggtgcctcctccccgcccccccagggatgggatctgctggcaggtgcgccag\n+ctctatggagacacaggggtcctcggccgcttcctgcttcaaggtgaggcaggggctgca\n+ggtcatgtgggtgggggatgacgcagccactgtggctctctgacatggctactgtggctc\n+tgcccagcccgaggcgcccgaggggctgtgcacgtggttgtcgctgagaccgactaccag\n+agtttcgctgtcctgtacctggagcgggcggggcagctgtcagtgaagctctacggtatg\n+tgggggccagcctctgtgaccaggcaggcgctcaagctctgcacactcactgggccaccc\n+cgaggggctgggtgagccatggggacacacttcctttctcccatcctgatcctcctgcta\n+agcaggggcccagggagtagtgacagacaggcctggtgtgggagcagggaggagggcccc\n+gaggggcaggggacacacagaccccgttcccagagccctccacgccgcctggtgccagga\n+ccccaggaaccctgtctgccctgcagcccgctcgctccctgtgagcgactcggtcctgag\n+tgggtttgagcagcgggtccaggaggcccacctgactgaggaccagatcttctacttccc\n+caagtacggtgagtgtccccagcaggtccccagctcagccacccccactctctggctgat\n+gtccagcctgacccctgccttggcgccccaggcttctgcgaggctgcagaccagttccac\n+gtcctggacggtgagtgcacagcgggggcaagcatggcggcgtggtgaggggggccactc\n+gcaccggctgagtctcgtctctgctgcagaagtgaggaggtgaggccggcacacagctcc\n+agtgctgagaagtcagtgccccgagagacgaccccaccagtggggtgcccgctgcctgtc\n+ctccgtgaaaccagcctcagatcagggccctgccacccagggcaggggatcttctgccgg\n+ctgccccagaggacagtgggtggagtggtacctacttattaaatgtctcagacccctctc\n+tgactcttctgtccactctggaccggcgccagtaccaccaaggccctctctgcccccacc\n+ccgcctctttaaaagcccggcgctccctgttggctggagtccacgcagggtcactgg
gcc\n+gatttcggctcttgggatttgggaggggagatcctctctggcatatgccatcttgtgccc\n+tgctggacctgggggcgtccacgtcactccaaggctgctcttgcctgggccatgcctgca\n+gccc\n'
b
diff -r 000000000000 -r 2defe48e4c7a test-data/summary.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/summary.txt Thu Aug 02 09:57:30 2018 -0400
b
@@ -0,0 +1,27 @@
+==================================================================
+                                                   o           all
+------------------------------------------------------------------
+                  Max number of transcripts per gene             1
+                                      Mean exon size         151.1
+           Mean gene locus size (first to last exon)        3294.0
+              Mean number of distinct exons per gene           8.0
+                 Mean number of transcripts per gene           1.0
+                     Mean transcript size (UTR, CDS)        1208.5
+                            Number of distinct exons            16
+                                     Number of genes             2
+Number of genes with alternative transcript variants      0 (0.0%)
+                          Number of multi-exon genes    2 (100.0%)
+                     Number of predicted transcripts             2
+                         Number of single-exon genes      0 (0.0%)
+------------------------------------------------------------------
+=====================================================================================================
+     o    % GC    % of genome    Average size (bp)    Median size (bp)    Number    Total length (Mb)
+-----------------------------------------------------------------------------------------------------
+  Exon     50%            20%                  151                 133        16                  0Mb
+  Gene     43%            56%                3,294               3,294         2                  0Mb
+Intron     39%            35%                  297                 250        14                  0Mb
+-----------------------------------------------------------------------------------------------------
+Exon_Length: Min=71 Max=410 N=15 Mean=153.2 SD=92.1160138087 Median=138.0 Sum=2298
+Intron_Length: Min=83 Max=732 N=13 Mean=319.076923077 SD=211.891649212 Median=271.0 Sum=4148
+Gene_Length: Min=579 Max=1719 N=2 Mean=1149.0 SD=570.0 Median=1149.0 Sum=2298
+Exon_Count: Min=6 Max=9 N=2 Mean=7.5 SD=1.5 Median=7.5 Sum=15
b
diff -r 000000000000 -r 2defe48e4c7a tool-data/all_fasta.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample Thu Aug 02 09:57:30 2018 -0400
b
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
b
diff -r 000000000000 -r 2defe48e4c7a tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Thu Aug 02 09:57:30 2018 -0400
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- all_fasta data table: one row per FASTA file with the columns value, dbkey, name and path, read from tool-data/all_fasta.loc; lines starting with '#' are comments -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 2defe48e4c7a tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Thu Aug 02 09:57:30 2018 -0400
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Test variant of the all_fasta data table: same columns, but rows are read from test-data/all_fasta.loc, located through ${__HERE__} -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc" />
+    </table>
+</tables>