Repository 'gffread'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/gffread

Changeset 0:5f6e6582c01d (2015-05-14)
Next changeset 1:48fe74f391ab (2015-11-11)
Commit message:
planemo upload commit a52cc16ed8d0d60e99742b55fccbdedcbb64b82c
added:
cuff_macros.xml
gffread.xml
test-data/Homo_sapiens.GRCh37.71.dna.chromosome.19.fa
test-data/Homo_sapiens.GRCh37.71.dna.chromosome.19.fa.fai
test-data/Homo_sapiens.GRCh37_19.71.gff3
test-data/Homo_sapiens.GRCh37_19.71.gtf
test-data/chr_replace
tool-data/fasta_indexes.loc.sample
tool_data_table_conf.xml.sample
tool_dependencies.xml
b
diff -r 000000000000 -r 5f6e6582c01d cuff_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cuff_macros.xml Thu May 14 13:08:14 2015 -0400
[
@@ -0,0 +1,91 @@
+<macros>
+  <!-- Version token; its value matches the cufflinks package version required below. -->
+  <token name="@VERSION@">2.2.1</token>
+  <!-- Package requirements shared by the tools that import this file.
+       The yield element marks where the expanding tool's own child elements are placed. -->
+  <xml name="requirements">
+    <requirements>
+      <requirement version="2.2.1" type="package">cufflinks</requirement>
+      <yield/>
+    </requirements>
+  </xml>
+  <!-- Exit-code ranges and output patterns declared for the tool's stdio handling:
+       any exit code of 1 or above, any exit code of -1 or below, and the
+       literal markers "Error:" and "Exception:". -->
+  <xml name="stdio">
+    <stdio>
+      <exit_code range="1:"/>
+      <exit_code range=":-1"/>
+      <regex match="Error:"/>
+      <regex match="Exception:"/>
+    </stdio>
+  </xml>
+  <!-- Input selector for tools that compare conditions. The user first picks the
+       kind of input (in_type.set_in_type) and then supplies the conditions in the
+       matching form; every branch exposes them under the same name, "conditions",
+       which is what the CONDITION_SAMPLES and CONDITION_LABELS tokens iterate over. -->
+  <xml name="condition_inputs">
+    <!-- SAM/BAM is listed first and is the choice the help text describes as the default. -->
+    <conditional name="in_type">
+        <param name="set_in_type" type="select" label="Input data type"
+            help="CuffNorm supports either CXB (from cuffquant) or SAM/BAM input files. Mixing is not supported. Default: SAM/BAM">
+            <option value="BAM">SAM/BAM</option>
+            <option value="CXB">Cuffquant (CXB)</option>
+            <option value="CONDITION_LIST">List of single replicate conditions</option>
+            <option value="CONDITION_REPLICATE_LIST">List of multiple replicate conditions</option>
+        </param>
+        <!-- Repeat-based branches: at least two conditions, each with a name and its replicate datasets. -->
+        <when value="BAM">
+            <repeat name="conditions" title="Condition" min="2">
+                <param name="name" type="text" label="Name"/>
+                <param name="samples" label="Replicates" type="data" format="sam,bam" multiple="true"/>
+            </repeat>
+        </when>
+        <when value="CXB">
+            <repeat name="conditions" title="Condition" min="2">
+                <param name="name" type="text" label="Name"/>
+                <param name="samples" label="Replicates" type="data" format="cxb" multiple="true"/>
+            </repeat>
+        </when>
+        <!-- Collection-based branches: param elements are captioned with "label"
+             ("title" belongs to repeat), so the caption is given as a label here. -->
+        <when value="CONDITION_LIST">
+            <param name="conditions" label="List of Conditions" type="data_collection" collection_type="list" />
+        </when>
+        <when value="CONDITION_REPLICATE_LIST">
+            <param name="conditions" label="List of Conditions" type="data_collection" collection_type="list:list" />
+        </when>
+    </conditional>
+  </xml>
+  <!-- Cheetah fragment that writes the sample arguments, one entry per condition,
+       by reading in_type from the condition_inputs macro:
+       * BAM / CXB: for each repeat entry, its "samples" datasets joined with commas.
+       * CONDITION_LIST: each element of the collection is written on its own.
+       * CONDITION_REPLICATE_LIST: for each inner list, its elements joined with commas. -->
+  <token name="@CONDITION_SAMPLES@">
+            #if $in_type.set_in_type in ['BAM', 'CXB']
+                #for $condition in $in_type.conditions:
+                    #set samples = ','.join( [ str( $sample ) for $sample in $condition.samples ] )
+                    $samples
+                #end for
+            #elif $in_type.set_in_type == 'CONDITION_LIST'
+                #for $sample in $in_type.conditions:
+                    $sample
+                #end for
+            #elif $in_type.set_in_type == 'CONDITION_REPLICATE_LIST'
+                #for $condition_list in $in_type.conditions:
+                    #set samples = ','.join( [ str( $sample ) for $sample in $condition_list ] )
+                    $samples
+                #end for
+            #end if
+  </token>
+  <!-- Cheetah fragment that writes the labels option followed by the condition
+       labels, each wrapped in single quotes and separated by commas.
+       * BAM / CXB: labels are the "name" values entered for each repeat entry.
+       * CONDITION_LIST / CONDITION_REPLICATE_LIST: labels are the keys of the
+         collection, with every character that is not a word character or a
+         hyphen replaced by an underscore. -->
+  <token name="@CONDITION_LABELS@">
+            #import re
+            #if $in_type.set_in_type in ['BAM', 'CXB']
+                #set labels = '\'' + '\',\''.join( [ str( $condition.name ) for $condition in $in_type.conditions ] ) + '\''
+            #elif $in_type.set_in_type in ['CONDITION_LIST', 'CONDITION_REPLICATE_LIST']
+                #set labels = '\'' + '\',\''.join( map(lambda x: re.sub('[^\w\-_]', '_', x), $in_type.conditions.keys() ) ) + '\''
+            #end if
+            --labels $labels
+  </token>
+  <!-- GTF inputs: a multi-select dataset parameter ("inputs") plus a repeat
+       ("additional_inputs") in which each entry holds one dataset collection. -->
+  <xml name="cufflinks_gtf_inputs">
+    <param name="inputs" type="data" format="gtf" multiple="true" label="GTF file(s) produced by Cufflinks" help=""/>
+    <repeat name="additional_inputs" title="Additional GTF Inputs (Lists)">
+      <param name="additional_inputs" type="data_collection" format="gtf" label="GTF file(s) produced by Cufflinks" help=""/>
+    </repeat>
+  </xml>
+  <!-- Cheetah fragment that writes every GTF input as a double-quoted argument:
+       first each dataset selected in "inputs", then each element of the
+       collection held by every entry of the "additional_inputs" repeat. -->
+  <token name="@CUFFLINKS_GTF_INPUTS@">
+            ## Inputs.
+            #for $input_file in $inputs:
+                "${input_file}"
+            #end for
+            #for $additional_input in $additional_inputs:
+                #for $input_file in $additional_input.additional_inputs:
+                  "${input_file}"
+                #end for
+            #end for
+  </token>
+  <!-- Python expression text: true when "inputs" reports a length of two or more.
+       If "inputs" has no __len__ attribute, the length of an empty list (0) is
+       used instead, so the expression evaluates to false. -->
+  <token name="@HAS_MULTIPLE_INPUTS@">getattr(inputs, "__len__", [].__len__)() >= 2</token>
+</macros>
\ No newline at end of file
b
diff -r 000000000000 -r 5f6e6582c01d gffread.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gffread.xml Thu May 14 13:08:14 2015 -0400
[
b'@@ -0,0 +1,444 @@\n+<tool id="gffread" name="gffread" version="@VERSION@.0">\n+    <description>Filters and/or converts GFF3/GTF2 records</description>\n+    <expand macro="requirements" />\n+    <expand macro="stdio" />\n+    <macros>\n+        <import>cuff_macros.xml</import>\n+        <xml name="fasta_output_select">\n+            <param name="fa_outputs" type="select" display="checkboxes" multiple="true" label="Select fasta outputs">\n+                <option value="-w exons.fa">fasta file with spliced exons for each GFF transcript (-w exons.fa)</option>\n+                <option value="-x cds.fa">fasta file with spliced CDS for each GFF transcript (-x cds.fa)</option>\n+                <option value="-y pep.fa">protein fasta file with the translation of CDS for each record (-y pep.fa)</option>\n+                <option value="-W">for each fasta: record the exon coordinates projected onto the spliced sequence (-W)</option>\n+            </param>\n+        </xml>\n+        <xml name="ref_filtering_select">\n+            <param name="ref_filtering" type="select" display="checkboxes" multiple="true" label="reference based filters">\n+                <option value="-N">discard multi-exon mRNAs that have any intron with a non-canonical splice site consensus, i.e. 
not GT-AG, GC-AG or AT-AC (-N)</option>\n+                <option value="-J">discard any mRNAs that either lack initial START codon or the terminal STOP codon, or have an in-frame stop codon (-J)</option>\n+                <option value="-V">discard any mRNAs with CDS having in-frame stop codons (-V)</option>\n+                <option value="-H">check and adjust the starting CDS phase if the original phase leads to a translation with an in-frame stop codon (-H with -V)</option>\n+                <!-- gffread bug: B not in  missing from param to the arg parser \n+                <option value="-B">single-exon transcripts are also checked on the opposite strand (-B with -V)</option>\n+                -->\n+            </param>\n+        </xml>\n+        <xml name="trackname">\n+            <param name="tname" type="text" value="" size="30" optional="true" label="Trackname to use in the second column of each GFF output line" help="(-t track_name}">\n+                <validator type="regex">\\w+</validator>\n+            </param>\n+        </xml>\n+        <xml name="merge_opts">\n+             <option value="-K">also collapse shorter, fully contained transcripts with fewer introns than the container (-K)</option>\n+             <option value="-Q">remove the containment restriction: multi-exon transcripts will be collapsed if just their introns match, while single-exon transcripts can partially overlap 80% (-Q)</option>\n+             <option value="-d dupinfo">output collapsing info (-d dupinfo)</option>\n+        </xml>\n+        <xml name="cluster_opts">\n+             <option value="--force-exons"> make sure that the lowest level GFF features are printed as \'exon\' features (--force-exons)</option>\n+             <option value="-Z">merge close exons into a single exon (for intron size &lt; 4) (-Z)</option>\n+        </xml>\n+        <xml name="merge_opt_sel">\n+            <param name="merge_options" type="select" display="checkboxes" multiple="true" label="Merge 
options">\n+                <expand macro="cluster_opts" />\n+                <expand macro="merge_opts" />\n+            </param>\n+        </xml>\n+        <xml name="cluster_opt_sel">\n+            <param name="merge_options" type="select" display="checkboxes" multiple="true" label="Cluster options">\n+                <expand macro="cluster_opts" />\n+            </param>\n+        </xml>\n+    </macros>\n+    <command>\n+<![CDATA[\n+    #if $reference_genome.source == \'history\':\n+        ln -s $reference_genome.genome_fasta genomeref.fa &&\n+    #end if\n+    gffread $input \n+    #if $reference_genome.source == \'cached\':\n+        -g "${reference_genome.fasta_indexes.fields.path}"\n+        #if $reference_genome.ref_filtering and str($reference_genome.ref_filterin'..b'nomic sequences\n+      for all input mappings, OR a directory with single-fasta files\n+      (one per genomic sequence, with file names matching sequence names)\n+  -s  <seq_info.fsize> is a tab-delimited file providing this info\n+      for each of the mapped sequences:\n+      <seq-name> <seq-length> <seq-description>\n+      (useful for -A option with mRNA/EST/protein mappings)\n+  -i  discard transcripts having an intron larger than <maxintron>\n+  -r  only show transcripts overlapping coordinate range <start>..<end>\n+      (on chromosome/contig <chr>, strand <strand> if provided)\n+  -R  for -r option, discard all transcripts that are not fully \n+      contained within the given range\n+  -U  discard single-exon transcripts\n+  -C  coding only: discard mRNAs that have no CDS feature\n+  -F  full GFF attribute preservation (all attributes are shown)\n+  -G  only parse additional exon attributes from the first exon\n+      and move them to the mRNA level (useful for GTF input)\n+  -A  use the description field from <seq_info.fsize> and add it\n+      as the value for a \'descr\' attribute to the GFF record\n+  \n+  -O  process also non-transcript GFF records (by default 
non-transcript\n+      records are ignored)\n+  -V  discard any mRNAs with CDS having in-frame stop codons\n+  -H  for -V option, check and adjust the starting CDS phase\n+      if the original phase leads to a translation with an \n+      in-frame stop codon\n+  -B  for -V option, single-exon transcripts are also checked on the\n+      opposite strand\n+  -N  discard multi-exon mRNAs that have any intron with a non-canonical\n+      splice site consensus (i.e. not GT-AG, GC-AG or AT-AC)\n+  -J  discard any mRNAs that either lack initial START codon\n+      or the terminal STOP codon, or have an in-frame stop codon\n+      (only print mRNAs with a fulll, valid CDS)\n+  --no-pseudo: filter out records matching the \'pseudo\' keyword\n+ \n+  -M/--merge : cluster the input transcripts into loci, collapsing matching\n+       transcripts (those with the same exact introns and fully contained)\n+  -d <dupinfo> : for -M option, write collapsing info to file <dupinfo>\n+  --cluster-only: same as --merge but without collapsing matching transcripts\n+  -K  for -M option: also collapse shorter, fully contained transcripts\n+      with fewer introns than the container\n+  -Q  for -M option, remove the containment restriction:\n+      (multi-exon transcripts will be collapsed if just their introns match,\n+      while single-exon transcripts can partially overlap (80%))\n+ \n+  --force-exons: make sure that the lowest level GFF features are printed as \n+      "exon" features\n+  -E  expose (warn about) duplicate transcript IDs and other potential \n+      problems with the given GFF/GTF records\n+  -D  decode url encoded characters within attributes\n+  -Z  merge close exons into a single exon (for intron size<4)\n+  -w  write a fasta file with spliced exons for each GFF transcript\n+  -x  write a fasta file with spliced CDS for each GFF transcript\n+  -W  for -w and -x options, also write for each fasta record the exon\n+      coordinates projected onto the spliced 
sequence\n+  -y  write a protein fasta file with the translation of CDS for each record\n+  -L  Ensembl GTF to GFF3 conversion (implies -F; should be used with -m)\n+  -m  <chr_replace> is a reference (genomic) sequence replacement table with\n+      this format:\n+      <original_ref_ID> <new_ref_ID>\n+      For example from UCSC naming to Ensembl naming:\n+      chr1\t1\n+      chr2\t2\n+      GFF records on reference sequences that are not found among the\n+      <original_ref_ID> entries in this file will be filtered out\n+  -o  the "filtered" GFF records will be written to <outfile.gff>\n+      (use -o- for printing to stdout)\n+  -t  use <trackname> in the second column of each GFF output line\n+  -T  -o option will output GTF format instead of GFF3\n+\n+]]>\n+    </help>\n+    <citations>\n+        <citation type="doi">10.1038/nbt.1621</citation>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r 5f6e6582c01d test-data/Homo_sapiens.GRCh37.71.dna.chromosome.19.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Homo_sapiens.GRCh37.71.dna.chromosome.19.fa Thu May 14 13:08:14 2015 -0400
b
b'@@ -0,0 +1,10000 @@\n+>19 dna:chromosome chromosome:GRCh37:19:1:59128983:1 REF\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNN'..b'GTGTCCTCGTGGGGTTTGGGAGCCGGGTCG\n+GCGGCCTCCTCCCCCAAGCTTCATCCAGCCCTGGAGAGACGGGGACCTGCGTCGGCTTTG\n+GGAACTTCAGGGGAAGAACCCTGAGCTCGGGGCAGGAGAATGACATGGTCTGTCTGGGTT\n+CCCATCCCACTGTTTCACTTATGGCTGGAGGAGCCTCAGTTTCCGTGTCAGAGCCGAGAG\n+AGAGGCTGGGCGGCTGTTGGAGGAGGGCTGAGCTCACATCGGTGTCAGGTGCCGGGGGCC\n+CCACTGAAGCCACTCTGGAGTGAGAGGGGGTCCGAGGCTTCTTCCGGTCCCCCAGGTGTG\n+AGAGCTGAGGCCGGAAGGTTTCCCCAGGGACCCAGCCTCATCCCGGCCTCCGGAGCTGGT\n+CTCCCTCCCTGCCTGCGAGGGCCAAGCTGGACAAAGTGGCAGGACCCCCGCGTTTCCGAG\n+GAAGGGGCCCTCGTAGAGGCCTCACTGTTACCTGTCGGGGATGGGTTTTAAATACAGGAT\n+TTAATGATGGTCACTGATGTTTCTGTGACAGCTGAGGGGGGCCTGTCCCTGCTCAGCACT\n+GGGGTTGGACAAGGAGGGCACAGGGTGCCCGGGGAGTGCTGGGGCCCGCCTGGGTCCCTA\n+TGGTGGCCGGATGGGGAAAGGGCTGCCGGACGCGGCTGCACAGAGTGTGAGGGGGAGTGG\n+GGTTGGGCTTTGGGCCTTCCCCTGACCCCAGGTCCCCTCTCTGGGGTCCTGTCTTCTGGA\n+GCTGCCTCCCCCTCCCCAGGGCGAGAGGCAGGAGAGGGTCTCCCAAGGTCACTCAGTCTA\n+GTGTTGGGCAAGGAGAGGGTTGGGGTGGAGAGGGTTGGGGTGCACCGGCAGAGACAGTGA\n+CGCCCTGCAGGGCTCTGGGGTCCCCTCTGCCCCAGGCCTCGGTTTCCCCTTGGTAGGGGA\n+CATGCGTGGGCGCCCTTCTTCCTTCTGACCTCTGGCTGGTGACGACGAGGGTTGCGGCCC\n+CAAGCCCTGCTTGGAGGCATTGTGGTTGGAACGGGCAGGCCCAGCCCCACAAGGGACCCT\n+CAGGGTGGCCACCCCACCTGTCCCTTCTCCTAGGCCCCTCTGCTCTGACCCCCGTAATCC\n+CCAGGAATACTGCCCTGCTGGGCAAGATGGGCTGAGGAGTCCTTTCTGGGGACACAGGAG\n+CAGAAGGCTCAAGGGAGGTGGTTTCCAGACAGTGTGGGCCGTGGATAAGAGGACAGTCCT\n+GGGGTCTCATCCTGCAGTCCCACTTCCATCAGAGGGGAGGTGTGGTACCTGCCCCCAGCT\n+CACCCCAAGAATAAAGGGGGAGAATGTCTGTTAATCATCACCTGGAAAGGAGGCCTTCAT\n+GTTATATGGCCGTTTAAAACCAGGTACCCCTTGGCGGTTTCTAGGTCCTCCTGGTGGTTT\n+CTAGGTCTCCTTGGCGGTTTCTAGGTCCCCCTTGACAGTTTCTAGGTCTCCTTGGTGGTT\n+TCTAGGTCTTCCTGGTGGTTTGTAGATCTCCTTGGCGGTTTCTAGGTCCTCCTGGTGGTT\n+TCTAGGTCCCCCTTGGTGGTTTCTAGGTCCTCCTCGTGGTTTCTAGGTCTCCTTGGCGGT\n+TTCTAGGTCCCCCTTGACAGTTTCTAGGTCTCCTTGGTGGTTTCTAGGTCTTCCTGGTGG\n+TTTCTAGGTCCTCCTGGTGGTTTCTAGGTCCTCCTGGTGGTTTCTAGGTCCCCCTTGGTG\n+GTTTCTAGGCCCTCCTGGTGGTTTCTAGGTCTCCTTGGCAGTTTCTAGGTCCTCCTGGTG\n+GTTTCTAGGTCTCCTTGGCGGTTTCTAGGTCTCCTTGGCAGTTTCTAGGTCCCCCTTGGT\n+GGTCTC
TGGAGCCCCCTGGGAGTTTCTCCTCGCCCCTCGTGGGGGAGGTTTCTAGGCCCT\n+GTGGCCTTCTCAACTGCTCTTTCCTGCTGGTTTTGTCCCAAGACTGGTGCTCGAAGGTGG\n+GAGGCAGGTCTCAATCCAGAGGGTGAGGCTGGGGCTGAGCTCCACGGTGAGCCCCCTCGG\n+GGCTGAGGGGCAGGGGGCCAGCGGCAGGCGGGTGGCATCCCCTCAGCCCACTCGGTGCTG\n+GGGCCCTGGGGGTCTCGTTGCTGAGCAGATGTGTCGGACTGGGGGCCTGAGCCCTCATGT\n+CCTGTCTAGACTCAGGGAAAGGAAGGCCTTAAATTTCTAGTCCTTTTAGTCATTTGATTT\n+CACCCTTCAAGCTTGTGAGGGGACCCCTCCCCACCCCGAGATCAAGTCAGCCTTTTTTTT\n+TTTTGAGACAGAGTCTCACTCTGTCACCCAGGCTGGAGTGCAATGGTGCAGTTTTGGCTC\n+ACTGCAGCCTCGACTTCATGGGCTCGAGCAATCCTCCTACCTCAGCCTCCCAAGTAGCTG\n+GGTCCACAGGTGCGCACCACCACGCTGGCTAATTTTTGTATTTTTTGTAGAGACGGGATC\n+TCACTATCTTGCCCAGGCTGGTCCTGAACTCCTGAGCTTCTGTGATCCTTTAGTCTTGGC\n+CTCCCAAGGTGCTGGCATTAGAAGCATGAGCCCCTGCACCCACCCTTCCCTTCTTTTTTG\n+AGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCGATGGCGCGATCTCGGCTCACTGC\n+AAGCTCCGCCTCCCGGGTTCATGCCGTTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGCCT\n+ACAGGCGCCCGCCACCTCACACAAAAAAATAATACATTTTTGTATTTTTAGTAGAGACGG\n+GGTTTCACCGTGTTGGCCAGGATGGTCTCGATCTCTTGACCTTGTAATCTGCCCGCCTCA\n+GCCTCCTTAAGCCCTGGAATTACAGGCGTGAGCCACCGTGCCCAGCCTGAGTCAGCCCTT\n+CTCTAGGTTTTATCCCCAGCCTGGGTGGTTCTTGGGGGGTGTCCCGAAGGCTCAGCCCCT\n+CCCCATCCCGAGGCGGTGGATGCTTGGAAGAAGAGACTGAGGTTCTCAGGGGCTGCAGGA\n+ACTTGTCCAAGGTCACTGTGCCAGCAGGCGGGTGCCAGCCCAGGTCTGGCTGATGCCACC\n+ACGATGTGAGTCGCTGGGTCCCTTCCAGCGTTTGGCTCTTGCAGACCGAGCTGCTGTGAA\n+CATTTTGGTACAAATGGCTTTTTAATTTTTTTCTCTTCCTTTTGGACCGTTTCCTCGGGA\n+TCATTTCCCCGAAGTGGAGCGTCTGGGTCACCCCGCAGACGTGGTGTTAACAGCTTTTCC\n+CTCGAGGAGCCGTCCAGAAGCAAGGAGCCGACTTTCGGCGGCTCCTGAAACAAAGGTCCC\n+GGCGGCCACGCCAGCGATGAGCTCCCAGGTTTTTCTTTATTGGTGTTAACTTTGGGGAGT\n+CCCTCCAGTCCCTCAGCATCTCCTCCGAAAGGATGTGGGGACCCCGGCCATGATAGGGAG\n+GGGCTGGGCCTGGGGTGGGGATAAGGCCTCAAGAATTGGCCGGGCGCGGTGGCTCACGCC\n+TGTAATCCCAGCACTTTAGGAGGCCAAGGCGGGCGGATCACGAGGTCAGGAGTTCGAGAC\n+CATCCTGGCTAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAAAAAAAATT\n+ATCTGGGCGTGGTGGCGGGCGCCTATAGTCCCAGCTACTCGGGAGGGTGAGGCAAGAGAA\n+TGGTGTGAACCCTGGAGGCGGAGCTTGCAGTGAGCCGAGATTGTGCCATCGCACTCCAGC\n+CTGGGCGACAGAGTGAGACTCTGTCTCAAAAAAGAAAACCTCAAGAACTAAGA
AGGCTGA\n+GTTGGTACAGGGATGGGAAGGGGTCCCGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTCAGAGGGTCTCATGTTACCCAGGCTGGTCTTGAACTCCTGGGC\n'
b
diff -r 000000000000 -r 5f6e6582c01d test-data/Homo_sapiens.GRCh37.71.dna.chromosome.19.fa.fai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Homo_sapiens.GRCh37.71.dna.chromosome.19.fa.fai Thu May 14 13:08:14 2015 -0400
b
@@ -0,0 +1,1 @@
+19 599940 57 60 61
b
diff -r 000000000000 -r 5f6e6582c01d test-data/Homo_sapiens.GRCh37_19.71.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Homo_sapiens.GRCh37_19.71.gff3 Thu May 14 13:08:14 2015 -0400
b
b'@@ -0,0 +1,313 @@\n+# gffread test-data/Homo_sapiens.GRCh37_19.71.gtf -o test-data/Homo_sapiens.GRCh37_19.71.gff3\n+##gff-version 3\n+19\tsnRNA\ttranscript\t223158\t223261\t.\t-\t.\tID=ENST00000410397;geneID=ENSG00000222329;gene_name=U6\n+19\tsnRNA\texon\t223158\t223261\t.\t-\t.\tParent=ENST00000410397\n+19\tunprocessed_pseudogene\ttranscript\t229640\t230165\t.\t-\t.\tID=ENST00000587910;geneID=ENSG00000267600;gene_name=AC098474.1\n+19\tunprocessed_pseudogene\texon\t229640\t229709\t.\t-\t.\tParent=ENST00000587910\n+19\tunprocessed_pseudogene\texon\t230084\t230165\t.\t-\t.\tParent=ENST00000587910\n+19\tprocessed_pseudogene\ttranscript\t239145\t239247\t.\t-\t.\tID=ENST00000588755;geneID=ENSG00000267305;gene_name=CTD-3113P16.7\n+19\tprocessed_pseudogene\texon\t239145\t239247\t.\t-\t.\tParent=ENST00000588755\n+19\tprocessed_pseudogene\ttranscript\t279495\t280170\t.\t+\t.\tID=ENST00000589981;geneID=ENSG00000267447;gene_name=VN2R11P\n+19\tprocessed_pseudogene\texon\t279495\t280170\t.\t+\t.\tParent=ENST00000589981\n+19\tprotein_coding\tmRNA\t281043\t291386\t.\t-\t.\tID=ENST00000269812;geneID=ENSG00000141934;gene_name=PPAP2C\n+19\tprotein_coding\texon\t281043\t281537\t.\t-\t.\tParent=ENST00000269812\n+19\tprotein_coding\texon\t282134\t282310\t.\t-\t.\tParent=ENST00000269812\n+19\tprotein_coding\texon\t282752\t282809\t.\t-\t.\tParent=ENST00000269812\n+19\tprotein_coding\texon\t287474\t287751\t.\t-\t.\tParent=ENST00000269812\n+19\tprotein_coding\texon\t288020\t288171\t.\t-\t.\tParent=ENST00000269812\n+19\tprotein_coding\texon\t291285\t291386\t.\t-\t.\tParent=ENST00000269812\n+19\tprotein_coding\tCDS\t281391\t281537\t.\t-\t0\tParent=ENST00000269812\n+19\tprotein_coding\tCDS\t282134\t282310\t.\t-\t0\tParent=ENST00000269812\n+19\tprotein_coding\tCDS\t282752\t282809\t.\t-\t1\tParent=ENST00000269812\n+19\tprotein_coding\tCDS\t287474\t287751\t.\t-\t0\tParent=ENST00000269812\n+19\tprotein_coding\tCDS\t288020\t288171\t.\t-\t2\tParent=ENST00000269812\n+19\tprotein_coding\tCDS\t291285
\t291336\t.\t-\t0\tParent=ENST00000269812\n+19\tprotein_coding\tmRNA\t281345\t291393\t.\t-\t.\tID=ENST00000434325;geneID=ENSG00000141934;gene_name=PPAP2C\n+19\tprotein_coding\texon\t281345\t281537\t.\t-\t.\tParent=ENST00000434325\n+19\tprotein_coding\texon\t282134\t282310\t.\t-\t.\tParent=ENST00000434325\n+19\tprotein_coding\texon\t282752\t282809\t.\t-\t.\tParent=ENST00000434325\n+19\tprotein_coding\texon\t287474\t287751\t.\t-\t.\tParent=ENST00000434325\n+19\tprotein_coding\texon\t288020\t288171\t.\t-\t.\tParent=ENST00000434325\n+19\tprotein_coding\texon\t291326\t291393\t.\t-\t.\tParent=ENST00000434325\n+19\tprotein_coding\tCDS\t281391\t281537\t.\t-\t0\tParent=ENST00000434325\n+19\tprotein_coding\tCDS\t282134\t282310\t.\t-\t0\tParent=ENST00000434325\n+19\tprotein_coding\tCDS\t282752\t282809\t.\t-\t1\tParent=ENST00000434325\n+19\tprotein_coding\tCDS\t287474\t287751\t.\t-\t0\tParent=ENST00000434325\n+19\tprotein_coding\tCDS\t288020\t288055\t.\t-\t0\tParent=ENST00000434325\n+19\tprotein_coding\tmRNA\t281388\t291200\t.\t-\t.\tID=ENST00000327790;geneID=ENSG00000141934;gene_name=PPAP2C\n+19\tprotein_coding\texon\t281388\t281537\t.\t-\t.\tParent=ENST00000327790\n+19\tprotein_coding\texon\t282134\t282310\t.\t-\t.\tParent=ENST00000327790\n+19\tprotein_coding\texon\t282752\t282809\t.\t-\t.\tParent=ENST00000327790\n+19\tprotein_coding\texon\t287474\t287751\t.\t-\t.\tParent=ENST00000327790\n+19\tprotein_coding\texon\t288020\t288171\t.\t-\t.\tParent=ENST00000327790\n+19\tprotein_coding\texon\t290952\t291200\t.\t-\t.\tParent=ENST00000327790\n+19\tprotein_coding\tCDS\t281391\t281537\t.\t-\t0\tParent=ENST00000327790\n+19\tprotein_coding\tCDS\t282134\t282310\t.\t-\t0\tParent=ENST00000327790\n+19\tprotein_coding\tCDS\t282752\t282809\t.\t-\t1\tParent=ENST00000327790\n+19\tprotein_coding\tCDS\t287474\t287751\t.\t-\t0\tParent=ENST00000327790\n+19\tprotein_coding\tCDS\t288020\t288171\t.\t-\t2\tParent=ENST00000327790\n+19\tprotein_coding\tCDS\t290952\t291066\t.\t-\t0\tParent=ENST000003277
90\n+19\tprotein_coding\tmRNA\t281991\t287636\t.\t-\t.\tID=ENST00000586998;geneID=ENSG00000141934;gene_name=PPAP2C\n+19\tprotein_coding\texon\t281991\t282310\t.\t-\t.\tParent=ENST00000586998\n+19\tprotein_coding\texon\t282752\t282809\t.\t-\t.\tParent=ENST00000586998\n+19\tprotein_coding\texon\t287474\t287636\t.\t-\t.\tParent=ENST00000586998\n+19\tprotein_coding\tCDS\t282125\t282310\t.\t-\t0\tParent=ENST00000586998\n+19\tprotein_coding\tCDS\t282752\t282809\t.\t-\t1\tParent=ENS'..b'72501\t.\t-\t2\tParent=ENST00000315489\n+19\tprotein_coding\tCDS\t474621\t474747\t.\t-\t0\tParent=ENST00000315489\n+19\tprotein_coding\tmRNA\t463467\t474880\t.\t-\t.\tID=ENST00000382696;geneID=ENSG00000181781;gene_name=ODF3L2\n+19\tprotein_coding\texon\t463467\t464364\t.\t-\t.\tParent=ENST00000382696\n+19\tprotein_coding\texon\t467649\t467762\t.\t-\t.\tParent=ENST00000382696\n+19\tprotein_coding\texon\t474621\t474880\t.\t-\t.\tParent=ENST00000382696\n+19\tprotein_coding\tCDS\t463847\t464364\t.\t-\t2\tParent=ENST00000382696\n+19\tprotein_coding\tCDS\t467649\t467762\t.\t-\t2\tParent=ENST00000382696\n+19\tprotein_coding\tCDS\t474621\t474747\t.\t-\t0\tParent=ENST00000382696\n+19\tretained_intron\ttranscript\t464146\t472631\t.\t-\t.\tID=ENST00000591681;geneID=ENSG00000181781;gene_name=ODF3L2\n+19\tretained_intron\texon\t464146\t464364\t.\t-\t.\tParent=ENST00000591681\n+19\tretained_intron\texon\t467649\t467762\t.\t-\t.\tParent=ENST00000591681\n+19\tretained_intron\texon\t472394\t472631\t.\t-\t.\tParent=ENST00000591681\n+19\tprocessed_transcript\ttranscript\t489176\t505342\t.\t+\t.\tID=ENST00000587541;geneID=ENSG00000099866;gene_name=MADCAM1\n+19\tprocessed_transcript\texon\t489176\t490039\t.\t+\t.\tParent=ENST00000587541\n+19\tprocessed_transcript\texon\t501669\t501929\t.\t+\t.\tParent=ENST00000587541\n+19\tprocessed_transcript\texon\t504745\t505342\t.\t+\t.\tParent=ENST00000587541\n+19\tantisense\ttranscript\t490046\t507813\t.\t-\t.\tID=ENST00000592413;geneID=ENSG00000266933;gene_name=AC005775.
2\n+19\tantisense\texon\t490046\t490353\t.\t-\t.\tParent=ENST00000592413\n+19\tantisense\texon\t501541\t501624\t.\t-\t.\tParent=ENST00000592413\n+19\tantisense\texon\t507376\t507813\t.\t-\t.\tParent=ENST00000592413\n+19\tprotein_coding\tmRNA\t496454\t505207\t.\t+\t.\tID=ENST00000346144;geneID=ENSG00000099866;gene_name=MADCAM1\n+19\tprotein_coding\texon\t496454\t496551\t.\t+\t.\tParent=ENST00000346144\n+19\tprotein_coding\texon\t497833\t498117\t.\t+\t.\tParent=ENST00000346144\n+19\tprotein_coding\texon\t498496\t498825\t.\t+\t.\tParent=ENST00000346144\n+19\tprotein_coding\texon\t504745\t505207\t.\t+\t.\tParent=ENST00000346144\n+19\tprotein_coding\tCDS\t496500\t496551\t.\t+\t0\tParent=ENST00000346144\n+19\tprotein_coding\tCDS\t497833\t498117\t.\t+\t2\tParent=ENST00000346144\n+19\tprotein_coding\tCDS\t498496\t498825\t.\t+\t2\tParent=ENST00000346144\n+19\tprotein_coding\tCDS\t504745\t504963\t.\t+\t2\tParent=ENST00000346144\n+19\tprotein_coding\tmRNA\t496454\t505347\t.\t+\t.\tID=ENST00000215637;geneID=ENSG00000099866;gene_name=MADCAM1\n+19\tprotein_coding\texon\t496454\t496551\t.\t+\t.\tParent=ENST00000215637\n+19\tprotein_coding\texon\t497833\t498117\t.\t+\t.\tParent=ENST00000215637\n+19\tprotein_coding\texon\t498496\t498825\t.\t+\t.\tParent=ENST00000215637\n+19\tprotein_coding\texon\t501669\t501929\t.\t+\t.\tParent=ENST00000215637\n+19\tprotein_coding\texon\t504745\t505347\t.\t+\t.\tParent=ENST00000215637\n+19\tprotein_coding\tCDS\t496500\t496551\t.\t+\t0\tParent=ENST00000215637\n+19\tprotein_coding\tCDS\t497833\t498117\t.\t+\t2\tParent=ENST00000215637\n+19\tprotein_coding\tCDS\t498496\t498825\t.\t+\t2\tParent=ENST00000215637\n+19\tprotein_coding\tCDS\t501669\t501929\t.\t+\t2\tParent=ENST00000215637\n+19\tprotein_coding\tCDS\t504745\t504963\t.\t+\t2\tParent=ENST00000215637\n+19\tprotein_coding\tmRNA\t496500\t504965\t.\t+\t.\tID=ENST00000382683;geneID=ENSG00000099866;gene_name=MADCAM1\n+19\tprotein_coding\texon\t496500\t496551\t.\t+\t.\tParent=ENST00000382683\n+19\tprote
in_coding\texon\t498496\t498825\t.\t+\t.\tParent=ENST00000382683\n+19\tprotein_coding\texon\t504745\t504965\t.\t+\t.\tParent=ENST00000382683\n+19\tprotein_coding\tCDS\t496500\t496551\t.\t+\t0\tParent=ENST00000382683\n+19\tprotein_coding\tCDS\t498496\t498825\t.\t+\t2\tParent=ENST00000382683\n+19\tprotein_coding\tCDS\t504745\t504963\t.\t+\t2\tParent=ENST00000382683\n+19\tprotein_coding\tmRNA\t507299\t519654\t.\t+\t.\tID=ENST00000359315;geneID=ENSG00000141933;gene_name=TPGS1\n+19\tprotein_coding\texon\t507299\t507844\t.\t+\t.\tParent=ENST00000359315\n+19\tprotein_coding\texon\t518889\t519654\t.\t+\t.\tParent=ENST00000359315\n+19\tprotein_coding\tCDS\t507507\t507844\t.\t+\t0\tParent=ENST00000359315\n+19\tprotein_coding\tCDS\t518889\t519421\t.\t+\t1\tParent=ENST00000359315\n+19\tretained_intron\ttranscript\t507500\t510372\t.\t+\t.\tID=ENST00000588278;geneID=ENSG00000141933;gene_name=TPGS1\n+19\tretained_intron\texon\t507500\t510372\t.\t+\t.\tParent=ENST00000588278\n'
b
diff -r 000000000000 -r 5f6e6582c01d test-data/Homo_sapiens.GRCh37_19.71.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Homo_sapiens.GRCh37_19.71.gtf Thu May 14 13:08:14 2015 -0400
b
b'@@ -0,0 +1,301 @@\n+19\tsnRNA\texon\t223158\t223261\t.\t-\t.\t gene_id "ENSG00000222329"; transcript_id "ENST00000410397"; exon_number "1"; gene_name "U6"; gene_biotype "snRNA"; transcript_name "U6.795-201"; exon_id "ENSE00001807043";\n+19\tunprocessed_pseudogene\texon\t230084\t230165\t.\t-\t.\t gene_id "ENSG00000267600"; transcript_id "ENST00000587910"; exon_number "1"; gene_name "AC098474.1"; gene_biotype "pseudogene"; transcript_name "AC098474.1-001"; exon_id "ENSE00002913508";\n+19\tunprocessed_pseudogene\texon\t229640\t229709\t.\t-\t.\t gene_id "ENSG00000267600"; transcript_id "ENST00000587910"; exon_number "2"; gene_name "AC098474.1"; gene_biotype "pseudogene"; transcript_name "AC098474.1-001"; exon_id "ENSE00002957540";\n+19\tprocessed_pseudogene\texon\t239145\t239247\t.\t-\t.\t gene_id "ENSG00000267305"; transcript_id "ENST00000588755"; exon_number "1"; gene_name "CTD-3113P16.7"; gene_biotype "pseudogene"; transcript_name "CTD-3113P16.7-001"; exon_id "ENSE00002958670";\n+19\tprocessed_pseudogene\texon\t279495\t280170\t.\t+\t.\t gene_id "ENSG00000267447"; transcript_id "ENST00000589981"; exon_number "1"; gene_name "VN2R11P"; gene_biotype "pseudogene"; transcript_name "VN2R11P-001"; exon_id "ENSE00002783831";\n+19\tprotein_coding\texon\t291285\t291386\t.\t-\t.\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "1"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; exon_id "ENSE00001234447";\n+19\tprotein_coding\tCDS\t291285\t291336\t.\t-\t0\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "1"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; protein_id "ENSP00000269812";\n+19\tprotein_coding\tstart_codon\t291334\t291336\t.\t-\t0\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "1"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001";\n+19\tprotein_coding\texon\t288020\t288171\t.\t-\t.\t gene_id 
"ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "2"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; exon_id "ENSE00003304149";\n+19\tprotein_coding\tCDS\t288020\t288171\t.\t-\t2\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "2"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; protein_id "ENSP00000269812";\n+19\tprotein_coding\texon\t287474\t287751\t.\t-\t.\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "3"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; exon_id "ENSE00003352024";\n+19\tprotein_coding\tCDS\t287474\t287751\t.\t-\t0\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "3"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; protein_id "ENSP00000269812";\n+19\tprotein_coding\texon\t282752\t282809\t.\t-\t.\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "4"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; exon_id "ENSE00000951309";\n+19\tprotein_coding\tCDS\t282752\t282809\t.\t-\t1\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "4"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; protein_id "ENSP00000269812";\n+19\tprotein_coding\texon\t282134\t282310\t.\t-\t.\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "5"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; exon_id "ENSE00000951310";\n+19\tprotein_coding\tCDS\t282134\t282310\t.\t-\t0\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "5"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; protein_id "ENSP00000269812";\n+19\tprotein_coding\texon\t281043\t281537\t.\t-\t.\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "6"; gene_name 
"PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; exon_id "ENSE00000951311";\n+19\tprotein_coding\tCDS\t281391\t281537\t.\t-\t0\t gene_id "ENSG00'..b'866"; transcript_id "ENST00000382683"; exon_number "1"; gene_name "MADCAM1"; gene_biotype "protein_coding"; transcript_name "MADCAM1-003"; exon_id "ENSE00002252200";\n+19\tprotein_coding\tCDS\t496500\t496551\t.\t+\t0\t gene_id "ENSG00000099866"; transcript_id "ENST00000382683"; exon_number "1"; gene_name "MADCAM1"; gene_biotype "protein_coding"; transcript_name "MADCAM1-003"; protein_id "ENSP00000372130";\n+19\tprotein_coding\tstart_codon\t496500\t496502\t.\t+\t0\t gene_id "ENSG00000099866"; transcript_id "ENST00000382683"; exon_number "1"; gene_name "MADCAM1"; gene_biotype "protein_coding"; transcript_name "MADCAM1-003";\n+19\tprotein_coding\texon\t498496\t498825\t.\t+\t.\t gene_id "ENSG00000099866"; transcript_id "ENST00000382683"; exon_number "2"; gene_name "MADCAM1"; gene_biotype "protein_coding"; transcript_name "MADCAM1-003"; exon_id "ENSE00000655528";\n+19\tprotein_coding\tCDS\t498496\t498825\t.\t+\t2\t gene_id "ENSG00000099866"; transcript_id "ENST00000382683"; exon_number "2"; gene_name "MADCAM1"; gene_biotype "protein_coding"; transcript_name "MADCAM1-003"; protein_id "ENSP00000372130";\n+19\tprotein_coding\texon\t504745\t504965\t.\t+\t.\t gene_id "ENSG00000099866"; transcript_id "ENST00000382683"; exon_number "3"; gene_name "MADCAM1"; gene_biotype "protein_coding"; transcript_name "MADCAM1-003"; exon_id "ENSE00002232672";\n+19\tprotein_coding\tCDS\t504745\t504962\t.\t+\t2\t gene_id "ENSG00000099866"; transcript_id "ENST00000382683"; exon_number "3"; gene_name "MADCAM1"; gene_biotype "protein_coding"; transcript_name "MADCAM1-003"; protein_id "ENSP00000372130";\n+19\tprotein_coding\tstop_codon\t504963\t504965\t.\t+\t0\t gene_id "ENSG00000099866"; transcript_id "ENST00000382683"; exon_number "3"; gene_name "MADCAM1"; gene_biotype "protein_coding"; transcript_name 
"MADCAM1-003";\n+19\tantisense\texon\t507376\t507813\t.\t-\t.\t gene_id "ENSG00000266933"; transcript_id "ENST00000592413"; exon_number "1"; gene_name "AC005775.2"; gene_biotype "antisense"; transcript_name "AC005775.2-001"; exon_id "ENSE00002966233";\n+19\tantisense\texon\t501541\t501624\t.\t-\t.\t gene_id "ENSG00000266933"; transcript_id "ENST00000592413"; exon_number "2"; gene_name "AC005775.2"; gene_biotype "antisense"; transcript_name "AC005775.2-001"; exon_id "ENSE00002861098";\n+19\tantisense\texon\t490046\t490353\t.\t-\t.\t gene_id "ENSG00000266933"; transcript_id "ENST00000592413"; exon_number "3"; gene_name "AC005775.2"; gene_biotype "antisense"; transcript_name "AC005775.2-001"; exon_id "ENSE00001885597";\n+19\tprotein_coding\texon\t507299\t507844\t.\t+\t.\t gene_id "ENSG00000141933"; transcript_id "ENST00000359315"; exon_number "1"; gene_name "TPGS1"; gene_biotype "protein_coding"; transcript_name "TPGS1-001"; exon_id "ENSE00000951304";\n+19\tprotein_coding\tCDS\t507507\t507844\t.\t+\t0\t gene_id "ENSG00000141933"; transcript_id "ENST00000359315"; exon_number "1"; gene_name "TPGS1"; gene_biotype "protein_coding"; transcript_name "TPGS1-001"; protein_id "ENSP00000352265";\n+19\tprotein_coding\tstart_codon\t507507\t507509\t.\t+\t0\t gene_id "ENSG00000141933"; transcript_id "ENST00000359315"; exon_number "1"; gene_name "TPGS1"; gene_biotype "protein_coding"; transcript_name "TPGS1-001";\n+19\tprotein_coding\texon\t518889\t519654\t.\t+\t.\t gene_id "ENSG00000141933"; transcript_id "ENST00000359315"; exon_number "2"; gene_name "TPGS1"; gene_biotype "protein_coding"; transcript_name "TPGS1-001"; exon_id "ENSE00001431625";\n+19\tprotein_coding\tCDS\t518889\t519420\t.\t+\t1\t gene_id "ENSG00000141933"; transcript_id "ENST00000359315"; exon_number "2"; gene_name "TPGS1"; gene_biotype "protein_coding"; transcript_name "TPGS1-001"; protein_id "ENSP00000352265";\n+19\tprotein_coding\tstop_codon\t519421\t519423\t.\t+\t0\t gene_id "ENSG00000141933"; transcript_id 
"ENST00000359315"; exon_number "2"; gene_name "TPGS1"; gene_biotype "protein_coding"; transcript_name "TPGS1-001";\n+19\tretained_intron\texon\t507500\t510372\t.\t+\t.\t gene_id "ENSG00000141933"; transcript_id "ENST00000588278"; exon_number "1"; gene_name "TPGS1"; gene_biotype "protein_coding"; transcript_name "TPGS1-002"; exon_id "ENSE00002882282";\n'
b
diff -r 000000000000 -r 5f6e6582c01d test-data/chr_replace
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chr_replace Thu May 14 13:08:14 2015 -0400
b
@@ -0,0 +1,2 @@
+1 chr1
+19 chr19
b
diff -r 000000000000 -r 5f6e6582c01d tool-data/fasta_indexes.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fasta_indexes.loc.sample Thu May 14 13:08:14 2015 -0400
b
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools-indexed sequence data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id> <dbkey> <display_name> <file_base_path>
+#
+#So, for example, if you had the hg19 Canonical index stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
b
diff -r 000000000000 -r 5f6e6582c01d tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Thu May 14 13:08:14 2015 -0400
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 5f6e6582c01d tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Thu May 14 13:08:14 2015 -0400
b
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="cufflinks" version="2.2.1">
+        <repository changeset_revision="899067a260d1" name="package_cufflinks_2_2_1" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>