Repository 'gffread'
hg clone https://toolshed.g2.bx.psu.edu/repos/jjohnson/gffread

Changeset 0:d0d6fc2004be (2015-01-05)
Commit message:
Uploaded
added:
cuff_macros.xml
gffread.xml
test-data/Homo_sapiens.GRCh37.71.dna.chromosome.19.fa
test-data/Homo_sapiens.GRCh37.71.dna.chromosome.19.fa.fai
test-data/Homo_sapiens.GRCh37_19.71.gff3
test-data/Homo_sapiens.GRCh37_19.71.gtf
test-data/chr_replace
tool-data/fasta_indexes.loc.sample
tool_data_table_conf.xml.sample
tool_dependencies.xml
b
diff -r 000000000000 -r d0d6fc2004be cuff_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cuff_macros.xml Mon Jan 05 12:53:44 2015 -0500
[
@@ -0,0 +1,91 @@
+<macros>
+  <!-- Version string substituted wherever this token appears; gffread.xml builds its tool version from it as "@VERSION@.0". -->
+  <token name="@VERSION@">2.2.1</token>
+  <!-- Shared <requirements> block. The cufflinks package version reuses the @VERSION@ token so
+       the dependency version and the tool version cannot drift apart. <yield /> marks where
+       content nested inside the expanding <expand macro="requirements"> element is inserted. -->
+  <xml name="requirements">
+    <requirements>
+      <requirement type="package" version="@VERSION@">cufflinks</requirement>
+      <yield />
+    </requirements>
+  </xml>
+  <!-- Shared job failure detection: every non-zero exit code is fatal, and so is any
+       "Error:" or "Exception:" text matched in stdout or stderr. -->
+  <xml name="stdio">
+    <stdio>
+      <exit_code range=":-1" level="fatal" />
+      <exit_code range="1:" level="fatal" />
+      <regex match="Error:" source="both" level="fatal" />
+      <regex match="Exception:" source="both" level="fatal" />
+    </stdio>
+  </xml>
+  <xml name="condition_inputs">
+    <!-- Input selector; SAM/BAM (the first option) is the default. Every <param> is captioned with "label": "title" is only meaningful on <repeat>. -->
+    <conditional name="in_type">
+        <param name="set_in_type" type="select" label="Input data type"
+            help="CuffNorm supports either CXB (from cuffquant) or SAM/BAM input files. Mixing is not supported. Default: SAM/BAM">
+            <option value="BAM">SAM/BAM</option>
+            <option value="CXB">Cuffquant (CXB)</option>
+            <option value="CONDITION_LIST">List of single replicate conditions</option>
+            <option value="CONDITION_REPLICATE_LIST">List of multiple replicate conditions</option>
+        </param>
+        <when value="BAM">
+            <repeat name="conditions" title="Condition" min="2">
+                <param name="name" type="text" label="Name"/>
+                <param name="samples" label="Replicates" type="data" format="sam,bam" multiple="true"/>
+            </repeat>
+        </when>
+        <when value="CXB">
+            <repeat name="conditions" title="Condition" min="2">
+                <param name="name" type="text" label="Name"/>
+                <param name="samples" label="Replicates" type="data" format="cxb" multiple="true"/>
+            </repeat>
+        </when>
+        <when value="CONDITION_LIST">
+            <param name="conditions" label="List of Conditions" type="data_collection" collection_type="list" />
+        </when>
+        <when value="CONDITION_REPLICATE_LIST">
+            <param name="conditions" label="List of Conditions" type="data_collection" collection_type="list:list" />
+        </when>
+    </conditional>
+  </xml>
+  <!-- Cheetah fragment that emits the sample arguments, one entry per condition.
+       BAM/CXB repeats and list:list collections produce a comma-joined string of each
+       condition's samples; a flat list collection produces one element per condition. -->
+  <token name="@CONDITION_SAMPLES@">
+            #if $in_type.set_in_type in ['BAM', 'CXB']
+                #for $condition in $in_type.conditions:
+                    #set samples = ','.join( [ str( $sample ) for $sample in $condition.samples ] )
+                    $samples
+                #end for
+            #elif $in_type.set_in_type == 'CONDITION_LIST'
+                #for $sample in $in_type.conditions:
+                    $sample
+                #end for
+            #elif $in_type.set_in_type == 'CONDITION_REPLICATE_LIST'
+                #for $condition_list in $in_type.conditions:
+                    #set samples = ','.join( [ str( $sample ) for $sample in $condition_list ] )
+                    $samples
+                #end for
+            #end if
+  </token>
+  <!-- Cheetah fragment that emits the labels option as a comma-separated list of
+       single-quoted names. For BAM/CXB repeats the names come from each repeat's "name"
+       parameter and do not pass through the regex clean-up; for collections they are the
+       collection keys with every character other than word characters and "-" replaced
+       by "_". -->
+  <token name="@CONDITION_LABELS@">
+            #import re
+            #if $in_type.set_in_type in ['BAM', 'CXB']
+                #set labels = '\'' + '\',\''.join( [ str( $condition.name ) for $condition in $in_type.conditions ] ) + '\''
+            #elif $in_type.set_in_type in ['CONDITION_LIST', 'CONDITION_REPLICATE_LIST']
+                #set labels = '\'' + '\',\''.join( map(lambda x: re.sub('[^\w\-_]', '_', x), $in_type.conditions.keys() ) ) + '\''
+            #end if
+            --labels $labels
+  </token>
+  <!-- Inputs for tools that consume Cufflinks GTF files: one multi-select dataset parameter
+       plus any number of repeated dataset collections. -->
+  <xml name="cufflinks_gtf_inputs">
+    <param name="inputs" type="data" format="gtf" multiple="true"
+           label="GTF file(s) produced by Cufflinks" help="" />
+    <repeat name="additional_inputs" title="Additional GTF Inputs (Lists)">
+      <param name="additional_inputs" type="data_collection" format="gtf"
+             label="GTF file(s) produced by Cufflinks" help="" />
+    </repeat>
+  </xml>
+  <!-- Cheetah fragment that writes every selected GTF as a double-quoted argument: first the
+       datasets in $inputs, then each element of every collection added through the
+       additional_inputs repeat. -->
+  <token name="@CUFFLINKS_GTF_INPUTS@">
+            ## Inputs.
+            #for $input_file in $inputs:
+                "${input_file}"
+            #end for
+            #for $additional_input in $additional_inputs:
+                #for $input_file in $additional_input.additional_inputs:
+                  "${input_file}"
+                #end for
+            #end for
+  </token>
+  <!-- Python expression: true when "inputs" reports a length of at least 2. If "inputs" has no
+       __len__ attribute, the fallback [].__len__ yields 0 and the expression is false. -->
+  <token name="@HAS_MULTIPLE_INPUTS@">getattr(inputs, "__len__", [].__len__)() >= 2</token>
+</macros>
\ No newline at end of file
b
diff -r 000000000000 -r d0d6fc2004be gffread.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gffread.xml Mon Jan 05 12:53:44 2015 -0500
[
b'@@ -0,0 +1,425 @@\n+<tool id="gffread" name="gffread" version="@VERSION@.0">\n+    <description>Filters and/or converts GFF3/GTF2 records</description>\n+    <expand macro="requirements" />\n+    <expand macro="stdio" />\n+    <macros>\n+        <import>cuff_macros.xml</import>\n+        <xml name="fasta_output_select">\n+            <param name="fa_outputs" type="select" display="checkboxes" multiple="true" label="Select fasta outputs">\n+                <option value="-w exons.fa">(-w) a fasta file with spliced exons for each GFF transcript</option>\n+                <option value="-x cds.fa">(-x) a fasta file with spliced CDS for each GFF transcript</option>\n+                <option value="-y pep.fa">(-y)  a protein fasta file with the translation of CDS for each record</option>\n+                <option value="-W">(-W) for each fasta record the exon coordinates projected onto the spliced sequence</option>\n+            </param>\n+        </xml>\n+        <xml name="ref_filtering_select">\n+            <param name="ref_filtering" type="select" display="checkboxes" multiple="true" label="reference based filters">\n+                <option value="-N">(-N) discard multi-exon mRNAs that have any intron with a non-canonical splice site consensus (i.e. 
not GT-AG, GC-AG or AT-AC)</option>\n+                <option value="-J">(-J) discard any mRNAs that either lack initial START codon or the terminal STOP codon, or have an in-frame stop codon (only print mRNAs with a fulll, valid CDS)</option>\n+                <option value="-V">(-V) discard any mRNAs with CDS having in-frame stop codons</option>\n+                <option value="-H">(-H with -V) check and adjust the starting CDS phase if the original phase leads to a translation with an in-frame stop codon</option>\n+                <option value="-B">(-B with -V) single-exon transcripts are also checked on the opposite strand</option>\n+            </param>\n+        </xml>\n+        <xml name="trackname">\n+            <param name="tname" type="text" value="" size="30" optional="true" label="(-t) Trackname to use in the second column of each GFF output line">\n+                <validator type="regex">\\w+</validator>\n+            </param>\n+        </xml>\n+        <xml name="merge_opts">\n+             <option value="-K">(-K) also collapse shorter, fully contained transcripts with fewer introns than the container</option>\n+             <option value="-Q">(-Q) remove the containment restriction (multi-exon transcripts will be collapsed if just their introns match, while single-exon transcripts can partially overlap 80%)</option>\n+             <option value="-d dupinfo">(-d) output collapsing info</option>\n+        </xml>\n+        <xml name="cluster_opts">\n+             <option value="--force-exons">(--force-exons) make sure that the lowest level GFF features are printed as \'exon\' features</option>\n+             <option value="-Z">(-Z) merge close exons into a single exon (for intron size &lt; 4)</option>\n+        </xml>\n+        <xml name="merge_opt_sel">\n+            <param name="merge_options" type="select" display="checkboxes" multiple="true" label="Merge options">\n+                <expand macro="cluster_opts" />\n+                <expand 
macro="merge_opts" />\n+            </param>\n+        </xml>\n+        <xml name="cluster_opt_sel">\n+            <param name="merge_options" type="select" display="checkboxes" multiple="true" label="Cluster options">\n+                <expand macro="cluster_opts" />\n+            </param>\n+        </xml>\n+    </macros>\n+    <command>\n+<![CDATA[\n+    #if $reference_genome.source == \'history\':\n+        ln -s $reference_genome.genome_fasta genomeref.fa &&\n+    #end if\n+    gffread $input \n+    #if $reference_genome.source == \'cached\':\n+        -g "${reference_genome.fasta_indexes.fields.path}"\n+        #if $reference_genome.ref_filtering and str($reference_genome.ref_filtering) != \'\':\n+            #echo \' \'.join(str($reference_genome.ref_filtering).split(\',\'))\n+        #end if'..b']]\n+   [-CTVNJMKQAFGUBHZWTOLE] [-w "exons.fa"] [-x "cds.fa"] [-y "tr_cds.fa"]\n+   [-i "maxintron"] \n+ \n+Options: ::\n+\n+  -g  full path to a multi-fasta file with the genomic sequences\n+      for all input mappings, OR a directory with single-fasta files\n+      (one per genomic sequence, with file names matching sequence names)\n+  -s  <seq_info.fsize> is a tab-delimited file providing this info\n+      for each of the mapped sequences:\n+      <seq-name> <seq-length> <seq-description>\n+      (useful for -A option with mRNA/EST/protein mappings)\n+  -i  discard transcripts having an intron larger than <maxintron>\n+  -r  only show transcripts overlapping coordinate range <start>..<end>\n+      (on chromosome/contig <chr>, strand <strand> if provided)\n+  -R  for -r option, discard all transcripts that are not fully \n+      contained within the given range\n+  -U  discard single-exon transcripts\n+  -C  coding only: discard mRNAs that have no CDS feature\n+  -F  full GFF attribute preservation (all attributes are shown)\n+  -G  only parse additional exon attributes from the first exon\n+      and move them to the mRNA level (useful for GTF input)\n+  -A  use 
the description field from <seq_info.fsize> and add it\n+      as the value for a \'descr\' attribute to the GFF record\n+  \n+  -O  process also non-transcript GFF records (by default non-transcript\n+      records are ignored)\n+  -V  discard any mRNAs with CDS having in-frame stop codons\n+  -H  for -V option, check and adjust the starting CDS phase\n+      if the original phase leads to a translation with an \n+      in-frame stop codon\n+  -B  for -V option, single-exon transcripts are also checked on the\n+      opposite strand\n+  -N  discard multi-exon mRNAs that have any intron with a non-canonical\n+      splice site consensus (i.e. not GT-AG, GC-AG or AT-AC)\n+  -J  discard any mRNAs that either lack initial START codon\n+      or the terminal STOP codon, or have an in-frame stop codon\n+      (only print mRNAs with a fulll, valid CDS)\n+  --no-pseudo: filter out records matching the \'pseudo\' keyword\n+ \n+  -M/--merge : cluster the input transcripts into loci, collapsing matching\n+       transcripts (those with the same exact introns and fully contained)\n+  -d <dupinfo> : for -M option, write collapsing info to file <dupinfo>\n+  --cluster-only: same as --merge but without collapsing matching transcripts\n+  -K  for -M option: also collapse shorter, fully contained transcripts\n+      with fewer introns than the container\n+  -Q  for -M option, remove the containment restriction:\n+      (multi-exon transcripts will be collapsed if just their introns match,\n+      while single-exon transcripts can partially overlap (80%))\n+ \n+  --force-exons: make sure that the lowest level GFF features are printed as \n+      "exon" features\n+  -E  expose (warn about) duplicate transcript IDs and other potential \n+      problems with the given GFF/GTF records\n+  -D  decode url encoded characters within attributes\n+  -Z  merge close exons into a single exon (for intron size<4)\n+  -w  write a fasta file with spliced exons for each GFF transcript\n+  -x  write 
a fasta file with spliced CDS for each GFF transcript\n+  -W  for -w and -x options, also write for each fasta record the exon\n+      coordinates projected onto the spliced sequence\n+  -y  write a protein fasta file with the translation of CDS for each record\n+  -L  Ensembl GTF to GFF3 conversion (implies -F; should be used with -m)\n+  -m  <chr_replace> is a reference (genomic) sequence replacement table with\n+      this format:\n+      <original_ref_ID> <new_ref_ID>\n+      GFF records on reference sequences that are not found among the\n+      <original_ref_ID> entries in this file will be filtered out\n+  -o  the "filtered" GFF records will be written to <outfile.gff>\n+      (use -o- for printing to stdout)\n+  -t  use <trackname> in the second column of each GFF output line\n+  -T  -o option will output GTF format instead of GFF3\n+\n+\n+\n+\n+\n+]]>\n+    </help>\n+</tool>\n'
b
diff -r 000000000000 -r d0d6fc2004be test-data/Homo_sapiens.GRCh37.71.dna.chromosome.19.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Homo_sapiens.GRCh37.71.dna.chromosome.19.fa Mon Jan 05 12:53:44 2015 -0500
b
b'@@ -0,0 +1,10000 @@\n+>19 dna:chromosome chromosome:GRCh37:19:1:59128983:1 REF\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNN'..b'GTGTCCTCGTGGGGTTTGGGAGCCGGGTCG\n+GCGGCCTCCTCCCCCAAGCTTCATCCAGCCCTGGAGAGACGGGGACCTGCGTCGGCTTTG\n+GGAACTTCAGGGGAAGAACCCTGAGCTCGGGGCAGGAGAATGACATGGTCTGTCTGGGTT\n+CCCATCCCACTGTTTCACTTATGGCTGGAGGAGCCTCAGTTTCCGTGTCAGAGCCGAGAG\n+AGAGGCTGGGCGGCTGTTGGAGGAGGGCTGAGCTCACATCGGTGTCAGGTGCCGGGGGCC\n+CCACTGAAGCCACTCTGGAGTGAGAGGGGGTCCGAGGCTTCTTCCGGTCCCCCAGGTGTG\n+AGAGCTGAGGCCGGAAGGTTTCCCCAGGGACCCAGCCTCATCCCGGCCTCCGGAGCTGGT\n+CTCCCTCCCTGCCTGCGAGGGCCAAGCTGGACAAAGTGGCAGGACCCCCGCGTTTCCGAG\n+GAAGGGGCCCTCGTAGAGGCCTCACTGTTACCTGTCGGGGATGGGTTTTAAATACAGGAT\n+TTAATGATGGTCACTGATGTTTCTGTGACAGCTGAGGGGGGCCTGTCCCTGCTCAGCACT\n+GGGGTTGGACAAGGAGGGCACAGGGTGCCCGGGGAGTGCTGGGGCCCGCCTGGGTCCCTA\n+TGGTGGCCGGATGGGGAAAGGGCTGCCGGACGCGGCTGCACAGAGTGTGAGGGGGAGTGG\n+GGTTGGGCTTTGGGCCTTCCCCTGACCCCAGGTCCCCTCTCTGGGGTCCTGTCTTCTGGA\n+GCTGCCTCCCCCTCCCCAGGGCGAGAGGCAGGAGAGGGTCTCCCAAGGTCACTCAGTCTA\n+GTGTTGGGCAAGGAGAGGGTTGGGGTGGAGAGGGTTGGGGTGCACCGGCAGAGACAGTGA\n+CGCCCTGCAGGGCTCTGGGGTCCCCTCTGCCCCAGGCCTCGGTTTCCCCTTGGTAGGGGA\n+CATGCGTGGGCGCCCTTCTTCCTTCTGACCTCTGGCTGGTGACGACGAGGGTTGCGGCCC\n+CAAGCCCTGCTTGGAGGCATTGTGGTTGGAACGGGCAGGCCCAGCCCCACAAGGGACCCT\n+CAGGGTGGCCACCCCACCTGTCCCTTCTCCTAGGCCCCTCTGCTCTGACCCCCGTAATCC\n+CCAGGAATACTGCCCTGCTGGGCAAGATGGGCTGAGGAGTCCTTTCTGGGGACACAGGAG\n+CAGAAGGCTCAAGGGAGGTGGTTTCCAGACAGTGTGGGCCGTGGATAAGAGGACAGTCCT\n+GGGGTCTCATCCTGCAGTCCCACTTCCATCAGAGGGGAGGTGTGGTACCTGCCCCCAGCT\n+CACCCCAAGAATAAAGGGGGAGAATGTCTGTTAATCATCACCTGGAAAGGAGGCCTTCAT\n+GTTATATGGCCGTTTAAAACCAGGTACCCCTTGGCGGTTTCTAGGTCCTCCTGGTGGTTT\n+CTAGGTCTCCTTGGCGGTTTCTAGGTCCCCCTTGACAGTTTCTAGGTCTCCTTGGTGGTT\n+TCTAGGTCTTCCTGGTGGTTTGTAGATCTCCTTGGCGGTTTCTAGGTCCTCCTGGTGGTT\n+TCTAGGTCCCCCTTGGTGGTTTCTAGGTCCTCCTCGTGGTTTCTAGGTCTCCTTGGCGGT\n+TTCTAGGTCCCCCTTGACAGTTTCTAGGTCTCCTTGGTGGTTTCTAGGTCTTCCTGGTGG\n+TTTCTAGGTCCTCCTGGTGGTTTCTAGGTCCTCCTGGTGGTTTCTAGGTCCCCCTTGGTG\n+GTTTCTAGGCCCTCCTGGTGGTTTCTAGGTCTCCTTGGCAGTTTCTAGGTCCTCCTGGTG\n+GTTTCTAGGTCTCCTTGGCGGTTTCTAGGTCTCCTTGGCAGTTTCTAGGTCCCCCTTGGT\n+GGTCTC
TGGAGCCCCCTGGGAGTTTCTCCTCGCCCCTCGTGGGGGAGGTTTCTAGGCCCT\n+GTGGCCTTCTCAACTGCTCTTTCCTGCTGGTTTTGTCCCAAGACTGGTGCTCGAAGGTGG\n+GAGGCAGGTCTCAATCCAGAGGGTGAGGCTGGGGCTGAGCTCCACGGTGAGCCCCCTCGG\n+GGCTGAGGGGCAGGGGGCCAGCGGCAGGCGGGTGGCATCCCCTCAGCCCACTCGGTGCTG\n+GGGCCCTGGGGGTCTCGTTGCTGAGCAGATGTGTCGGACTGGGGGCCTGAGCCCTCATGT\n+CCTGTCTAGACTCAGGGAAAGGAAGGCCTTAAATTTCTAGTCCTTTTAGTCATTTGATTT\n+CACCCTTCAAGCTTGTGAGGGGACCCCTCCCCACCCCGAGATCAAGTCAGCCTTTTTTTT\n+TTTTGAGACAGAGTCTCACTCTGTCACCCAGGCTGGAGTGCAATGGTGCAGTTTTGGCTC\n+ACTGCAGCCTCGACTTCATGGGCTCGAGCAATCCTCCTACCTCAGCCTCCCAAGTAGCTG\n+GGTCCACAGGTGCGCACCACCACGCTGGCTAATTTTTGTATTTTTTGTAGAGACGGGATC\n+TCACTATCTTGCCCAGGCTGGTCCTGAACTCCTGAGCTTCTGTGATCCTTTAGTCTTGGC\n+CTCCCAAGGTGCTGGCATTAGAAGCATGAGCCCCTGCACCCACCCTTCCCTTCTTTTTTG\n+AGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCGATGGCGCGATCTCGGCTCACTGC\n+AAGCTCCGCCTCCCGGGTTCATGCCGTTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGCCT\n+ACAGGCGCCCGCCACCTCACACAAAAAAATAATACATTTTTGTATTTTTAGTAGAGACGG\n+GGTTTCACCGTGTTGGCCAGGATGGTCTCGATCTCTTGACCTTGTAATCTGCCCGCCTCA\n+GCCTCCTTAAGCCCTGGAATTACAGGCGTGAGCCACCGTGCCCAGCCTGAGTCAGCCCTT\n+CTCTAGGTTTTATCCCCAGCCTGGGTGGTTCTTGGGGGGTGTCCCGAAGGCTCAGCCCCT\n+CCCCATCCCGAGGCGGTGGATGCTTGGAAGAAGAGACTGAGGTTCTCAGGGGCTGCAGGA\n+ACTTGTCCAAGGTCACTGTGCCAGCAGGCGGGTGCCAGCCCAGGTCTGGCTGATGCCACC\n+ACGATGTGAGTCGCTGGGTCCCTTCCAGCGTTTGGCTCTTGCAGACCGAGCTGCTGTGAA\n+CATTTTGGTACAAATGGCTTTTTAATTTTTTTCTCTTCCTTTTGGACCGTTTCCTCGGGA\n+TCATTTCCCCGAAGTGGAGCGTCTGGGTCACCCCGCAGACGTGGTGTTAACAGCTTTTCC\n+CTCGAGGAGCCGTCCAGAAGCAAGGAGCCGACTTTCGGCGGCTCCTGAAACAAAGGTCCC\n+GGCGGCCACGCCAGCGATGAGCTCCCAGGTTTTTCTTTATTGGTGTTAACTTTGGGGAGT\n+CCCTCCAGTCCCTCAGCATCTCCTCCGAAAGGATGTGGGGACCCCGGCCATGATAGGGAG\n+GGGCTGGGCCTGGGGTGGGGATAAGGCCTCAAGAATTGGCCGGGCGCGGTGGCTCACGCC\n+TGTAATCCCAGCACTTTAGGAGGCCAAGGCGGGCGGATCACGAGGTCAGGAGTTCGAGAC\n+CATCCTGGCTAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAAAAAAAATT\n+ATCTGGGCGTGGTGGCGGGCGCCTATAGTCCCAGCTACTCGGGAGGGTGAGGCAAGAGAA\n+TGGTGTGAACCCTGGAGGCGGAGCTTGCAGTGAGCCGAGATTGTGCCATCGCACTCCAGC\n+CTGGGCGACAGAGTGAGACTCTGTCTCAAAAAAGAAAACCTCAAGAACTAAGA
AGGCTGA\n+GTTGGTACAGGGATGGGAAGGGGTCCCGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG\n+TGTGTGTGTGTGTGTGTCAGAGGGTCTCATGTTACCCAGGCTGGTCTTGAACTCCTGGGC\n'
b
diff -r 000000000000 -r d0d6fc2004be test-data/Homo_sapiens.GRCh37.71.dna.chromosome.19.fa.fai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Homo_sapiens.GRCh37.71.dna.chromosome.19.fa.fai Mon Jan 05 12:53:44 2015 -0500
b
@@ -0,0 +1,1 @@
+19 599940 57 60 61
b
diff -r 000000000000 -r d0d6fc2004be test-data/Homo_sapiens.GRCh37_19.71.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Homo_sapiens.GRCh37_19.71.gff3 Mon Jan 05 12:53:44 2015 -0500
b
b'@@ -0,0 +1,313 @@\n+# gffread test-data/Homo_sapiens.GRCh37_19.71.gtf -o test-data/Homo_sapiens.GRCh37_19.71.gff3\n+##gff-version 3\n+19\tsnRNA\ttranscript\t223158\t223261\t.\t-\t.\tID=ENST00000410397;geneID=ENSG00000222329;gene_name=U6\n+19\tsnRNA\texon\t223158\t223261\t.\t-\t.\tParent=ENST00000410397\n+19\tunprocessed_pseudogene\ttranscript\t229640\t230165\t.\t-\t.\tID=ENST00000587910;geneID=ENSG00000267600;gene_name=AC098474.1\n+19\tunprocessed_pseudogene\texon\t229640\t229709\t.\t-\t.\tParent=ENST00000587910\n+19\tunprocessed_pseudogene\texon\t230084\t230165\t.\t-\t.\tParent=ENST00000587910\n+19\tprocessed_pseudogene\ttranscript\t239145\t239247\t.\t-\t.\tID=ENST00000588755;geneID=ENSG00000267305;gene_name=CTD-3113P16.7\n+19\tprocessed_pseudogene\texon\t239145\t239247\t.\t-\t.\tParent=ENST00000588755\n+19\tprocessed_pseudogene\ttranscript\t279495\t280170\t.\t+\t.\tID=ENST00000589981;geneID=ENSG00000267447;gene_name=VN2R11P\n+19\tprocessed_pseudogene\texon\t279495\t280170\t.\t+\t.\tParent=ENST00000589981\n+19\tprotein_coding\tmRNA\t281043\t291386\t.\t-\t.\tID=ENST00000269812;geneID=ENSG00000141934;gene_name=PPAP2C\n+19\tprotein_coding\texon\t281043\t281537\t.\t-\t.\tParent=ENST00000269812\n+19\tprotein_coding\texon\t282134\t282310\t.\t-\t.\tParent=ENST00000269812\n+19\tprotein_coding\texon\t282752\t282809\t.\t-\t.\tParent=ENST00000269812\n+19\tprotein_coding\texon\t287474\t287751\t.\t-\t.\tParent=ENST00000269812\n+19\tprotein_coding\texon\t288020\t288171\t.\t-\t.\tParent=ENST00000269812\n+19\tprotein_coding\texon\t291285\t291386\t.\t-\t.\tParent=ENST00000269812\n+19\tprotein_coding\tCDS\t281391\t281537\t.\t-\t0\tParent=ENST00000269812\n+19\tprotein_coding\tCDS\t282134\t282310\t.\t-\t0\tParent=ENST00000269812\n+19\tprotein_coding\tCDS\t282752\t282809\t.\t-\t1\tParent=ENST00000269812\n+19\tprotein_coding\tCDS\t287474\t287751\t.\t-\t0\tParent=ENST00000269812\n+19\tprotein_coding\tCDS\t288020\t288171\t.\t-\t2\tParent=ENST00000269812\n+19\tprotein_coding\tCDS\t291285
\t291336\t.\t-\t0\tParent=ENST00000269812\n+19\tprotein_coding\tmRNA\t281345\t291393\t.\t-\t.\tID=ENST00000434325;geneID=ENSG00000141934;gene_name=PPAP2C\n+19\tprotein_coding\texon\t281345\t281537\t.\t-\t.\tParent=ENST00000434325\n+19\tprotein_coding\texon\t282134\t282310\t.\t-\t.\tParent=ENST00000434325\n+19\tprotein_coding\texon\t282752\t282809\t.\t-\t.\tParent=ENST00000434325\n+19\tprotein_coding\texon\t287474\t287751\t.\t-\t.\tParent=ENST00000434325\n+19\tprotein_coding\texon\t288020\t288171\t.\t-\t.\tParent=ENST00000434325\n+19\tprotein_coding\texon\t291326\t291393\t.\t-\t.\tParent=ENST00000434325\n+19\tprotein_coding\tCDS\t281391\t281537\t.\t-\t0\tParent=ENST00000434325\n+19\tprotein_coding\tCDS\t282134\t282310\t.\t-\t0\tParent=ENST00000434325\n+19\tprotein_coding\tCDS\t282752\t282809\t.\t-\t1\tParent=ENST00000434325\n+19\tprotein_coding\tCDS\t287474\t287751\t.\t-\t0\tParent=ENST00000434325\n+19\tprotein_coding\tCDS\t288020\t288055\t.\t-\t0\tParent=ENST00000434325\n+19\tprotein_coding\tmRNA\t281388\t291200\t.\t-\t.\tID=ENST00000327790;geneID=ENSG00000141934;gene_name=PPAP2C\n+19\tprotein_coding\texon\t281388\t281537\t.\t-\t.\tParent=ENST00000327790\n+19\tprotein_coding\texon\t282134\t282310\t.\t-\t.\tParent=ENST00000327790\n+19\tprotein_coding\texon\t282752\t282809\t.\t-\t.\tParent=ENST00000327790\n+19\tprotein_coding\texon\t287474\t287751\t.\t-\t.\tParent=ENST00000327790\n+19\tprotein_coding\texon\t288020\t288171\t.\t-\t.\tParent=ENST00000327790\n+19\tprotein_coding\texon\t290952\t291200\t.\t-\t.\tParent=ENST00000327790\n+19\tprotein_coding\tCDS\t281391\t281537\t.\t-\t0\tParent=ENST00000327790\n+19\tprotein_coding\tCDS\t282134\t282310\t.\t-\t0\tParent=ENST00000327790\n+19\tprotein_coding\tCDS\t282752\t282809\t.\t-\t1\tParent=ENST00000327790\n+19\tprotein_coding\tCDS\t287474\t287751\t.\t-\t0\tParent=ENST00000327790\n+19\tprotein_coding\tCDS\t288020\t288171\t.\t-\t2\tParent=ENST00000327790\n+19\tprotein_coding\tCDS\t290952\t291066\t.\t-\t0\tParent=ENST000003277
90\n+19\tprotein_coding\tmRNA\t281991\t287636\t.\t-\t.\tID=ENST00000586998;geneID=ENSG00000141934;gene_name=PPAP2C\n+19\tprotein_coding\texon\t281991\t282310\t.\t-\t.\tParent=ENST00000586998\n+19\tprotein_coding\texon\t282752\t282809\t.\t-\t.\tParent=ENST00000586998\n+19\tprotein_coding\texon\t287474\t287636\t.\t-\t.\tParent=ENST00000586998\n+19\tprotein_coding\tCDS\t282125\t282310\t.\t-\t0\tParent=ENST00000586998\n+19\tprotein_coding\tCDS\t282752\t282809\t.\t-\t1\tParent=ENS'..b'72501\t.\t-\t2\tParent=ENST00000315489\n+19\tprotein_coding\tCDS\t474621\t474747\t.\t-\t0\tParent=ENST00000315489\n+19\tprotein_coding\tmRNA\t463467\t474880\t.\t-\t.\tID=ENST00000382696;geneID=ENSG00000181781;gene_name=ODF3L2\n+19\tprotein_coding\texon\t463467\t464364\t.\t-\t.\tParent=ENST00000382696\n+19\tprotein_coding\texon\t467649\t467762\t.\t-\t.\tParent=ENST00000382696\n+19\tprotein_coding\texon\t474621\t474880\t.\t-\t.\tParent=ENST00000382696\n+19\tprotein_coding\tCDS\t463847\t464364\t.\t-\t2\tParent=ENST00000382696\n+19\tprotein_coding\tCDS\t467649\t467762\t.\t-\t2\tParent=ENST00000382696\n+19\tprotein_coding\tCDS\t474621\t474747\t.\t-\t0\tParent=ENST00000382696\n+19\tretained_intron\ttranscript\t464146\t472631\t.\t-\t.\tID=ENST00000591681;geneID=ENSG00000181781;gene_name=ODF3L2\n+19\tretained_intron\texon\t464146\t464364\t.\t-\t.\tParent=ENST00000591681\n+19\tretained_intron\texon\t467649\t467762\t.\t-\t.\tParent=ENST00000591681\n+19\tretained_intron\texon\t472394\t472631\t.\t-\t.\tParent=ENST00000591681\n+19\tprocessed_transcript\ttranscript\t489176\t505342\t.\t+\t.\tID=ENST00000587541;geneID=ENSG00000099866;gene_name=MADCAM1\n+19\tprocessed_transcript\texon\t489176\t490039\t.\t+\t.\tParent=ENST00000587541\n+19\tprocessed_transcript\texon\t501669\t501929\t.\t+\t.\tParent=ENST00000587541\n+19\tprocessed_transcript\texon\t504745\t505342\t.\t+\t.\tParent=ENST00000587541\n+19\tantisense\ttranscript\t490046\t507813\t.\t-\t.\tID=ENST00000592413;geneID=ENSG00000266933;gene_name=AC005775.
2\n+19\tantisense\texon\t490046\t490353\t.\t-\t.\tParent=ENST00000592413\n+19\tantisense\texon\t501541\t501624\t.\t-\t.\tParent=ENST00000592413\n+19\tantisense\texon\t507376\t507813\t.\t-\t.\tParent=ENST00000592413\n+19\tprotein_coding\tmRNA\t496454\t505207\t.\t+\t.\tID=ENST00000346144;geneID=ENSG00000099866;gene_name=MADCAM1\n+19\tprotein_coding\texon\t496454\t496551\t.\t+\t.\tParent=ENST00000346144\n+19\tprotein_coding\texon\t497833\t498117\t.\t+\t.\tParent=ENST00000346144\n+19\tprotein_coding\texon\t498496\t498825\t.\t+\t.\tParent=ENST00000346144\n+19\tprotein_coding\texon\t504745\t505207\t.\t+\t.\tParent=ENST00000346144\n+19\tprotein_coding\tCDS\t496500\t496551\t.\t+\t0\tParent=ENST00000346144\n+19\tprotein_coding\tCDS\t497833\t498117\t.\t+\t2\tParent=ENST00000346144\n+19\tprotein_coding\tCDS\t498496\t498825\t.\t+\t2\tParent=ENST00000346144\n+19\tprotein_coding\tCDS\t504745\t504963\t.\t+\t2\tParent=ENST00000346144\n+19\tprotein_coding\tmRNA\t496454\t505347\t.\t+\t.\tID=ENST00000215637;geneID=ENSG00000099866;gene_name=MADCAM1\n+19\tprotein_coding\texon\t496454\t496551\t.\t+\t.\tParent=ENST00000215637\n+19\tprotein_coding\texon\t497833\t498117\t.\t+\t.\tParent=ENST00000215637\n+19\tprotein_coding\texon\t498496\t498825\t.\t+\t.\tParent=ENST00000215637\n+19\tprotein_coding\texon\t501669\t501929\t.\t+\t.\tParent=ENST00000215637\n+19\tprotein_coding\texon\t504745\t505347\t.\t+\t.\tParent=ENST00000215637\n+19\tprotein_coding\tCDS\t496500\t496551\t.\t+\t0\tParent=ENST00000215637\n+19\tprotein_coding\tCDS\t497833\t498117\t.\t+\t2\tParent=ENST00000215637\n+19\tprotein_coding\tCDS\t498496\t498825\t.\t+\t2\tParent=ENST00000215637\n+19\tprotein_coding\tCDS\t501669\t501929\t.\t+\t2\tParent=ENST00000215637\n+19\tprotein_coding\tCDS\t504745\t504963\t.\t+\t2\tParent=ENST00000215637\n+19\tprotein_coding\tmRNA\t496500\t504965\t.\t+\t.\tID=ENST00000382683;geneID=ENSG00000099866;gene_name=MADCAM1\n+19\tprotein_coding\texon\t496500\t496551\t.\t+\t.\tParent=ENST00000382683\n+19\tprote
in_coding\texon\t498496\t498825\t.\t+\t.\tParent=ENST00000382683\n+19\tprotein_coding\texon\t504745\t504965\t.\t+\t.\tParent=ENST00000382683\n+19\tprotein_coding\tCDS\t496500\t496551\t.\t+\t0\tParent=ENST00000382683\n+19\tprotein_coding\tCDS\t498496\t498825\t.\t+\t2\tParent=ENST00000382683\n+19\tprotein_coding\tCDS\t504745\t504963\t.\t+\t2\tParent=ENST00000382683\n+19\tprotein_coding\tmRNA\t507299\t519654\t.\t+\t.\tID=ENST00000359315;geneID=ENSG00000141933;gene_name=TPGS1\n+19\tprotein_coding\texon\t507299\t507844\t.\t+\t.\tParent=ENST00000359315\n+19\tprotein_coding\texon\t518889\t519654\t.\t+\t.\tParent=ENST00000359315\n+19\tprotein_coding\tCDS\t507507\t507844\t.\t+\t0\tParent=ENST00000359315\n+19\tprotein_coding\tCDS\t518889\t519421\t.\t+\t1\tParent=ENST00000359315\n+19\tretained_intron\ttranscript\t507500\t510372\t.\t+\t.\tID=ENST00000588278;geneID=ENSG00000141933;gene_name=TPGS1\n+19\tretained_intron\texon\t507500\t510372\t.\t+\t.\tParent=ENST00000588278\n'
b
diff -r 000000000000 -r d0d6fc2004be test-data/Homo_sapiens.GRCh37_19.71.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Homo_sapiens.GRCh37_19.71.gtf Mon Jan 05 12:53:44 2015 -0500
b
b'@@ -0,0 +1,301 @@\n+19\tsnRNA\texon\t223158\t223261\t.\t-\t.\t gene_id "ENSG00000222329"; transcript_id "ENST00000410397"; exon_number "1"; gene_name "U6"; gene_biotype "snRNA"; transcript_name "U6.795-201"; exon_id "ENSE00001807043";\n+19\tunprocessed_pseudogene\texon\t230084\t230165\t.\t-\t.\t gene_id "ENSG00000267600"; transcript_id "ENST00000587910"; exon_number "1"; gene_name "AC098474.1"; gene_biotype "pseudogene"; transcript_name "AC098474.1-001"; exon_id "ENSE00002913508";\n+19\tunprocessed_pseudogene\texon\t229640\t229709\t.\t-\t.\t gene_id "ENSG00000267600"; transcript_id "ENST00000587910"; exon_number "2"; gene_name "AC098474.1"; gene_biotype "pseudogene"; transcript_name "AC098474.1-001"; exon_id "ENSE00002957540";\n+19\tprocessed_pseudogene\texon\t239145\t239247\t.\t-\t.\t gene_id "ENSG00000267305"; transcript_id "ENST00000588755"; exon_number "1"; gene_name "CTD-3113P16.7"; gene_biotype "pseudogene"; transcript_name "CTD-3113P16.7-001"; exon_id "ENSE00002958670";\n+19\tprocessed_pseudogene\texon\t279495\t280170\t.\t+\t.\t gene_id "ENSG00000267447"; transcript_id "ENST00000589981"; exon_number "1"; gene_name "VN2R11P"; gene_biotype "pseudogene"; transcript_name "VN2R11P-001"; exon_id "ENSE00002783831";\n+19\tprotein_coding\texon\t291285\t291386\t.\t-\t.\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "1"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; exon_id "ENSE00001234447";\n+19\tprotein_coding\tCDS\t291285\t291336\t.\t-\t0\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "1"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; protein_id "ENSP00000269812";\n+19\tprotein_coding\tstart_codon\t291334\t291336\t.\t-\t0\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "1"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001";\n+19\tprotein_coding\texon\t288020\t288171\t.\t-\t.\t gene_id 
"ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "2"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; exon_id "ENSE00003304149";\n+19\tprotein_coding\tCDS\t288020\t288171\t.\t-\t2\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "2"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; protein_id "ENSP00000269812";\n+19\tprotein_coding\texon\t287474\t287751\t.\t-\t.\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "3"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; exon_id "ENSE00003352024";\n+19\tprotein_coding\tCDS\t287474\t287751\t.\t-\t0\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "3"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; protein_id "ENSP00000269812";\n+19\tprotein_coding\texon\t282752\t282809\t.\t-\t.\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "4"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; exon_id "ENSE00000951309";\n+19\tprotein_coding\tCDS\t282752\t282809\t.\t-\t1\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "4"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; protein_id "ENSP00000269812";\n+19\tprotein_coding\texon\t282134\t282310\t.\t-\t.\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "5"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; exon_id "ENSE00000951310";\n+19\tprotein_coding\tCDS\t282134\t282310\t.\t-\t0\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "5"; gene_name "PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; protein_id "ENSP00000269812";\n+19\tprotein_coding\texon\t281043\t281537\t.\t-\t.\t gene_id "ENSG00000141934"; transcript_id "ENST00000269812"; exon_number "6"; gene_name 
"PPAP2C"; gene_biotype "protein_coding"; transcript_name "PPAP2C-001"; exon_id "ENSE00000951311";\n+19\tprotein_coding\tCDS\t281391\t281537\t.\t-\t0\t gene_id "ENSG00'..b'866"; transcript_id "ENST00000382683"; exon_number "1"; gene_name "MADCAM1"; gene_biotype "protein_coding"; transcript_name "MADCAM1-003"; exon_id "ENSE00002252200";\n+19\tprotein_coding\tCDS\t496500\t496551\t.\t+\t0\t gene_id "ENSG00000099866"; transcript_id "ENST00000382683"; exon_number "1"; gene_name "MADCAM1"; gene_biotype "protein_coding"; transcript_name "MADCAM1-003"; protein_id "ENSP00000372130";\n+19\tprotein_coding\tstart_codon\t496500\t496502\t.\t+\t0\t gene_id "ENSG00000099866"; transcript_id "ENST00000382683"; exon_number "1"; gene_name "MADCAM1"; gene_biotype "protein_coding"; transcript_name "MADCAM1-003";\n+19\tprotein_coding\texon\t498496\t498825\t.\t+\t.\t gene_id "ENSG00000099866"; transcript_id "ENST00000382683"; exon_number "2"; gene_name "MADCAM1"; gene_biotype "protein_coding"; transcript_name "MADCAM1-003"; exon_id "ENSE00000655528";\n+19\tprotein_coding\tCDS\t498496\t498825\t.\t+\t2\t gene_id "ENSG00000099866"; transcript_id "ENST00000382683"; exon_number "2"; gene_name "MADCAM1"; gene_biotype "protein_coding"; transcript_name "MADCAM1-003"; protein_id "ENSP00000372130";\n+19\tprotein_coding\texon\t504745\t504965\t.\t+\t.\t gene_id "ENSG00000099866"; transcript_id "ENST00000382683"; exon_number "3"; gene_name "MADCAM1"; gene_biotype "protein_coding"; transcript_name "MADCAM1-003"; exon_id "ENSE00002232672";\n+19\tprotein_coding\tCDS\t504745\t504962\t.\t+\t2\t gene_id "ENSG00000099866"; transcript_id "ENST00000382683"; exon_number "3"; gene_name "MADCAM1"; gene_biotype "protein_coding"; transcript_name "MADCAM1-003"; protein_id "ENSP00000372130";\n+19\tprotein_coding\tstop_codon\t504963\t504965\t.\t+\t0\t gene_id "ENSG00000099866"; transcript_id "ENST00000382683"; exon_number "3"; gene_name "MADCAM1"; gene_biotype "protein_coding"; transcript_name 
"MADCAM1-003";\n+19\tantisense\texon\t507376\t507813\t.\t-\t.\t gene_id "ENSG00000266933"; transcript_id "ENST00000592413"; exon_number "1"; gene_name "AC005775.2"; gene_biotype "antisense"; transcript_name "AC005775.2-001"; exon_id "ENSE00002966233";\n+19\tantisense\texon\t501541\t501624\t.\t-\t.\t gene_id "ENSG00000266933"; transcript_id "ENST00000592413"; exon_number "2"; gene_name "AC005775.2"; gene_biotype "antisense"; transcript_name "AC005775.2-001"; exon_id "ENSE00002861098";\n+19\tantisense\texon\t490046\t490353\t.\t-\t.\t gene_id "ENSG00000266933"; transcript_id "ENST00000592413"; exon_number "3"; gene_name "AC005775.2"; gene_biotype "antisense"; transcript_name "AC005775.2-001"; exon_id "ENSE00001885597";\n+19\tprotein_coding\texon\t507299\t507844\t.\t+\t.\t gene_id "ENSG00000141933"; transcript_id "ENST00000359315"; exon_number "1"; gene_name "TPGS1"; gene_biotype "protein_coding"; transcript_name "TPGS1-001"; exon_id "ENSE00000951304";\n+19\tprotein_coding\tCDS\t507507\t507844\t.\t+\t0\t gene_id "ENSG00000141933"; transcript_id "ENST00000359315"; exon_number "1"; gene_name "TPGS1"; gene_biotype "protein_coding"; transcript_name "TPGS1-001"; protein_id "ENSP00000352265";\n+19\tprotein_coding\tstart_codon\t507507\t507509\t.\t+\t0\t gene_id "ENSG00000141933"; transcript_id "ENST00000359315"; exon_number "1"; gene_name "TPGS1"; gene_biotype "protein_coding"; transcript_name "TPGS1-001";\n+19\tprotein_coding\texon\t518889\t519654\t.\t+\t.\t gene_id "ENSG00000141933"; transcript_id "ENST00000359315"; exon_number "2"; gene_name "TPGS1"; gene_biotype "protein_coding"; transcript_name "TPGS1-001"; exon_id "ENSE00001431625";\n+19\tprotein_coding\tCDS\t518889\t519420\t.\t+\t1\t gene_id "ENSG00000141933"; transcript_id "ENST00000359315"; exon_number "2"; gene_name "TPGS1"; gene_biotype "protein_coding"; transcript_name "TPGS1-001"; protein_id "ENSP00000352265";\n+19\tprotein_coding\tstop_codon\t519421\t519423\t.\t+\t0\t gene_id "ENSG00000141933"; transcript_id 
"ENST00000359315"; exon_number "2"; gene_name "TPGS1"; gene_biotype "protein_coding"; transcript_name "TPGS1-001";\n+19\tretained_intron\texon\t507500\t510372\t.\t+\t.\t gene_id "ENSG00000141933"; transcript_id "ENST00000588278"; exon_number "1"; gene_name "TPGS1"; gene_biotype "protein_coding"; transcript_name "TPGS1-002"; exon_id "ENSE00002882282";\n'
b
diff -r 000000000000 -r d0d6fc2004be test-data/chr_replace
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chr_replace Mon Jan 05 12:53:44 2015 -0500
b
@@ -0,0 +1,2 @@
+1 chr1
+19 chr19
b
diff -r 000000000000 -r d0d6fc2004be tool-data/fasta_indexes.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fasta_indexes.loc.sample Mon Jan 05 12:53:44 2015 -0500
b
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools-indexed sequence data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (whitespace characters are TAB characters):
+#
+# <unique_build_id> <dbkey> <display_name> <file_base_path>
+#
+#So, for example, if you had the indexed hg19 Canonical sequence stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include one entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be used directly. Instead, its name serves
+#as a prefix for the index file (<path>.fai).  For example:
+#
+#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
b
diff -r 000000000000 -r d0d6fc2004be tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Mon Jan 05 12:53:44 2015 -0500
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r d0d6fc2004be tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Mon Jan 05 12:53:44 2015 -0500
b
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="cufflinks" version="2.2.1">
+        <repository changeset_revision="899067a260d1" name="package_cufflinks_2_2_1" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>